diff --git a/.golangci.yml b/.golangci.yml
index b76f305..08583ea 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -88,6 +88,7 @@ linters:
linters:
- errcheck
- noctx
+ - gosec # fake credentials in secret-scanner fixtures are not real keys
issues:
max-issues-per-linter: 50
diff --git a/README.md b/README.md
index 7f9911b..59359f3 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,9 @@ makepkg -si
# sql recon + lfi scanning
./sif -u https://example.com -sql -lfi
+# web vuln probes (cors, open redirect, reflected xss)
+./sif -u https://example.com -cors -redirect -xss
+
# framework detection (with cve lookup)
./sif -u https://example.com -framework
@@ -158,7 +161,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
| `-ports` | port scanning (common/full) |
| `-nuclei` | vulnerability scanning with nuclei templates |
| `-dork` | automated google dorking |
-| `-js` | javascript analysis |
+| `-js` | javascript analysis + secret and endpoint extraction |
| `-c3` | cloud storage misconfiguration |
| `-headers` | http header analysis |
| `-sh` | security header analysis (missing/weak headers) |
@@ -170,7 +173,13 @@ sif has a modular architecture. modules are defined in yaml and can be extended
| `-securitytrails` | domain discovery + target expansion (requires SECURITYTRAILS_API_KEY) |
| `-sql` | sql recon |
| `-lfi` | local file inclusion |
+| `-cors` | cors misconfiguration probe |
+| `-redirect` | open redirect probe |
+| `-xss` | reflected xss probe |
| `-framework` | framework detection with cve lookup |
+| `-crawl` | web crawler (spider same-host links/scripts/forms) |
+| `-crawl-depth` | max crawl recursion depth (default 2) |
+| `-passive` | passive subdomain/url discovery (zero traffic to target) |
### http options
diff --git a/docs/usage.md b/docs/usage.md
index 5a1db9e..ae75048 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -79,7 +79,7 @@ scopes: `common` (top ports), `full` (all ports)
### javascript analysis
-`-js` - analyze javascript files
+`-js` - analyze javascript files + secret and endpoint extraction
```bash
./sif -u https://example.com -js
@@ -154,6 +154,30 @@ export SHODAN_API_KEY=your-api-key
./sif -u https://example.com -lfi
```
+### cors probe
+
+`-cors` - probe for cors misconfigurations (reflected/permissive origins)
+
+```bash
+./sif -u https://example.com -cors
+```
+
+### open redirect probe
+
+`-redirect` - probe redirect-prone params for open redirects
+
+```bash
+./sif -u https://example.com/login?next=home -redirect
+```
+
+### reflected xss probe
+
+`-xss` - inject a canary into params and report unescaped reflections
+
+```bash
+./sif -u https://example.com/search?q=test -xss
+```
+
### framework detection
`-framework` - detect web frameworks with version and cve lookup
@@ -162,6 +186,26 @@ export SHODAN_API_KEY=your-api-key
./sif -u https://example.com -framework
```
+### web crawler
+
+`-crawl` - spider the target, following same-host links, scripts and forms
+
+`-crawl-depth` - max recursion depth (default 2). respects robots.txt and stays on the target host.
+
+```bash
+./sif -u https://example.com -crawl -crawl-depth 3
+```
+
+### passive discovery
+
+`-passive` - gather subdomains from certificate transparency (crt.sh, certspotter) and historical urls from the wayback machine
+
+keyless and zero traffic to the target itself - all lookups hit third-party feeds.
+
+```bash
+./sif -u https://example.com -passive
+```
+
### whois lookup
`-whois` - perform whois lookups
@@ -339,6 +383,9 @@ the first time you run a new release sif also prints that release's notes once.
-git \
-sql \
-lfi \
+ -cors \
+ -redirect \
+ -xss \
-am
```
diff --git a/go.mod b/go.mod
index 4382257..35bb982 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
github.com/charmbracelet/glamour v0.10.0
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
github.com/charmbracelet/log v1.0.0
+ github.com/gocolly/colly/v2 v2.1.0
github.com/likexian/whois v1.15.7
github.com/projectdiscovery/goflags v0.1.74
github.com/projectdiscovery/nuclei/v3 v3.8.0
@@ -160,7 +161,6 @@ require (
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.4.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect
- github.com/gocolly/colly/v2 v2.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
diff --git a/internal/config/config.go b/internal/config/config.go
index 7e6755d..bdd2f95 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -46,7 +46,13 @@ type Settings struct {
SecurityTrails bool
SQL bool
LFI bool
+ CORS bool
+ Redirect bool
+ XSS bool
Framework bool
+ Crawl bool
+ CrawlDepth int
+ Passive bool
Modules string // Comma-separated list of module IDs to run
ModuleTags string // Run modules matching these tags
AllModules bool // Run all loaded modules
@@ -62,6 +68,10 @@ type Settings struct {
// "negative WaitGroup counter"; clamp the parsed value up to this.
const minThreads = 1
+// defaultCrawlDepth is the default for the -crawl-depth flag; deep enough
+// to find linked pages without crawling an entire site.
+const defaultCrawlDepth = 2
+
const (
Nil goflags.EnumVariable = iota
@@ -107,7 +117,13 @@ func Parse() *Settings {
flagSet.BoolVar(&settings.SecurityTrails, "securitytrails", false, "Enable SecurityTrails domain discovery (requires SECURITYTRAILS_API_KEY env var)"),
flagSet.BoolVar(&settings.SQL, "sql", false, "Enable SQL reconnaissance (admin panels, error disclosure)"),
flagSet.BoolVar(&settings.LFI, "lfi", false, "Enable LFI (Local File Inclusion) reconnaissance"),
+ flagSet.BoolVar(&settings.CORS, "cors", false, "Enable CORS misconfiguration probe"),
+ flagSet.BoolVar(&settings.Redirect, "redirect", false, "Enable open redirect probe"),
+ flagSet.BoolVar(&settings.XSS, "xss", false, "Enable reflected XSS probe"),
flagSet.BoolVar(&settings.Framework, "framework", false, "Enable framework detection"),
+ flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"),
+ flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"),
+ flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"),
)
flagSet.CreateGroup("runtime", "Runtime",
diff --git a/internal/scan/cors.go b/internal/scan/cors.go
new file mode 100644
index 0000000..3828628
--- /dev/null
+++ b/internal/scan/cors.go
@@ -0,0 +1,236 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "context"
+ "fmt"
+ "net/http"
+ "net/url"
+ "strings"
+ "sync"
+ "time"
+
+ charmlog "github.com/charmbracelet/log"
+ "github.com/dropalldatabases/sif/internal/httpx"
+ "github.com/dropalldatabases/sif/internal/logger"
+ "github.com/dropalldatabases/sif/internal/output"
+)
+
+// CORSResult collects every cors misconfiguration found on the target.
+type CORSResult struct {
+ Findings []CORSFinding `json:"findings,omitempty"` // one entry per crafted origin the server echoed back
+}
+
+// CORSFinding is a single reflecting/permissive cors response.
+type CORSFinding struct {
+ URL string `json:"url"` // target url the probe request was sent to
+ OriginTested string `json:"origin_tested"` // Origin header value sent with the probe
+ AllowOrigin string `json:"allow_origin"` // Access-Control-Allow-Origin value that came back
+ AllowCredentials bool `json:"allow_credentials"` // Access-Control-Allow-Credentials was "true" (case-insensitive)
+ Severity string `json:"severity"` // "high" when credentials are allowed, otherwise "medium"
+ Note string `json:"note"` // why the tested origin is interesting
+}
+
+// corsMaxRedirects caps the redirect chain: once this many requests have been
+// made the client stops following and the last response received is inspected.
+const corsMaxRedirects = 3
+
+// the sentinel attacker origin; if it comes back in Access-Control-Allow-Origin
+// the target reflects arbitrary origins and any site can read the response.
+const corsEvilOrigin = "https://sif-cors-probe.evil.com"
+
+// corsOrigins lists each Origin header to inject + why it matters. {host} expands
+// to the target host so the prefix/suffix bypasses key off the real name.
+var corsOrigins = []struct {
+ origin string // crafted Origin header, {host} -> target host
+ note string // why this case is interesting
+ reflects bool // true when a literal echo of this origin is exploitable; NOTE(review): never read in the visible code
+}{
+ // arbitrary attacker origin - the classic "reflects anything" bug
+ {corsEvilOrigin, "arbitrary origin reflected", true},
+ // the literal null origin (sandboxed iframes, redirects, file://) is forgeable
+ {"null", "null origin allowed", true},
+ // attacker domain appended after {host}: the origin still starts with the host, so naive startswith checks pass
+ {"https://{host}.evil.com", "suffix bypass (attacker subdomain)", true},
+ // attacker label glued in front of {host}: the origin still ends with the host, so naive endswith checks pass
+ {"https://evil-{host}", "prefix bypass", true},
+ // embedded case: evil.com. is prefixed onto {host}; NOTE(review): that is a subdomain of the target - confirm it is attacker-controllable
+ {"https://evil.com.{host}", "embedded-host bypass", true},
+ // scheme downgrade: http origin trusted lets a mitm read cross-origin data
+ {"http://{host}", "http scheme downgrade trusted", true},
+}
+
+// CORS probes the target for cross-origin resource sharing misconfigurations:
+// one GET per crafted origin in corsOrigins, one finding per origin echoed back.
+// threads is clamped to [1, len(corsOrigins)] so zero cannot skip the scan and a
+// negative value cannot panic the WaitGroup. no findings returns (nil, nil).
+func CORS(targetURL string, timeout time.Duration, threads int, logdir string) (*CORSResult, error) {
+	log := output.Module("CORS")
+	log.Start()
+	spin := output.NewSpinner("Scanning for CORS misconfigurations")
+	spin.Start()
+
+	sanitizedURL := stripScheme(targetURL)
+	if logdir != "" {
+		if err := logger.WriteHeader(sanitizedURL, logdir, "CORS misconfiguration probe"); err != nil {
+			spin.Stop()
+			log.Error("error creating log file: %v", err)
+			return nil, fmt.Errorf("create cors log: %w", err)
+		}
+	}
+
+	parsedURL, err := url.Parse(targetURL)
+	if err != nil {
+		spin.Stop()
+		return nil, fmt.Errorf("parse url: %w", err)
+	}
+	host := parsedURL.Host
+
+	client := httpx.Client(timeout)
+	client.CheckRedirect = func(_ *http.Request, via []*http.Request) error {
+		if len(via) >= corsMaxRedirects {
+			return http.ErrUseLastResponse
+		}
+		return nil
+	}
+
+	result := &CORSResult{Findings: make([]CORSFinding, 0, len(corsOrigins))}
+
+	// one origin per worker item; the set is small so a buffered channel is plenty
+	originChan := make(chan int, len(corsOrigins))
+	for i := range corsOrigins {
+		originChan <- i
+	}
+	close(originChan)
+
+	// at least one worker (else nothing is probed and the target looks clean),
+	// at most one per origin (extra workers would exit immediately).
+	workers := min(max(threads, 1), len(corsOrigins))
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+	wg.Add(workers)
+	for t := 0; t < workers; t++ {
+		go func() {
+			defer wg.Done()
+			for idx := range originChan {
+				spec := corsOrigins[idx]
+				// {host} is the seam that turns a template into a real attacker origin
+				origin := strings.ReplaceAll(spec.origin, "{host}", host)
+
+				finding, ok := probeCORS(client, targetURL, origin, spec.note)
+				if !ok {
+					continue
+				}
+
+				// record and report under one lock so workers can't interleave spinner or log output.
+				mu.Lock()
+				result.Findings = append(result.Findings, finding)
+				spin.Stop()
+				log.Warn("cors %s: origin %s reflected (creds=%t)",
+					renderCORSSeverity(finding.Severity),
+					output.Highlight.Render(origin), finding.AllowCredentials)
+				spin.Start()
+				if logdir != "" {
+					logger.Write(sanitizedURL, logdir,
+						fmt.Sprintf("CORS: %s - origin [%s] reflected as [%s] creds=%t\n",
+							finding.Note, origin, finding.AllowOrigin, finding.AllowCredentials))
+				}
+				mu.Unlock()
+			}
+		}()
+	}
+	wg.Wait()
+	spin.Stop()
+
+	if len(result.Findings) == 0 {
+		log.Info("no cors misconfigurations detected")
+		log.Complete(0, "found")
+		return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners
+	}
+
+	log.Complete(len(result.Findings), "found")
+	return result, nil
+}
+
+// probeCORS sends one GET with the crafted Origin and decides whether the
+// response trusts it. It returns the finding and true only when the server
+// echoes that exact origin back in Access-Control-Allow-Origin (the literal
+// "null" included). A missing header, a "*" wildcard - with or without
+// credentials - or any other fixed origin is never reported. Request-build and
+// transport errors are logged at debug level and treated as "no finding".
+func probeCORS(client *http.Client, targetURL, origin, note string) (CORSFinding, bool) {
+	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, targetURL, http.NoBody)
+	if err != nil {
+		charmlog.Debugf("cors: build request for %s: %v", targetURL, err)
+		return CORSFinding{}, false
+	}
+	req.Header.Set("Origin", origin)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		charmlog.Debugf("cors: request %s with origin %s: %v", targetURL, origin, err)
+		return CORSFinding{}, false
+	}
+	// headers are all we need; drain nothing, just close.
+	resp.Body.Close()
+
+	allowOrigin := resp.Header.Get("Access-Control-Allow-Origin")
+	switch allowOrigin {
+	case "":
+		// the server ignores Origin entirely.
+		return CORSFinding{}, false
+	case "*":
+		// a wildcard with credentials is forbidden by browsers, so it isn't directly
+		// exploitable; a plain wildcard exposes only public data. neither is a finding.
+		return CORSFinding{}, false
+	case origin:
+		// the bug is reflection: the server echoed our attacker origin back.
+	default:
+		// it returned something else (its own host), so it isn't trusting us.
+		return CORSFinding{}, false
+	}
+
+	// credentials decide the severity, not whether this is a finding.
+	allowCreds := strings.EqualFold(resp.Header.Get("Access-Control-Allow-Credentials"), "true")
+
+	return CORSFinding{
+		URL:              targetURL,
+		OriginTested:     origin,
+		AllowOrigin:      allowOrigin,
+		AllowCredentials: allowCreds,
+		Severity:         corsSeverity(allowCreds),
+		Note:             note,
+	}, true
+}
+
+// corsSeverity grades a reflected origin: "high" when credentials are allowed
+// too (authenticated responses become readable), "medium" otherwise.
+func corsSeverity(allowCreds bool) string {
+	if !allowCreds {
+		return "medium"
+	}
+	return "high"
+}
+
+func renderCORSSeverity(severity string) string { // "high" renders with SeverityHigh, anything else with SeverityMedium
+	if severity != "high" {
+		return output.SeverityMedium.Render(severity)
+	}
+	return output.SeverityHigh.Render(severity)
+}
+
+// ResultType identifies cors findings for the result registry; it always returns "cors".
+func (r *CORSResult) ResultType() string { return "cors" }
+
+var _ ScanResult = (*CORSResult)(nil) // compile-time check that *CORSResult satisfies ScanResult
diff --git a/internal/scan/cors_test.go b/internal/scan/cors_test.go
new file mode 100644
index 0000000..ac5afce
--- /dev/null
+++ b/internal/scan/cors_test.go
@@ -0,0 +1,140 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+)
+
+// reflectingCORS serves a handler that mirrors Origin back with credentials allowed, the exploitable misconfiguration.
+func reflectingCORS() *httptest.Server {
+	mirror := func(w http.ResponseWriter, r *http.Request) {
+		if origin := r.Header.Get("Origin"); origin != "" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+			w.Header().Set("Access-Control-Allow-Credentials", "true")
+		}
+		w.WriteHeader(http.StatusOK)
+	}
+	return httptest.NewServer(http.HandlerFunc(mirror))
+}
+
+func TestCORS_ReflectsArbitraryOrigin(t *testing.T) {
+	srv := reflectingCORS()
+	defer srv.Close()
+
+	result, err := CORS(srv.URL, 5*time.Second, 3, "")
+	if err != nil {
+		t.Fatalf("CORS: %v", err)
+	}
+	if result == nil || len(result.Findings) == 0 {
+		t.Fatalf("expected cors findings on reflecting server, got %+v", result)
+	}
+
+	// the reflecting server echoes every crafted origin with credentials,
+	// so each finding - not just the sentinel - must be high severity.
+	var sawEvil bool
+	for _, f := range result.Findings {
+		if f.OriginTested == corsEvilOrigin {
+			sawEvil = true
+		}
+		if !f.AllowCredentials {
+			t.Errorf("expected credentials flagged for %s, got %+v", f.OriginTested, f)
+		}
+		if f.Severity != "high" {
+			t.Errorf("expected high severity for reflection+creds, got %s", f.Severity)
+		}
+	}
+	if !sawEvil {
+		t.Errorf("expected the sentinel evil origin to be reflected, got %+v", result.Findings)
+	}
+}
+
+func TestCORS_SeverityWithoutCredentials(t *testing.T) {
+	// echo the origin but never set Access-Control-Allow-Credentials: medium, not high.
+	echoOnly := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if origin := r.Header.Get("Origin"); origin != "" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+		}
+		w.WriteHeader(http.StatusOK)
+	})
+	srv := httptest.NewServer(echoOnly)
+	defer srv.Close()
+	result, err := CORS(srv.URL, 5*time.Second, 3, "")
+	if err != nil {
+		t.Fatalf("CORS: %v", err)
+	}
+	if result == nil || len(result.Findings) == 0 {
+		t.Fatalf("expected reflection findings, got %+v", result)
+	}
+	for _, finding := range result.Findings {
+		if finding.AllowCredentials {
+			t.Errorf("did not expect credentials, got %+v", finding)
+		}
+		if got := finding.Severity; got != "medium" {
+			t.Errorf("expected medium severity without creds, got %s", got)
+		}
+	}
+}
+
+func TestCORS_NoFalsePositiveOnSafeServer(t *testing.T) {
+	// static answers every request with a fixed Access-Control-Allow-Origin
+	// ("" omits the header) and optionally grants credentials; it never echoes
+	// the Origin it was sent, so none of these servers may be reported.
+	static := func(allowOrigin string, creds bool) http.HandlerFunc {
+		return func(w http.ResponseWriter, _ *http.Request) {
+			if allowOrigin != "" {
+				w.Header().Set("Access-Control-Allow-Origin", allowOrigin)
+			}
+			if creds {
+				w.Header().Set("Access-Control-Allow-Credentials", "true")
+			}
+			w.WriteHeader(http.StatusOK)
+		}
+	}
+
+	tests := []struct {
+		name    string
+		handler http.HandlerFunc
+	}{
+		{name: "ignores origin entirely", handler: static("", false)},
+		{name: "returns its own fixed origin", handler: static("https://trusted.example.com", false)},
+		{name: "plain wildcard, no credentials", handler: static("*", false)},
+		// browsers refuse "*" together with credentials, so this is not exploitable either
+		{name: "wildcard with credentials", handler: static("*", true)},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			srv := httptest.NewServer(tt.handler)
+			defer srv.Close()
+
+			result, err := CORS(srv.URL, 5*time.Second, 3, "")
+			if err != nil {
+				t.Fatalf("CORS: %v", err)
+			}
+			if result != nil && len(result.Findings) > 0 {
+				t.Errorf("expected no findings on safe server, got %+v", result.Findings)
+			}
+		})
+	}
+}
+
+func TestCORSResult_ResultType(t *testing.T) {
+	// the identifier reported for cors results must stay "cors".
+	if got := (&CORSResult{}).ResultType(); got != "cors" {
+		t.Errorf("expected result type 'cors', got %q", got)
+	}
+}
diff --git a/internal/scan/crawl.go b/internal/scan/crawl.go
new file mode 100644
index 0000000..79a5859
--- /dev/null
+++ b/internal/scan/crawl.go
@@ -0,0 +1,137 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "fmt"
+ "net/url"
+ "sort"
+ "sync"
+ "time"
+
+ "github.com/gocolly/colly/v2"
+
+ "github.com/dropalldatabases/sif/internal/httpx"
+ "github.com/dropalldatabases/sif/internal/logger"
+ "github.com/dropalldatabases/sif/internal/output"
+)
+
+// CrawlResult holds the deduped set of urls discovered by the spider.
+type CrawlResult struct {
+ URLs []string `json:"urls"` // same-host urls in sorted order
+}
+
+func (r *CrawlResult) ResultType() string { return "crawl" } // ResultType always returns "crawl"
+
+// compile-time check so a result-type drift fails the build, not a run.
+var _ ScanResult = (*CrawlResult)(nil)
+
+// Crawl spiders the target up to depth, following same-host links/scripts/forms.
+// all traffic flows through the shared httpx client so proxy/headers/rate-limit
+// apply, and robots.txt is honored (switched on explicitly; colly skips it by default).
+func Crawl(targetURL string, depth int, timeout time.Duration, logdir string) (*CrawlResult, error) {
+	log := output.Module("CRAWL")
+	log.Start()
+
+	sanitizedURL := stripScheme(targetURL)
+	if logdir != "" {
+		if err := logger.WriteHeader(sanitizedURL, logdir, "web crawl"); err != nil {
+			log.Error("error creating log file: %v", err)
+			return nil, fmt.Errorf("create crawl log: %w", err)
+		}
+	}
+
+	// the host bounds the crawl; without it colly would wander the whole web.
+	parsed, err := url.Parse(targetURL)
+	if err != nil {
+		return nil, fmt.Errorf("parse target url %q: %w", targetURL, err)
+	}
+	host := parsed.Hostname()
+	if host == "" {
+		return nil, fmt.Errorf("target url %q has no host", targetURL)
+	}
+
+	collector := colly.NewCollector(
+		colly.MaxDepth(depth),
+		colly.AllowedDomains(host),
+	)
+	// colly leaves IgnoreRobotsTxt set to true unless told otherwise; clear it so disallowed paths are skipped.
+	collector.IgnoreRobotsTxt = false
+	// reuse the shared client so proxy/cookie/-H/rate-limit are honored and the
+	// configured timeout applies to every fetch, robots.txt included.
+	collector.SetClient(httpx.Client(timeout))
+
+	// dedupe across the concurrent callbacks colly may fire.
+	var mu sync.Mutex
+	seen := make(map[string]struct{})
+
+	record := func(raw string) {
+		if raw == "" {
+			return
+		}
+		// keep the result set scoped to the target host; off-host assets
+		// (cdns, third-party links) are noise for an in-scope crawl.
+		if u, err := url.Parse(raw); err != nil || u.Hostname() != host {
+			return
+		}
+		mu.Lock()
+		if _, ok := seen[raw]; !ok {
+			seen[raw] = struct{}{}
+			log.Success("found: %s", output.Highlight.Render(raw))
+			if logdir != "" {
+				_ = logger.Write(sanitizedURL, logdir, raw+"\n")
+			}
+		}
+		mu.Unlock()
+	}
+
+	// links drive recursion; scripts/forms are recorded but not followed.
+	collector.OnHTML("a[href]", func(e *colly.HTMLElement) {
+		link := e.Request.AbsoluteURL(e.Attr("href"))
+		record(link)
+		// Visit enforces AllowedDomains/MaxDepth itself, so off-host or
+		// too-deep links are dropped without us re-checking.
+		_ = e.Request.Visit(link)
+	})
+	collector.OnHTML("script[src]", func(e *colly.HTMLElement) {
+		record(e.Request.AbsoluteURL(e.Attr("src")))
+	})
+	collector.OnHTML("form[action]", func(e *colly.HTMLElement) {
+		record(e.Request.AbsoluteURL(e.Attr("action")))
+	})
+
+	collector.OnError(func(_ *colly.Response, e error) {
+		// a single bad page shouldn't abort the crawl; note it and move on.
+		log.Warn("crawl error: %v", e)
+	})
+
+	if err := collector.Visit(targetURL); err != nil {
+		log.Error("crawl failed: %v", err)
+		return nil, fmt.Errorf("visit %q: %w", targetURL, err)
+	}
+	collector.Wait()
+
+	result := &CrawlResult{URLs: sortedKeys(seen)}
+	log.Complete(len(result.URLs), "urls")
+	return result, nil
+}
+
+// sortedKeys lists the members of set in ascending order so output is deterministic.
+func sortedKeys(set map[string]struct{}) []string {
+	out := make([]string, 0, len(set))
+	for member := range set {
+		out = append(out, member)
+	}
+	sort.Sort(sort.StringSlice(out))
+	return out
+}
diff --git a/internal/scan/crawl_test.go b/internal/scan/crawl_test.go
new file mode 100644
index 0000000..c0cc260
--- /dev/null
+++ b/internal/scan/crawl_test.go
@@ -0,0 +1,158 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+)
+
+// crawlSite serves a small link graph:
+//
+//	/  -> links /a and an off-host page; references script.js, form action /submit
+//	/a -> links /b
+//	/b -> links /c (only reachable at depth 3)
+//	/c -> leaf
+func crawlSite(t *testing.T) *httptest.Server {
+	t.Helper()
+
+	mux := http.NewServeMux()
+	// answered with 404 so a robots.txt lookup, when the crawler makes one, imposes no restrictions.
+	mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	})
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/" {
+			http.NotFound(w, r)
+			return
+		}
+		_, _ = w.Write([]byte(`
+			<a href="/a">a</a>
+			<a href="https://off-host.example/x">off</a>
+			<script src="/script.js"></script>
+			<form action="/submit"></form>
+		`))
+	})
+	mux.HandleFunc("/a", func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = w.Write([]byte(`<a href="/b">b</a>`))
+	})
+	mux.HandleFunc("/b", func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = w.Write([]byte(`<a href="/c">c</a>`))
+	})
+	mux.HandleFunc("/c", func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = w.Write([]byte(`leaf`))
+	})
+
+	srv := httptest.NewServer(mux)
+	t.Cleanup(srv.Close)
+	return srv
+}
+
+func urlsContain(urls []string, want string) bool { // exact-match membership test
+	for _, got := range urls {
+		if got == want {
+			return true
+		}
+	}
+	return false
+}
+
+func TestCrawl_FindsLinkedPagesAndAssets(t *testing.T) {
+	srv := crawlSite(t)
+
+	result, err := Crawl(srv.URL, 3, 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Crawl: %v", err)
+	}
+
+	// links, scripts and forms must all be recorded, resolved to absolute urls.
+	paths := []string{
+		"/a",
+		"/b",
+		"/c",
+		"/script.js",
+		"/submit",
+	}
+	for _, p := range paths {
+		if !urlsContain(result.URLs, srv.URL+p) {
+			t.Errorf("expected crawl to find %q, got %v", srv.URL+p, result.URLs)
+		}
+	}
+
+	// the off-host link from the root page must stay out of the result set.
+	if offHost := "https://off-host.example/x"; urlsContain(result.URLs, offHost) {
+		t.Errorf("off-host link should be excluded, got %v", result.URLs)
+	}
+}
+
+func TestCrawl_RespectsDepth(t *testing.T) {
+	srv := crawlSite(t)
+
+	// depth 1: only links found on the root page (/a, /script.js, /submit) are
+	// recorded; /b lives one hop deeper and must not appear.
+	result, err := Crawl(srv.URL, 1, 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Crawl: %v", err)
+	}
+
+	if !urlsContain(result.URLs, srv.URL+"/a") {
+		t.Errorf("depth 1 should find /a, got %v", result.URLs)
+	}
+	// everything past the first hop has to stay undiscovered.
+	for _, tooDeep := range []string{"/b", "/c"} {
+		if urlsContain(result.URLs, srv.URL+tooDeep) {
+			t.Errorf("depth 1 must not reach %s, got %v", tooDeep, result.URLs)
+		}
+	}
+}
+
+func TestCrawl_Dedupes(t *testing.T) {
+	// a page that links the same target twice must yield a single entry.
+	mux := http.NewServeMux()
+	mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	})
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/dup" {
+			_, _ = w.Write([]byte(`leaf`))
+			return
+		}
+		_, _ = w.Write([]byte(`<a href="/dup">1</a><a href="/dup">2</a>`))
+	})
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	result, err := Crawl(srv.URL, 2, 5*time.Second, "")
+	if err != nil {
+		t.Fatalf("Crawl: %v", err)
+	}
+
+	count := 0
+	for _, u := range result.URLs {
+		if u == srv.URL+"/dup" {
+			count++
+		}
+	}
+	if count != 1 {
+		t.Errorf("expected /dup once after dedupe, got %d in %v", count, result.URLs)
+	}
+}
+
+func TestCrawl_ResultType(t *testing.T) {
+	// the identifier reported for crawl results must stay "crawl".
+	if got := (&CrawlResult{}).ResultType(); got != "crawl" {
+		t.Errorf("ResultType = %q, want crawl", got)
+	}
+}
diff --git a/internal/scan/integration_test.go b/internal/scan/integration_test.go
index 7894a7f..9e0072f 100644
--- a/internal/scan/integration_test.go
+++ b/internal/scan/integration_test.go
@@ -65,6 +65,32 @@ func newVulnApp() *httptest.Server {
w.Write([]byte("phpMyAdmin"))
})
+ // reflecting-origin endpoint for the cors probe
+ mux.HandleFunc("/cors", func(w http.ResponseWriter, r *http.Request) {
+ if origin := r.Header.Get("Origin"); origin != "" {
+ w.Header().Set("Access-Control-Allow-Origin", origin)
+ w.Header().Set("Access-Control-Allow-Credentials", "true")
+ }
+ w.WriteHeader(http.StatusOK)
+ })
+
+ // open-redirect endpoint: echoes the next param into Location
+ mux.HandleFunc("/redirect", func(w http.ResponseWriter, r *http.Request) {
+ if next := r.URL.Query().Get("next"); next != "" {
+ w.Header().Set("Location", next)
+ w.WriteHeader(http.StatusFound)
+ return
+ }
+ w.WriteHeader(http.StatusOK)
+ })
+
+ // reflecting endpoint for the xss probe: echoes q raw into html text
+ mux.HandleFunc("/xss", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/html")
+ //nolint:gosec // deliberate reflected-xss fixture for the probe under test
+ w.Write([]byte("" + r.URL.Query().Get("q") + "
"))
+ })
+
// homepage doubles as the cms fingerprint and the lfi sink
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/" {
@@ -180,6 +206,45 @@ func TestIntegrationLFI(t *testing.T) {
}
}
+func TestIntegrationCORS(t *testing.T) {
+	app := newVulnApp()
+	defer app.Close()
+	// /cors mirrors the Origin header, so at least one probe must be flagged.
+	got, err := CORS(app.URL+"/cors", 5*time.Second, 3, "")
+	if err != nil {
+		t.Fatalf("CORS: %v", err)
+	}
+	if got == nil || len(got.Findings) == 0 {
+		t.Fatalf("expected a cors finding from the reflecting endpoint, got %+v", got)
+	}
+}
+
+func TestIntegrationRedirect(t *testing.T) {
+	app := newVulnApp()
+	defer app.Close()
+	// /redirect copies the next param straight into Location, the open-redirect sink.
+	got, err := Redirect(app.URL+"/redirect", 5*time.Second, 4, "")
+	if err != nil {
+		t.Fatalf("Redirect: %v", err)
+	}
+	if got == nil || len(got.Findings) == 0 {
+		t.Fatalf("expected an open-redirect finding from the next sink, got %+v", got)
+	}
+}
+
+func TestIntegrationXSS(t *testing.T) {
+	app := newVulnApp()
+	defer app.Close()
+	// /xss writes the q param into a text/html body, the reflection sink.
+	got, err := XSS(app.URL+"/xss", 5*time.Second, 4, "")
+	if err != nil {
+		t.Fatalf("XSS: %v", err)
+	}
+	if got == nil || len(got.Findings) == 0 {
+		t.Fatalf("expected a reflected-xss finding from the q sink, got %+v", got)
+	}
+}
+
func TestIntegrationPorts(t *testing.T) {
// a real listener stands in for an open port; a tiny server hands its number
// to Ports via the commonPorts wordlist.
diff --git a/internal/scan/js/endpoints.go b/internal/scan/js/endpoints.go
new file mode 100644
index 0000000..4dcf26d
--- /dev/null
+++ b/internal/scan/js/endpoints.go
@@ -0,0 +1,128 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+ "net/url"
+ "regexp"
+ "slices"
+ "strings"
+
+ urlutil "github.com/projectdiscovery/utils/url"
+)
+
+// endpointRegex is a linkfinder-style matcher for quoted paths and urls inside
+// js: absolute or protocol-relative urls, root-relative (/api/...) and
+// dotted-relative (./x, ../x) paths. the alternation lives in a single capture
+// group so FindAllStringSubmatch hands back just the value without its quotes.
+var endpointRegex = regexp.MustCompile(`["'\x60]` +
+ `(` +
+ `(?:https?:)?//[^\s"'\x60]{2,}` + // protocol-relative or absolute url
+ `|` +
+ `/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // root-relative path
+ `|` +
+ `\.{1,2}/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // dotted-relative path
+ `)` +
+ `["'\x60]`)
+
+// shortest thing (in bytes) we'll treat as an endpoint; below this it's almost
+// always noise like "/" or a single slash-prefixed letter.
+const minEndpointLen = 3
+
+// mime types (text/html, application/json, ...) are never endpoints, so they're filtered out by their top-level type.
+// NOTE(review): every endpointRegex alternative opens with '/', '.' or a scheme, so this looks purely defensive - confirm.
+var mimePrefixes = []string{
+ "text/", "image/", "audio/", "video/", "font/",
+ "application/", "multipart/", "model/", "message/",
+}
+
+// ExtractEndpoints scans a script body for candidate paths and urls, drops the
+// obvious noise, resolves relatives against baseURL where that url parses, and
+// returns the unique survivors in sorted order. a baseURL that won't parse just
+// leaves relatives as-is rather than failing the whole scan.
+func ExtractEndpoints(content, baseURL string) []string {
+	matches := endpointRegex.FindAllStringSubmatch(content, -1)
+	if len(matches) == 0 {
+		return nil
+	}
+
+	base, baseErr := urlutil.Parse(baseURL)
+	// relatives are only resolved when the base parsed into a usable url.
+	canResolve := baseErr == nil && base.URL != nil
+
+	endpoints := make([]string, 0, len(matches))
+	seen := make(map[string]struct{}, len(matches))
+	for _, m := range matches {
+		candidate := strings.TrimSpace(m[1])
+		if !isEndpoint(candidate) {
+			continue
+		}
+
+		// absolute and protocol-relative values pass through unchanged.
+		if canResolve && isRelative(candidate) {
+			candidate = resolveRelative(base.URL, candidate)
+		}
+		if _, dup := seen[candidate]; dup {
+			continue
+		}
+		seen[candidate] = struct{}{}
+		endpoints = append(endpoints, candidate)
+	}
+
+	slices.Sort(endpoints)
+	return endpoints
+}
+
+// isEndpoint filters out the junk that the broad regex inevitably catches:
+// too-short fragments, bare mime types, and tokens that contain no slash.
+func isEndpoint(s string) bool {
+	if len(s) < minEndpointLen {
+		return false
+	}
+
+	// a mime type is "type/subtype" opening with a bare top-level type; an api
+	// route like /application/users opens with a slash, so it is left alone.
+	lower := strings.ToLower(s)
+	rooted := strings.HasPrefix(lower, "/")
+	for _, prefix := range mimePrefixes {
+		if !rooted && strings.HasPrefix(lower, prefix) {
+			return false
+		}
+	}
+
+	// reject "word" or "a.b" with no slash at all: not a path, just a token.
+	if !strings.Contains(s, "/") {
+		return false
+	}
+	return true
+}
+
+// isRelative reports whether candidate needs the base url to become absolute:
+// anything that is not protocol-relative (//host) and not an http(s) url.
+func isRelative(candidate string) bool {
+	for _, prefix := range []string{"//", "http://", "https://"} {
+		if strings.HasPrefix(candidate, prefix) {
+			return false
+		}
+	}
+	return true
+}
+
+// resolveRelative makes ref absolute by resolving it against base with the
+// stdlib reference resolver. a ref that won't parse is handed back unchanged
+// so one odd candidate cannot fail the extraction.
+func resolveRelative(base *url.URL, ref string) string {
+	if parsed, err := url.Parse(ref); err == nil {
+		return base.ResolveReference(parsed).String()
+	}
+	return ref
+}
diff --git a/internal/scan/js/endpoints_test.go b/internal/scan/js/endpoints_test.go
new file mode 100644
index 0000000..3d64923
--- /dev/null
+++ b/internal/scan/js/endpoints_test.go
@@ -0,0 +1,106 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+ "slices"
+ "testing"
+)
+
+func TestExtractEndpoints(t *testing.T) { // table-driven: each case scans one js snippet
+ const base = "https://example.com/static/app.js" // script url relative refs resolve against
+
+ tests := []struct {
+ name string
+ content string
+ wantSome []string // each must appear in the result
+ wantAbsent []string // none of these may appear
+ }{
+ {
+ name: "root-relative api path resolves to absolute",
+ content: `fetch("/api/users")`,
+ wantSome: []string{"https://example.com/api/users"},
+ },
+ {
+ name: "absolute url passes through untouched",
+ content: `const u = "https://api.example.org/v1/login";`,
+ wantSome: []string{"https://api.example.org/v1/login"},
+ },
+ {
+ name: "dotted-relative path resolves against base dir",
+ content: `import("./chunks/main.js")`,
+ wantSome: []string{"https://example.com/static/chunks/main.js"},
+ },
+ {
+ name: "query string is preserved",
+ content: `axios.get("/api/search?q=test")`,
+ wantSome: []string{"https://example.com/api/search?q=test"},
+ },
+ {
+ name: "mime types are filtered out",
+ content: `headers["Content-Type"] = "application/json"; var t = "text/html";`,
+ wantAbsent: []string{"application/json", "text/html"},
+ },
+ {
+ name: "single words without a slash are ignored",
+ content: `var x = "hello"; var y = "world";`,
+ wantAbsent: []string{"hello", "world"},
+ },
+ {
+ name: "multiple endpoints deduped",
+ content: `fetch("/api/users"); fetch("/api/users"); fetch("/api/posts");`,
+ wantSome: []string{
+ "https://example.com/api/users",
+ "https://example.com/api/posts",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := ExtractEndpoints(tt.content, base) // every case shares the same base url
+
+ for _, want := range tt.wantSome {
+ if !slices.Contains(got, want) {
+ t.Errorf("expected %q in %v", want, got)
+ }
+ }
+ for _, absent := range tt.wantAbsent {
+ if slices.Contains(got, absent) {
+ t.Errorf("did not expect %q in %v", absent, got)
+ }
+ }
+ })
+ }
+}
+
+func TestExtractEndpointsDedupes(t *testing.T) { // the same literal twice must yield one entry
+ got := ExtractEndpoints(`fetch("/api/x"); fetch("/api/x");`, "https://example.com/app.js")
+ count := 0 // occurrences of the resolved url in the result
+ for i := 0; i < len(got); i++ {
+ if got[i] == "https://example.com/api/x" {
+ count++
+ }
+ }
+ if count != 1 {
+ t.Fatalf("expected /api/x once, got %d times in %v", count, got)
+ }
+}
+
+func TestExtractEndpointsBadBaseKeepsRelatives(t *testing.T) {
+ // an unparseable base url must not drop findings: the relative ref is reported verbatim.
+ got := ExtractEndpoints(`fetch("/api/users")`, "::not a url::")
+ if !slices.Contains(got, "/api/users") {
+ t.Errorf("expected relative /api/users preserved, got %v", got)
+ }
+}
diff --git a/internal/scan/js/scan.go b/internal/scan/js/scan.go
index 519b11e..2cc3981 100644
--- a/internal/scan/js/scan.go
+++ b/internal/scan/js/scan.go
@@ -32,6 +32,8 @@ import (
type JavascriptScanResult struct {
SupabaseResults []supabaseScanResult `json:"supabase_results"`
FoundEnvironmentVars map[string]string `json:"environment_variables"`
+ SecretMatches []SecretMatch `json:"secret_matches"`
+ Endpoints []string `json:"endpoints"`
}
// ResultType implements the ScanResult interface.
@@ -116,6 +118,11 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
log.Info("Got %d scripts, now running scans on them", len(scripts))
supabaseResults := make([]supabaseScanResult, 0, len(scripts))
+ secretMatches := make([]SecretMatch, 0)
+ endpoints := make([]string, 0)
+ // dedupe secrets and endpoints across every script, not just within one.
+ seenSecrets := make(map[string]struct{})
+ seenEndpoints := make(map[string]struct{})
for _, script := range scripts {
charmlog.Debugf("Scanning %s", script)
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, script, http.NoBody)
@@ -147,16 +154,41 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
if scriptSupabaseResults != nil {
supabaseResults = append(supabaseResults, scriptSupabaseResults...)
}
+
+ // reuse the same script buffer for credential and endpoint extraction.
+ for _, match := range ScanSecrets(content, script) {
+ key := match.Rule + "\x00" + match.Match
+ if _, ok := seenSecrets[key]; ok {
+ continue
+ }
+ seenSecrets[key] = struct{}{}
+ secretMatches = append(secretMatches, match)
+ log.Warn("found %s in %s", match.Rule, script)
+ }
+
+ for _, endpoint := range ExtractEndpoints(content, script) {
+ if _, ok := seenEndpoints[endpoint]; ok {
+ continue
+ }
+ seenEndpoints[endpoint] = struct{}{}
+ endpoints = append(endpoints, endpoint)
+ }
}
spin.Stop()
+ if len(endpoints) > 0 {
+ log.Info("extracted %d endpoints", len(endpoints))
+ }
+
result := JavascriptScanResult{
SupabaseResults: supabaseResults,
FoundEnvironmentVars: map[string]string{},
+ SecretMatches: secretMatches,
+ Endpoints: endpoints,
}
- log.Complete(len(supabaseResults), "found")
+ log.Complete(len(supabaseResults)+len(secretMatches)+len(endpoints), "found")
return &result, nil
}
diff --git a/internal/scan/js/secrets.go b/internal/scan/js/secrets.go
new file mode 100644
index 0000000..892b5fb
--- /dev/null
+++ b/internal/scan/js/secrets.go
@@ -0,0 +1,171 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+ "math"
+ "regexp"
+ "strings"
+)
+
+// SecretMatch is one credential finding produced by ScanSecrets.
+type SecretMatch struct {
+ Rule string `json:"rule"` // name of the secretRules entry that matched
+ Match string `json:"match"` // reported text as chosen by secretValue
+ Source string `json:"source"` // srcURL that was passed to ScanSecrets
+}
+
+// entropy thresholds gate the noisy rules: provider-prefixed keys are
+// trustworthy on their own, but a bare apikey="..." or a keyword-adjacent blob
+// is only worth reporting once its shannon entropy clears the bar for "this
+// looks random, not an english word". the generic bar sits above the aws-secret
+// bar because the generic capture group also catches ordinary identifiers.
+const (
+ genericMinEntropy = 3.5 // bits per character, as computed by shannonEntropy
+ awsSecretMinEntropy = 3.0
+ // rules with no entropy requirement (prefix is already unique enough).
+ noEntropyGate = 0.0
+)
+
+// secretRules is the credential regex bank. capture group 2 (or the whole
+// match when a rule has no second group) is what gets reported; minEntropy
+// gates the keyword-based rules so we don't flag every short literal.
+var secretRules = []struct {
+ name string
+ re *regexp.Regexp
+ minEntropy float64
+}{
+ {
+ // aws access key ids are fixed-shape and unmistakable.
+ name: "aws access key id",
+ re: regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // aws secret keys are 40-char base64-ish blobs; gate on entropy since the
+ // shape alone matches plenty of innocent strings.
+ name: "aws secret access key",
+ re: regexp.MustCompile(`\b((?:aws_secret_access_key|aws_secret|secret_key)["']?\s*[:=]\s*["']?)([A-Za-z0-9/+]{40})\b`),
+ minEntropy: awsSecretMinEntropy,
+ },
+ {
+ // github personal/oauth/server/refresh/app tokens share the ghX_ prefix.
+ name: "github token",
+ re: regexp.MustCompile(`\b((?:ghp|gho|ghu|ghs|ghr)_[0-9A-Za-z]{36,255})\b`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // slack bot/user/app/legacy tokens.
+ name: "slack token",
+ re: regexp.MustCompile(`\b(xox[baprs]-[0-9A-Za-z-]{10,})\b`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // stripe live secret and publishable keys (test keys are not findings).
+ name: "stripe live key",
+ re: regexp.MustCompile(`\b([sp]k_live_[0-9A-Za-z]{16,})\b`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // google api keys are a fixed AIza-prefixed 39-char shape.
+ name: "google api key",
+ re: regexp.MustCompile(`\b(AIza[0-9A-Za-z_-]{35})\b`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // pem private key blocks; the header alone is the smoking gun.
+ name: "private key",
+ re: regexp.MustCompile(`-{5}BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-{5}`),
+ minEntropy: noEntropyGate,
+ },
+ {
+ // generic apikey/secret/token = "..." assignments; the quoted value is
+ // group 2 and only reported if it looks random (entropy gate).
+ name: "generic secret assignment",
+ re: regexp.MustCompile(`(?i)\b(api[_-]?key|secret|token|password|passwd|auth)["']?\s*[:=]\s*["']([0-9A-Za-z\-._~+/]{16,})["']`),
+ minEntropy: genericMinEntropy,
+ },
+}
+
+// submatch indexes read by secretValue: rules that capture a keyword prefix put
+// the value in group 2; every other rule reports index 0 (the whole match).
+const (
+ valueGroupIndex = 2
+ wholeMatchIndex = 0
+)
+
+// ScanSecrets runs every secretRules regex over content and returns the gated
+// matches, deduped by rule name + value; srcURL is copied into each Source.
+func ScanSecrets(content, srcURL string) []SecretMatch {
+ matches := make([]SecretMatch, 0) // non-nil even when nothing is found
+ seen := make(map[string]struct{})
+
+ for i := 0; i < len(secretRules); i++ {
+ rule := secretRules[i]
+ groups := rule.re.FindAllStringSubmatch(content, -1) // -1: no limit on match count
+ for j := 0; j < len(groups); j++ {
+ value := secretValue(groups[j])
+ if value == "" {
+ continue
+ }
+
+ // entropy gate weeds out english-y identifiers for the generic rules;
+ // prefixed rules pass with a zero threshold.
+ if rule.minEntropy > noEntropyGate && shannonEntropy(value) < rule.minEntropy {
+ continue
+ }
+
+ // dedupe per source so a key referenced twice is one finding.
+ key := rule.name + "\x00" + value // NUL separator keeps name and value from running together
+ if _, ok := seen[key]; ok {
+ continue
+ }
+ seen[key] = struct{}{}
+
+ matches = append(matches, SecretMatch{Rule: rule.name, Match: value, Source: srcURL})
+ }
+ }
+
+ return matches
+}
+
+// secretValue picks the reported text from one FindAllStringSubmatch entry:
+// group 2 when present and non-empty, otherwise the trimmed whole match.
+func secretValue(groups []string) string {
+ if len(groups) > valueGroupIndex && groups[valueGroupIndex] != "" {
+ return groups[valueGroupIndex]
+ }
+ return strings.TrimSpace(groups[wholeMatchIndex]) // groups must hold at least the whole match
+}
+
+// shannonEntropy returns the shannon entropy of s in bits per rune, used to tell
+// random-looking secrets apart from plain words. empty input is zero entropy.
+func shannonEntropy(s string) float64 {
+ if s == "" {
+ return 0
+ }
+
+ counts := make(map[rune]int) // occurrences of each distinct rune
+ for _, r := range s {
+ counts[r]++
+ }
+
+ length := float64(len([]rune(s))) // rune count, matching the per-rune tally above
+ var entropy float64
+ for _, count := range counts {
+ p := float64(count) / length // relative frequency of this rune
+ entropy -= p * math.Log2(p)
+ }
+
+ return entropy
+}
diff --git a/internal/scan/js/secrets_test.go b/internal/scan/js/secrets_test.go
new file mode 100644
index 0000000..e4b7807
--- /dev/null
+++ b/internal/scan/js/secrets_test.go
@@ -0,0 +1,160 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+ "fmt"
+ "testing"
+)
+
+// the fake tokens below are assembled from two fragments on purpose: a contiguous
+// provider token literal in a committed file trips github push-protection (and
+// every other secret scanner) even though it's a test fixture. splitting it
+// keeps the literal out of source while ScanSecrets still sees the joined value.
+const (
+ fakeAWSKey = "AKIA" + "IOSFODNN7EXAMPLE"
+ fakeAWSSecret = "wJalrXUtnFEMI/K7MDENG/" + "bPxRfiCYEXAMPLEKEY"
+ fakeGitHub = "ghp_" + "aB3dEfGh1jKlMn0pQrStUvWxYz012345abcd"
+ fakeSlack = "xoxb-" + "123456789012-abcdefABCDEF1234567890ab"
+ fakeStripe = "sk_live_" + "4eC39HqLyjWDarjtT1zdp7dc"
+ fakeGoogle = "AIza" + "SyA1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q"
+ fakeGeneric = "x9Kq2Lm7Pz4Rt6Wv8Bn3Cd5Fg1Hj0As" // no provider prefix: only the generic rule can match
+ fakePEM = "-----BEGIN RSA PRIVATE " + "KEY-----\nMIIEpAIB..."
+)
+
+func TestScanSecrets(t *testing.T) { // table-driven: one snippet per rule plus negatives
+ tests := []struct {
+ name string
+ content string
+ wantRule string // rule expected on the first match; unused when wantNone is set
+ wantNone bool // expect ScanSecrets to return no matches at all
+ }{
+ {
+ name: "aws access key id",
+ content: fmt.Sprintf(`const k = %q;`, fakeAWSKey),
+ wantRule: "aws access key id",
+ },
+ {
+ name: "github personal token",
+ content: fmt.Sprintf(`token: %q`, fakeGitHub),
+ wantRule: "github token",
+ },
+ {
+ name: "slack bot token",
+ content: fmt.Sprintf(`slack=%q`, fakeSlack),
+ wantRule: "slack token",
+ },
+ {
+ name: "stripe live secret key",
+ content: fmt.Sprintf(`var sk = %q;`, fakeStripe),
+ wantRule: "stripe live key",
+ },
+ {
+ name: "google api key",
+ content: fmt.Sprintf(`apiKey: %q`, fakeGoogle),
+ wantRule: "google api key",
+ },
+ {
+ name: "pem private key header",
+ content: fakePEM,
+ wantRule: "private key",
+ },
+ {
+ name: "generic high-entropy api key assignment",
+ content: fmt.Sprintf(`apikey = %q`, fakeGeneric),
+ wantRule: "generic secret assignment",
+ },
+ {
+ name: "aws secret with entropy",
+ content: fmt.Sprintf(`aws_secret_access_key=%q`, fakeAWSSecret),
+ wantRule: "aws secret access key",
+ },
+ {
+ // low-entropy assignment is a placeholder, not a real secret.
+ name: "low entropy generic assignment not flagged",
+ content: `password = "aaaaaaaaaaaaaaaaaaaaaaaa"`,
+ wantNone: true,
+ },
+ {
+ // a repetitive placeholder is low-entropy and must not trip the gate.
+ name: "low entropy repeated pattern not flagged",
+ content: `token = "abababababababababababab"`,
+ wantNone: true,
+ },
+ {
+ name: "no secrets in plain code",
+ content: `function add(a, b) { return a + b; }`,
+ wantNone: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := ScanSecrets(tt.content, "https://example.com/app.js")
+
+ if tt.wantNone {
+ if len(got) != 0 {
+ t.Fatalf("expected no matches, got %+v", got)
+ }
+ return
+ }
+
+ if len(got) == 0 {
+ t.Fatalf("expected a %q match, got none", tt.wantRule)
+ }
+ if got[0].Rule != tt.wantRule { // only the first match is checked; later rules may also fire
+ t.Errorf("rule = %q, want %q", got[0].Rule, tt.wantRule)
+ }
+ if got[0].Match == "" {
+ t.Error("match value is empty")
+ }
+ if got[0].Source != "https://example.com/app.js" {
+ t.Errorf("source = %q, want the passed url", got[0].Source)
+ }
+ })
+ }
+}
+
+func TestScanSecretsDedupesWithinSource(t *testing.T) {
+ // the same aws key id appears twice in one script body and must be reported once.
+ content := fmt.Sprintf(`a = %q; b = %q;`, fakeAWSKey, fakeAWSKey)
+ got := ScanSecrets(content, "https://example.com/app.js")
+ if len(got) != 1 {
+ t.Fatalf("expected 1 deduped match, got %d: %+v", len(got), got)
+ }
+}
+
+func TestShannonEntropy(t *testing.T) { // checks each input against genericMinEntropy only
+ tests := []struct {
+ name string
+ input string
+ // true when the input is expected to reach genericMinEntropy, false when not.
+ wantHigh bool
+ }{
+ {name: "empty is zero", input: "", wantHigh: false},
+ {name: "repeated char is low", input: "aaaaaaaaaaaaaaaa", wantHigh: false},
+ {name: "random blob is high", input: fakeGeneric, wantHigh: true},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := shannonEntropy(tt.input)
+ if tt.wantHigh && got < genericMinEntropy {
+ t.Errorf("entropy %f below generic gate %f", got, genericMinEntropy)
+ }
+ if !tt.wantHigh && got >= genericMinEntropy {
+ t.Errorf("entropy %f unexpectedly cleared generic gate %f", got, genericMinEntropy)
+ }
+ })
+ }
+}
diff --git a/internal/scan/passive.go b/internal/scan/passive.go
new file mode 100644
index 0000000..8d02ced
--- /dev/null
+++ b/internal/scan/passive.go
@@ -0,0 +1,266 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "bufio"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "strings"
+ "time"
+
+ "github.com/dropalldatabases/sif/internal/httpx"
+ "github.com/dropalldatabases/sif/internal/logger"
+ "github.com/dropalldatabases/sif/internal/output"
+)
+
+// source base urls are vars so tests can repoint them at local fixtures. each is
+// a fmt template with exactly one %s verb, which receives the target domain.
+var (
+	crtshBaseURL       = "https://crt.sh/?q=%%25.%s&output=json" // %%25 formats to "%25", the url-encoded "%" wildcard
+	certspotterBaseURL = "https://api.certspotter.com/v1/issuances?domain=%s&include_subdomains=true&expand=dns_names"
+	waybackBaseURL     = "https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=text&fl=original&collapse=urlkey" // https like the other feeds: no plaintext queries
+)
+
+// cap (25 MiB) on the bytes read from any one source's response body, so a
+// hostile/huge feed can't exhaust memory.
+const passiveMaxBytes = 25 * 1024 * 1024
+
+// PassiveResult holds passively-gathered subdomains and historical urls. all
+// data comes from third-party feeds; the target itself sees zero traffic.
+type PassiveResult struct {
+ Subdomains []string `json:"subdomains"` // deduped hosts merged from crt.sh and certspotter
+ URLs []string `json:"urls"` // deduped urls from the wayback feed
+}
+
+func (r *PassiveResult) ResultType() string { return "passive" } // satisfies ScanResult (asserted below)
+
+// compile-time check so a result-type drift fails the build, not a run.
+var _ ScanResult = (*PassiveResult)(nil)
+
+// crtshEntry is one element of the crt.sh json array; only name_value is
+// decoded, and it may hold several newline-separated names.
+type crtshEntry struct {
+ NameValue string `json:"name_value"`
+}
+
+// certspotterEntry is one element of the certspotter json array; only dns_names is decoded.
+type certspotterEntry struct {
+ DNSNames []string `json:"dns_names"`
+}
+
+// Passive performs keyless passive recon on targetURL's host: subdomains from
+// crt.sh and certspotter plus historical urls from the wayback machine. a failed
+// source is logged and skipped; only a bad target or log-header error is returned.
+func Passive(targetURL string, timeout time.Duration, logdir string) (*PassiveResult, error) {
+ log := output.Module("PASSIVE")
+ log.Start()
+
+ parsed, err := url.Parse(targetURL)
+ if err != nil {
+ return nil, fmt.Errorf("parse target url %q: %w", targetURL, err)
+ }
+ domain := parsed.Hostname() // host only: any port is stripped
+ if domain == "" {
+ return nil, fmt.Errorf("target url %q has no host", targetURL)
+ }
+
+ sanitizedURL := stripScheme(targetURL)
+ if logdir != "" { // an empty logdir disables file logging entirely
+ if err := logger.WriteHeader(sanitizedURL, logdir, "passive recon"); err != nil {
+ log.Error("error creating log file: %v", err)
+ return nil, fmt.Errorf("create passive log: %w", err)
+ }
+ }
+
+ client := httpx.Client(timeout)
+ ctx := context.TODO() // no caller context; presumably bounded by the client timeout — confirm
+
+ subSet := make(map[string]struct{}) // dedupe set shared by both ct feeds
+ urlSet := make(map[string]struct{}) // dedupe set for wayback urls
+
+ // crt.sh certificate transparency
+ if subs, err := fetchCrtsh(ctx, client, domain); err != nil {
+ log.Warn("crt.sh failed: %v", err)
+ } else {
+ addAll(subSet, subs)
+ }
+
+ // certspotter certificate transparency
+ if subs, err := fetchCertspotter(ctx, client, domain); err != nil {
+ log.Warn("certspotter failed: %v", err)
+ } else {
+ addAll(subSet, subs)
+ }
+
+ // wayback machine historical urls
+ if urls, err := fetchWayback(ctx, client, domain); err != nil {
+ log.Warn("wayback failed: %v", err)
+ } else {
+ addAll(urlSet, urls)
+ }
+
+ result := &PassiveResult{
+ Subdomains: sortedKeys(subSet),
+ URLs: sortedKeys(urlSet),
+ }
+
+ logPassiveResults(log, sanitizedURL, logdir, result)
+
+ log.Complete(len(result.Subdomains)+len(result.URLs), "discovered")
+ return result, nil
+}
+
+// fetchCrtsh pulls subdomains from crt.sh's certificate transparency json. only
+// domain itself and names under it are returned; other identities are dropped.
+func fetchCrtsh(ctx context.Context, client *http.Client, domain string) ([]string, error) {
+	body, err := passiveGET(ctx, client, fmt.Sprintf(crtshBaseURL, domain))
+	if err != nil {
+		return nil, err
+	}
+	var entries []crtshEntry
+	if err := json.Unmarshal(body, &entries); err != nil {
+		return nil, fmt.Errorf("parse crt.sh json: %w", err)
+	}
+
+	apex := normalizeHost(domain) // same canonical form as the names it is compared with
+	var names []string
+	for i := 0; i < len(entries); i++ {
+		for _, name := range strings.Split(entries[i].NameValue, "\n") { // name_value can pack several newline-separated names
+			if host := normalizeHost(name); host == apex || strings.HasSuffix(host, "."+apex) {
+				names = append(names, host)
+			}
+		}
+	}
+	return names, nil
+}
+
+// fetchCertspotter pulls in-scope subdomains from certspotter's keyless issuances feed.
+func fetchCertspotter(ctx context.Context, client *http.Client, domain string) ([]string, error) {
+	body, err := passiveGET(ctx, client, fmt.Sprintf(certspotterBaseURL, domain))
+	if err != nil {
+		return nil, err
+	}
+	var entries []certspotterEntry
+	if err := json.Unmarshal(body, &entries); err != nil {
+		return nil, fmt.Errorf("parse certspotter json: %w", err)
+	}
+	// dns_names lists every name on a certificate, so keep only domain and names under it.
+	apex := normalizeHost(domain)
+	var names []string
+	for i := 0; i < len(entries); i++ {
+		for _, name := range entries[i].DNSNames {
+			if host := normalizeHost(name); host == apex || strings.HasSuffix(host, "."+apex) {
+				names = append(names, host)
+			}
+		}
+	}
+	return names, nil
+}
+
+// fetchWayback pulls historical urls from the wayback machine cdx index, read
+// as one url per line; blank lines are skipped and duplicates are kept.
+func fetchWayback(ctx context.Context, client *http.Client, domain string) ([]string, error) {
+ body, err := passiveGET(ctx, client, fmt.Sprintf(waybackBaseURL, domain))
+ if err != nil {
+ return nil, err
+ }
+
+ var urls []string
+ scanner := bufio.NewScanner(strings.NewReader(string(body)))
+ // 64 KiB initial buffer, 1 MiB max token: a longer line ends the scan with an error.
+ scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if line != "" {
+ urls = append(urls, line)
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, fmt.Errorf("read wayback lines: %w", err) // urls gathered so far are discarded
+ }
+ return urls, nil
+}
+
+// passiveGET performs a GET against a passive source and returns at most
+// passiveMaxBytes of the body. any non-200 status is returned as an error.
+func passiveGET(ctx context.Context, client *http.Client, reqURL string) ([]byte, error) {
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, http.NoBody)
+ if err != nil {
+ return nil, fmt.Errorf("create request: %w", err)
+ }
+ req.Header.Set("Accept", "application/json") // sent to every source, including the text-only wayback feed
+
+ resp, err := client.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("request failed: %w", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return nil, fmt.Errorf("unexpected status %d", resp.StatusCode)
+ }
+
+ body, err := io.ReadAll(io.LimitReader(resp.Body, passiveMaxBytes)) // NOTE(review): a larger body is cut off silently
+ if err != nil {
+ return nil, fmt.Errorf("read response: %w", err)
+ }
+ return body, nil
+}
+
+// normalizeHost trims surrounding whitespace, lowercases name, and strips one
+// leading "*." so "*.example.com" and "EXAMPLE.com" collapse to one canonical host.
+func normalizeHost(name string) string {
+ host := strings.ToLower(strings.TrimSpace(name))
+ host = strings.TrimPrefix(host, "*.") // only a single leading wildcard label is removed
+ return host
+}
+
+// addAll inserts every value into set; set must be non-nil, duplicates collapse.
+func addAll(set map[string]struct{}, values []string) {
+ for _, v := range values {
+ set[v] = struct{}{}
+ }
+}
+
+func logPassiveResults(log *output.ModuleLogger, sanitizedURL, logdir string, result *PassiveResult) { // prints findings, then mirrors them to the log file
+ for _, sub := range result.Subdomains {
+ log.Success("subdomain: %s", output.Highlight.Render(sub))
+ }
+ for _, u := range result.URLs {
+ log.Info("url: %s", u)
+ }
+
+ if logdir == "" { // file logging disabled: console output only
+ return
+ }
+
+ var sb strings.Builder // both sections are batched into a single logger.Write call
+ if len(result.Subdomains) > 0 {
+ sb.WriteString(fmt.Sprintf("Subdomains (%d):\n", len(result.Subdomains)))
+ for _, sub := range result.Subdomains {
+ sb.WriteString(" " + sub + "\n")
+ }
+ }
+ if len(result.URLs) > 0 {
+ sb.WriteString(fmt.Sprintf("\nHistorical URLs (%d):\n", len(result.URLs)))
+ for _, u := range result.URLs {
+ sb.WriteString(" " + u + "\n")
+ }
+ }
+ _ = logger.Write(sanitizedURL, logdir, sb.String()) // NOTE(review): result discarded — confirm best-effort logging is intended
+}
diff --git a/internal/scan/passive_test.go b/internal/scan/passive_test.go
new file mode 100644
index 0000000..c6fb200
--- /dev/null
+++ b/internal/scan/passive_test.go
@@ -0,0 +1,163 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+)
+
+// sample feed payloads. the crt.sh one packs two names into a single name_value,
+// and all three carry wildcard, case or duplicate variants for the dedupe checks.
+const (
+ crtshFixture = `[
+ {"name_value": "www.example.com\n*.example.com"},
+ {"name_value": "api.example.com"},
+ {"name_value": "WWW.example.com"}
+ ]`
+ certspotterFixture = `[
+ {"dns_names": ["mail.example.com", "api.example.com"]},
+ {"dns_names": ["*.example.com"]}
+ ]`
+ waybackFixture = "http://example.com/\n" +
+ "http://example.com/login\n" +
+ "http://example.com/login\n" +
+ "\n" +
+ "http://example.com/admin\n"
+)
+
+// fixtureServer serves the three given payloads on /crtsh, /certspotter and
+// /wayback, and repoints the package base-url vars at it until test cleanup.
+func fixtureServer(t *testing.T, crtsh, certspotter, wayback string) *httptest.Server {
+ t.Helper()
+
+ mux := http.NewServeMux()
+ mux.HandleFunc("/crtsh", func(w http.ResponseWriter, _ *http.Request) {
+ _, _ = w.Write([]byte(crtsh))
+ })
+ mux.HandleFunc("/certspotter", func(w http.ResponseWriter, _ *http.Request) {
+ _, _ = w.Write([]byte(certspotter))
+ })
+ mux.HandleFunc("/wayback", func(w http.ResponseWriter, _ *http.Request) {
+ _, _ = w.Write([]byte(wayback))
+ })
+ srv := httptest.NewServer(mux)
+ t.Cleanup(srv.Close)
+
+ origCrtsh, origCertspotter, origWayback := crtshBaseURL, certspotterBaseURL, waybackBaseURL
+ // %s still consumes the domain so the production formatting path is exercised.
+ crtshBaseURL = srv.URL + "/crtsh?q=%s"
+ certspotterBaseURL = srv.URL + "/certspotter?domain=%s"
+ waybackBaseURL = srv.URL + "/wayback?url=%s"
+ t.Cleanup(func() { // package-level vars are mutated, so tests using this helper must not run in parallel
+ crtshBaseURL, certspotterBaseURL, waybackBaseURL = origCrtsh, origCertspotter, origWayback
+ })
+
+ return srv
+}
+
+func TestPassive_ParsesAndDedupes(t *testing.T) { // end-to-end run against the local fixture feeds
+ fixtureServer(t, crtshFixture, certspotterFixture, waybackFixture)
+
+ result, err := Passive("https://example.com", 5*time.Second, "")
+ if err != nil {
+ t.Fatalf("Passive: %v", err)
+ }
+
+ // wildcards stripped, case-folded, and merged across both ct feeds.
+ wantSubs := map[string]bool{ // value flips to true once the host is seen
+ "www.example.com": false,
+ "api.example.com": false,
+ "mail.example.com": false,
+ "example.com": false, // from "*.example.com"
+ }
+ for _, s := range result.Subdomains {
+ if _, ok := wantSubs[s]; !ok {
+ t.Errorf("unexpected subdomain %q", s)
+ continue
+ }
+ wantSubs[s] = true
+ }
+ for s, seen := range wantSubs {
+ if !seen {
+ t.Errorf("missing subdomain %q in %v", s, result.Subdomains)
+ }
+ }
+ if len(result.Subdomains) != len(wantSubs) {
+ t.Errorf("expected %d deduped subdomains, got %d: %v", len(wantSubs), len(result.Subdomains), result.Subdomains)
+ }
+
+ // wayback: blank line dropped, duplicate /login collapsed.
+ wantURLs := map[string]bool{
+ "http://example.com/": false,
+ "http://example.com/login": false,
+ "http://example.com/admin": false,
+ }
+ for _, u := range result.URLs {
+ if _, ok := wantURLs[u]; !ok {
+ t.Errorf("unexpected url %q", u)
+ continue
+ }
+ wantURLs[u] = true
+ }
+ if len(result.URLs) != len(wantURLs) { // a missing url surfaces here as a count mismatch
+ t.Errorf("expected %d deduped urls, got %d: %v", len(wantURLs), len(result.URLs), result.URLs)
+ }
+}
+
+func TestPassive_SourceFailureIsIsolated(t *testing.T) {
+ // the crt.sh endpoint returns a body that is not json, so its parse fails;
+ // certspotter and wayback must still contribute and Passive must not error.
+ fixtureServer(t, "not json", certspotterFixture, waybackFixture)
+
+ result, err := Passive("https://example.com", 5*time.Second, "")
+ if err != nil {
+ t.Fatalf("Passive should not fail when one source is down: %v", err)
+ }
+
+ if len(result.Subdomains) == 0 {
+ t.Error("expected certspotter subdomains despite crt.sh failure")
+ }
+ if len(result.URLs) == 0 {
+ t.Error("expected wayback urls despite crt.sh failure")
+ }
+ if urlsContain(result.Subdomains, "www.example.com") { // this host appears only in the crt.sh fixture
+ t.Error("crt.sh-only subdomain leaked despite parse failure")
+ }
+}
+
+func TestPassive_ResultType(t *testing.T) { // pins the ResultType string for PassiveResult
+ r := &PassiveResult{}
+ if r.ResultType() != "passive" {
+ t.Errorf("ResultType = %q, want passive", r.ResultType())
+ }
+}
+
+func TestNormalizeHost(t *testing.T) { // table of raw name -> canonical host
+ tests := []struct {
+ in string
+ want string
+ }{
+ {"www.example.com", "www.example.com"},
+ {"*.example.com", "example.com"},
+ {" WWW.Example.COM ", "www.example.com"},
+ {"", ""},
+ }
+ for _, tt := range tests {
+ if got := normalizeHost(tt.in); got != tt.want {
+ t.Errorf("normalizeHost(%q) = %q, want %q", tt.in, got, tt.want)
+ }
+ }
+}
diff --git a/internal/scan/redirect.go b/internal/scan/redirect.go
new file mode 100644
index 0000000..7597c31
--- /dev/null
+++ b/internal/scan/redirect.go
@@ -0,0 +1,305 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "regexp"
+ "strings"
+ "sync"
+ "time"
+
+ charmlog "github.com/charmbracelet/log"
+ "github.com/dropalldatabases/sif/internal/httpx"
+ "github.com/dropalldatabases/sif/internal/logger"
+ "github.com/dropalldatabases/sif/internal/output"
+)
+
+// RedirectResult collects every open-redirect found on the target.
+type RedirectResult struct {
+	// Findings holds one entry per param/payload pair that steered the
+	// response to the sentinel host; omitted from json when empty.
+	Findings []RedirectFinding `json:"findings,omitempty"`
+	// TestedParams is the number of distinct parameter names that were probed.
+	TestedParams int `json:"tested_params"`
+}
+
+// RedirectFinding is a single param/payload that sends the user off-site.
+type RedirectFinding struct {
+	URL       string `json:"url"`       // full probe url that triggered the redirect
+	Parameter string `json:"parameter"` // query param the payload was injected into
+	Payload   string `json:"payload"`   // payload with {host} already expanded
+	Location  string `json:"location"`  // redirect target as the response spelled it
+	Via       string `json:"via"`       // header, meta-refresh, or javascript
+	Severity  string `json:"severity"`  // Redirect currently always reports "medium"
+}
+
+// redirectMaxBody caps the body we scan for meta/js redirects (100KB).
+const redirectMaxBody = 1024 * 100
+
+// redirectSentinel is the controlled host we steer redirects toward; a
+// Location that lands on it proves the param is attacker-controlled.
+const redirectSentinel = "sif-redirect-probe.evil.com"
+
+// redirectParams lists params that commonly drive a server-side redirect. They
+// are probed in addition to whatever query params the target url already has.
+var redirectParams = []string{
+	"url", "next", "redirect", "redirect_uri", "redirect_url",
+	"return", "return_url", "returnurl", "returnto", "return_to",
+	"dest", "destination", "continue", "goto", "go", "target",
+	"to", "out", "view", "image_url", "checkout_url", "rurl", "u",
+}
+
+// redirectPayloads holds the payload variants: a plain sentinel plus filter
+// bypasses that browsers still resolve as an absolute off-site target. {host}
+// expands to the sentinel before the payload is sent.
+var redirectPayloads = []string{
+	"https://{host}",          // plain absolute
+	"//{host}",                // scheme-relative
+	"https:/{host}",           // missing slash, browsers normalise it
+	"https:{host}",            // no slashes
+	"/\\{host}",               // backslash trick
+	"/%2f%2f{host}",           // encoded scheme-relative
+	"https://{host}%00.x.com", // null-byte truncation
+	"https://x.com@{host}",    // userinfo confusion - real host is after @
+}
+
+// metaRefreshRe matches a meta refresh redirect such as
+// <meta http-equiv="refresh" content="0;url=https://host/"> and captures the
+// url target. The pattern has to open with "<meta[^>]+": without that prefix it
+// starts on a literal "]" and never matches a real tag.
+var metaRefreshRe = regexp.MustCompile(`(?i)<meta[^>]+http-equiv=["']?refresh["']?[^>]+content=["'][^"']*url=([^"'>\s]+)`)
+
+// jsRedirectRe matches client-side redirects baked into a script body
+// (location.href/replace/assign or window.location assignments) and captures
+// the quoted target.
+var jsRedirectRe = regexp.MustCompile(`(?i)(?:location\.(?:href|replace|assign)\s*(?:=|\()|window\.location\s*=)\s*["']([^"']+)["']`)
+
+// Redirect probes the target's redirect-prone params for open-redirect.
+//
+// Every query param already present on targetURL, plus every name in
+// redirectParams, is paired with every entry of redirectPayloads and sent as
+// its own GET request by a pool of worker goroutines. Redirects are never
+// followed; a hit is recorded when probeRedirect reports that the response
+// steers to redirectSentinel.
+//
+// timeout configures the http client, threads is the worker count (values
+// below 1 are treated as 1) and a non-empty logdir enables the file log.
+//
+// It returns (nil, nil) when no open redirect was found, and a non-nil error
+// only when the log header cannot be written or targetURL does not parse.
+func Redirect(targetURL string, timeout time.Duration, threads int, logdir string) (*RedirectResult, error) {
+	log := output.Module("REDIRECT")
+	log.Start()
+
+	spin := output.NewSpinner("Scanning for open redirects")
+	spin.Start()
+
+	sanitizedURL := stripScheme(targetURL)
+
+	if logdir != "" {
+		if err := logger.WriteHeader(sanitizedURL, logdir, "open redirect probe"); err != nil {
+			spin.Stop()
+			log.Error("error creating log file: %v", err)
+			return nil, fmt.Errorf("create redirect log: %w", err)
+		}
+	}
+
+	parsedURL, err := url.Parse(targetURL)
+	if err != nil {
+		spin.Stop()
+		return nil, fmt.Errorf("parse url: %w", err)
+	}
+	existingParams := parsedURL.Query()
+
+	// a negative count panics in WaitGroup.Add and zero starts no workers at
+	// all, which would report a clean target without sending a single probe.
+	if threads < 1 {
+		threads = 1
+	}
+
+	// merge target's own params with the common redirect names so we cover both
+	paramsToTest := make(map[string]bool, len(existingParams)+len(redirectParams))
+	for param := range existingParams {
+		paramsToTest[param] = true
+	}
+	for _, param := range redirectParams {
+		paramsToTest[param] = true
+	}
+
+	// don't auto-follow: a 30x Location is exactly what we want to inspect.
+	client := httpx.Client(timeout)
+	client.CheckRedirect = func(_ *http.Request, _ []*http.Request) error {
+		return http.ErrUseLastResponse
+	}
+
+	result := &RedirectResult{
+		Findings:     make([]RedirectFinding, 0, 8),
+		TestedParams: len(paramsToTest),
+	}
+
+	type workItem struct {
+		param   string
+		payload string
+	}
+
+	log.Info("testing %d params with %d payloads", len(paramsToTest), len(redirectPayloads))
+
+	// the queue is sized for the full cross product so filling it never blocks;
+	// each (param, payload) pair is enqueued exactly once, so no dedup is needed.
+	workChan := make(chan workItem, len(paramsToTest)*len(redirectPayloads))
+	for param := range paramsToTest {
+		for _, raw := range redirectPayloads {
+			workChan <- workItem{param: param, payload: strings.ReplaceAll(raw, "{host}", redirectSentinel)}
+		}
+	}
+	close(workChan)
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+
+	wg.Add(threads)
+	for t := 0; t < threads; t++ {
+		go func() {
+			defer wg.Done()
+			for item := range workChan {
+				testURL := buildRedirectURL(parsedURL, existingParams, item.param, item.payload)
+
+				location, via, ok := probeRedirect(client, testURL)
+				if !ok {
+					continue
+				}
+
+				// one lock covers the findings slice, the spinner and both logs:
+				// nothing here shows the spinner or file logger to be safe for
+				// concurrent use, and serialising keeps output from interleaving.
+				mu.Lock()
+				result.Findings = append(result.Findings, RedirectFinding{
+					URL:       testURL,
+					Parameter: item.param,
+					Payload:   item.payload,
+					Location:  location,
+					Via:       via,
+					Severity:  "medium",
+				})
+
+				spin.Stop()
+				log.Warn("open redirect via %s in param %s -> %s",
+					output.SeverityMedium.Render(via),
+					output.Highlight.Render(item.param),
+					output.Status.Render(location))
+				spin.Start()
+
+				if logdir != "" {
+					logger.Write(sanitizedURL, logdir,
+						fmt.Sprintf("open redirect: param [%s] via %s -> [%s] (payload %s)\n",
+							item.param, via, location, item.payload))
+				}
+				mu.Unlock()
+			}
+		}()
+	}
+	wg.Wait()
+
+	spin.Stop()
+
+	if len(result.Findings) == 0 {
+		log.Info("no open redirects detected")
+		log.Complete(0, "found")
+		return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners
+	}
+
+	log.Complete(len(result.Findings), "found")
+	return result, nil
+}
+
+// buildRedirectURL rebuilds the target with the payload injected into one param,
+// preserving the rest of the original query.
+//
+// parsedURL supplies scheme, host and path; existing is the target's original
+// query, whose entry for param (if any) is replaced by payload. The query is
+// re-encoded with url.Values.Encode, so keys come out sorted and the payload
+// percent-encoded. Userinfo and fragment are not carried over.
+func buildRedirectURL(parsedURL *url.URL, existing url.Values, param, payload string) string {
+	testParams := make(url.Values, len(existing)+1)
+	for k, v := range existing {
+		if k != param {
+			testParams[k] = v
+		}
+	}
+	testParams.Set(param, payload)
+
+	// EscapedPath keeps the path as the target spelled it; the decoded Path
+	// would turn an encoded "%2F" or "%3F" into a live "/" or "?" and send the
+	// probe to a different route.
+	return fmt.Sprintf("%s://%s%s?%s", parsedURL.Scheme, parsedURL.Host, parsedURL.EscapedPath(), testParams.Encode())
+}
+
+// probeRedirect requests testURL and reports the first off-site redirect it
+// finds, whether that's a 30x Location header, a meta-refresh, or a js
+// location assignment. via names the channel; ok is false when nothing points
+// at the sentinel or when the request itself fails (failures are only logged
+// at debug level).
+func probeRedirect(client *http.Client, testURL string) (location, via string, ok bool) {
+	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, testURL, http.NoBody)
+	if err != nil {
+		charmlog.Debugf("redirect: build request for %s: %v", testURL, err)
+		return "", "", false
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		charmlog.Debugf("redirect: request %s: %v", testURL, err)
+		return "", "", false
+	}
+	defer resp.Body.Close()
+
+	// header redirect: a 30x whose Location resolves to the sentinel host
+	if resp.StatusCode >= http.StatusMultipleChoices && resp.StatusCode < http.StatusBadRequest {
+		if loc := resp.Header.Get("Location"); pointsAtSentinel(loc) {
+			return loc, "header", true
+		}
+	}
+
+	// body redirects: meta refresh or a client-side location assignment
+	body, err := io.ReadAll(io.LimitReader(resp.Body, redirectMaxBody))
+	if err != nil {
+		charmlog.Debugf("redirect: read body of %s: %v", testURL, err)
+		return "", "", false
+	}
+	bodyStr := string(body)
+
+	// walk every match rather than only the first: a page may carry its own
+	// benign redirect ahead of the one our payload produced.
+	for _, m := range metaRefreshRe.FindAllStringSubmatch(bodyStr, -1) {
+		if len(m) > 1 && pointsAtSentinel(m[1]) {
+			return m[1], "meta-refresh", true
+		}
+	}
+	for _, m := range jsRedirectRe.FindAllStringSubmatch(bodyStr, -1) {
+		if len(m) > 1 && pointsAtSentinel(m[1]) {
+			return m[1], "javascript", true
+		}
+	}
+
+	return "", "", false
+}
+
+// pointsAtSentinel reports whether a redirect target lands on our controlled
+// host. Backslashes are folded to forward slashes first, then the value is
+// parsed and its host compared case-insensitively with redirectSentinel, so a
+// sentinel that only appears in a path or query does not count. A value that
+// fails to parse is still a hit when the sentinel leads it once any leading
+// "/" and ":" characters are trimmed.
+func pointsAtSentinel(location string) bool {
+	if len(location) == 0 {
+		return false
+	}
+
+	// browsers treat backslashes in the authority as forward slashes
+	candidate := strings.ReplaceAll(location, "\\", "/")
+
+	u, parseErr := url.Parse(candidate)
+	switch {
+	case parseErr != nil:
+		// unparseable, but the sentinel is still the leading authority
+		lead := strings.TrimLeft(candidate, "/:")
+		return strings.HasPrefix(lead, redirectSentinel)
+	case strings.EqualFold(u.Hostname(), redirectSentinel):
+		// the resolved host is what the navigation actually targets
+		return true
+	case u.Host == "":
+		return false
+	default:
+		// second look at the raw authority with the port cut off
+		return strings.EqualFold(stripPort(u.Host), redirectSentinel)
+	}
+}
+
+// stripPort drops a trailing :port so host comparisons ignore it.
+//
+// A bracketed IPv6 literal ("[::1]:8080") is returned with its brackets and
+// without the port; cutting at the first colon would reduce it to "[". Any
+// other host is cut at its first colon, and a host without a colon is returned
+// unchanged.
+func stripPort(host string) string {
+	if strings.HasPrefix(host, "[") {
+		if end := strings.IndexByte(host, ']'); end != -1 {
+			return host[:end+1]
+		}
+		// unterminated bracket: nothing sensible to strip
+		return host
+	}
+	if h, _, ok := strings.Cut(host, ":"); ok {
+		return h
+	}
+	return host
+}
+
+// ResultType returns the key that identifies open-redirect findings for the
+// result registry.
+func (r *RedirectResult) ResultType() string {
+	return "redirect"
+}
+
+// compile-time check that *RedirectResult satisfies ScanResult.
+var _ ScanResult = (*RedirectResult)(nil)
diff --git a/internal/scan/redirect_test.go b/internal/scan/redirect_test.go
new file mode 100644
index 0000000..ddb83c5
--- /dev/null
+++ b/internal/scan/redirect_test.go
@@ -0,0 +1,163 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+)
+
+// TestRedirect_HeaderLocation runs the probe against a server that echoes the
+// "next" param straight into Location, the textbook open redirect, and
+// expects a header finding on that param.
+func TestRedirect_HeaderLocation(t *testing.T) {
+	echoNext := func(w http.ResponseWriter, r *http.Request) {
+		next := r.URL.Query().Get("next")
+		if next == "" {
+			w.WriteHeader(http.StatusOK)
+			return
+		}
+		w.Header().Set("Location", next)
+		w.WriteHeader(http.StatusFound)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(echoNext))
+	defer srv.Close()
+
+	result, err := Redirect(srv.URL, 5*time.Second, 4, "")
+	switch {
+	case err != nil:
+		t.Fatalf("Redirect: %v", err)
+	case result == nil || len(result.Findings) == 0:
+		t.Fatalf("expected open redirect findings, got %+v", result)
+	}
+
+	for i := range result.Findings {
+		hit := &result.Findings[i]
+		if hit.Parameter == "next" && hit.Via == "header" {
+			return
+		}
+	}
+	t.Errorf("expected a header redirect via 'next', got %+v", result.Findings)
+}
+
+func TestRedirect_MetaRefresh(t *testing.T) {
+ // body-based redirect: a meta refresh pointing at the injected url.
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ dest := r.URL.Query().Get("url")
+ w.Header().Set("Content-Type", "text/html")
+ w.WriteHeader(http.StatusOK)
+ if dest != "" {
+ //nolint:gosec // deliberate open-redirect fixture for the probe under test
+ w.Write([]byte(``))
+ return
+ }
+ w.Write([]byte("home"))
+ }))
+ defer srv.Close()
+
+ result, err := Redirect(srv.URL, 5*time.Second, 4, "")
+ if err != nil {
+ t.Fatalf("Redirect: %v", err)
+ }
+ if result == nil {
+ t.Fatalf("expected meta-refresh findings, got nil")
+ }
+ var sawMeta bool
+ for _, f := range result.Findings {
+ if f.Via == "meta-refresh" {
+ sawMeta = true
+ }
+ }
+ if !sawMeta {
+ t.Errorf("expected a meta-refresh redirect finding, got %+v", result.Findings)
+ }
+}
+
+func TestRedirect_NoFalsePositive(t *testing.T) {
+ tests := []struct {
+ name string
+ handler http.HandlerFunc
+ }{
+ {
+ name: "never redirects",
+ handler: func(w http.ResponseWriter, _ *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte("home"))
+ },
+ },
+ {
+ name: "only redirects to a fixed safe path",
+ handler: func(w http.ResponseWriter, _ *http.Request) {
+ // ignores the param, always sends users to its own login page.
+ w.Header().Set("Location", "/login")
+ w.WriteHeader(http.StatusFound)
+ },
+ },
+ {
+ name: "reflects param into body but not as a redirect",
+ handler: func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ // the value lands in plain text, no meta/js redirect mechanism.
+ //nolint:gosec // intentional reflection fixture; asserts no false positive
+ w.Write([]byte("you searched for " + r.URL.Query().Get("next") + "
"))
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ srv := httptest.NewServer(tt.handler)
+ defer srv.Close()
+
+ result, err := Redirect(srv.URL, 5*time.Second, 4, "")
+ if err != nil {
+ t.Fatalf("Redirect: %v", err)
+ }
+ if result != nil && len(result.Findings) > 0 {
+ t.Errorf("expected no findings, got %+v", result.Findings)
+ }
+ })
+ }
+}
+
+// TestPointsAtSentinel pins which redirect targets count as landing on the
+// sentinel host. Beyond the basic shapes it covers the userinfo-confusion
+// payload the scanner ships, scheme-relative targets with a port, and a host
+// that merely starts with the sentinel but belongs to another domain.
+func TestPointsAtSentinel(t *testing.T) {
+	tests := []struct {
+		name     string
+		location string
+		want     bool
+	}{
+		{"absolute https", "https://" + redirectSentinel + "/path", true},
+		{"scheme-relative", "//" + redirectSentinel, true},
+		{"scheme-relative with port", "//" + redirectSentinel + ":8443/cb", true},
+		{"backslash trick", "/\\" + redirectSentinel, true},
+		{"with port", "https://" + redirectSentinel + ":443/", true},
+		{"userinfo confusion", "https://x.com@" + redirectSentinel, true},
+		{"empty", "", false},
+		{"same-site path", "/dashboard", false},
+		{"sentinel only in path", "https://safe.example.com/" + redirectSentinel, false},
+		{"sentinel only in query", "https://safe.example.com/?to=" + redirectSentinel, false},
+		{"sentinel as prefix of another host", "https://" + redirectSentinel + ".safe.example.com/", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := pointsAtSentinel(tt.location); got != tt.want {
+				t.Errorf("pointsAtSentinel(%q) = %v, want %v", tt.location, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestRedirectResult_ResultType pins the registry key of RedirectResult.
+func TestRedirectResult_ResultType(t *testing.T) {
+	got := (&RedirectResult{}).ResultType()
+	if got != "redirect" {
+		t.Errorf("expected result type 'redirect', got %q", got)
+	}
+}
diff --git a/internal/scan/xss.go b/internal/scan/xss.go
new file mode 100644
index 0000000..8ccb98d
--- /dev/null
+++ b/internal/scan/xss.go
@@ -0,0 +1,342 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "strings"
+ "sync"
+ "time"
+
+ charmlog "github.com/charmbracelet/log"
+ "github.com/dropalldatabases/sif/internal/httpx"
+ "github.com/dropalldatabases/sif/internal/logger"
+ "github.com/dropalldatabases/sif/internal/output"
+)
+
+// XSSResult collects every likely reflected-xss point on the target.
+type XSSResult struct {
+	// Findings holds at most one entry per probed parameter; omitted from
+	// json when empty.
+	Findings []XSSFinding `json:"findings,omitempty"`
+	// TestedParams is the number of distinct parameter names that were probed.
+	TestedParams int `json:"tested_params"`
+}
+
+// XSSFinding is a reflection where one or more breaking chars survived
+// unescaped in a context that makes injection plausible.
+type XSSFinding struct {
+	URL         string   `json:"url"`          // full probe url that produced the reflection
+	Parameter   string   `json:"parameter"`    // query param the canary payload was injected into
+	Context     string   `json:"context"`      // html, attribute, or script
+	SurvivedRaw []string `json:"survived_raw"` // breaking chars echoed unescaped
+	Severity    string   `json:"severity"`     // probeXSS currently always reports "high"
+}
+
+// xssMaxBody caps the body we scan for the canary (100KB).
+const xssMaxBody = 1024 * 100
+
+// canaryToken is a unique, alnum-only marker we can grep for unambiguously; it
+// survives every output encoder so a missing reflection means no echo at all.
+const canaryToken = "sifxss9173canary" //nolint:gosec // not a credential, just a reflection marker
+
+// xssBreakChars are the chars that let an attacker break out of a context; we
+// inject the canary wrapped in each and check which come back raw. The slice
+// order is also the order in which surviving chars are reported.
+var xssBreakChars = []string{"<", ">", "\"", "'", "`"}
+
+// xssParams are common reflection-prone param names; XSS probes them in
+// addition to whatever query params the target url already carries.
+var xssParams = []string{
+	"q", "s", "search", "query", "id", "name", "page",
+	"keyword", "lang", "redirect", "url", "return", "ref",
+	"message", "msg", "error", "title", "text", "comment",
+}
+
+// XSS probes the target's params for reflected cross-site scripting.
+//
+// Every query param already present on targetURL, plus every name in
+// xssParams, gets one canary request from a pool of worker goroutines;
+// probeXSS decides whether the reflection is reportable.
+//
+// timeout configures the http client, threads is the worker count (values
+// below 1 are treated as 1) and a non-empty logdir enables the file log.
+//
+// It returns (nil, nil) when nothing was found, and a non-nil error only when
+// the log header cannot be written or targetURL does not parse.
+func XSS(targetURL string, timeout time.Duration, threads int, logdir string) (*XSSResult, error) {
+	log := output.Module("XSS")
+	log.Start()
+
+	spin := output.NewSpinner("Scanning for reflected XSS")
+	spin.Start()
+
+	sanitizedURL := stripScheme(targetURL)
+
+	if logdir != "" {
+		if err := logger.WriteHeader(sanitizedURL, logdir, "reflected XSS probe"); err != nil {
+			spin.Stop()
+			log.Error("error creating log file: %v", err)
+			return nil, fmt.Errorf("create xss log: %w", err)
+		}
+	}
+
+	parsedURL, err := url.Parse(targetURL)
+	if err != nil {
+		spin.Stop()
+		return nil, fmt.Errorf("parse url: %w", err)
+	}
+	existingParams := parsedURL.Query()
+
+	// a negative count panics in WaitGroup.Add and zero starts no workers at
+	// all, which would report a clean target without sending a single probe.
+	if threads < 1 {
+		threads = 1
+	}
+
+	paramsToTest := make(map[string]bool, len(existingParams)+len(xssParams))
+	for param := range existingParams {
+		paramsToTest[param] = true
+	}
+	for _, param := range xssParams {
+		paramsToTest[param] = true
+	}
+
+	// follow a bounded number of redirects, then inspect whatever came last.
+	client := httpx.Client(timeout)
+	client.CheckRedirect = func(_ *http.Request, via []*http.Request) error {
+		if len(via) >= corsMaxRedirects {
+			return http.ErrUseLastResponse
+		}
+		return nil
+	}
+
+	result := &XSSResult{
+		Findings:     make([]XSSFinding, 0, 8),
+		TestedParams: len(paramsToTest),
+	}
+
+	log.Info("testing %d params with reflection canary", len(paramsToTest))
+
+	// the queue is sized for every param so filling it never blocks; map keys
+	// are unique, so each param is probed exactly once and needs no dedup.
+	paramChan := make(chan string, len(paramsToTest))
+	for param := range paramsToTest {
+		paramChan <- param
+	}
+	close(paramChan)
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+
+	wg.Add(threads)
+	for t := 0; t < threads; t++ {
+		go func() {
+			defer wg.Done()
+			for param := range paramChan {
+				finding, ok := probeXSS(client, parsedURL, existingParams, param)
+				if !ok {
+					continue
+				}
+
+				// one lock covers the findings slice, the spinner and both logs:
+				// nothing here shows the spinner or file logger to be safe for
+				// concurrent use, and serialising keeps output from interleaving.
+				mu.Lock()
+				result.Findings = append(result.Findings, finding)
+
+				spin.Stop()
+				log.Warn("reflected xss in param %s (%s context, raw: %s)",
+					output.Highlight.Render(param),
+					output.SeverityHigh.Render(finding.Context),
+					strings.Join(finding.SurvivedRaw, ""))
+				spin.Start()
+
+				if logdir != "" {
+					logger.Write(sanitizedURL, logdir,
+						fmt.Sprintf("reflected XSS: param [%s] in %s context, unescaped chars [%s]\n",
+							param, finding.Context, strings.Join(finding.SurvivedRaw, "")))
+				}
+				mu.Unlock()
+			}
+		}()
+	}
+	wg.Wait()
+
+	spin.Stop()
+
+	if len(result.Findings) == 0 {
+		log.Info("no reflected xss detected")
+		log.Complete(0, "found")
+		return nil, nil //nolint:nilnil // no finding is not an error, mirrors the other scanners
+	}
+
+	log.Complete(len(result.Findings), "found")
+	return result, nil
+}
+
+// probeXSS injects a canary wrapped in the breaking chars into one param, then
+// inspects the reflection: it classifies where the canary landed and which
+// breaking chars came back unescaped there. ok is false unless at least one
+// dangerous char survived in an exploitable context; request and read failures
+// also yield ok == false.
+func probeXSS(client *http.Client, parsedURL *url.URL, existing url.Values, param string) (XSSFinding, bool) {
+	// wrap the canary so a single request tells us both that it reflected and
+	// which surrounding chars survived: <canary>"canary'`canary`
+	payload := fmt.Sprintf("<%s>\"%s'`%s`", canaryToken, canaryToken, canaryToken)
+
+	testParams := make(url.Values, len(existing)+1)
+	for k, v := range existing {
+		if k != param {
+			testParams[k] = v
+		}
+	}
+	testParams.Set(param, payload)
+
+	// EscapedPath keeps the path as the target spelled it; the decoded Path
+	// would turn an encoded "%2F" or "%3F" into a live "/" or "?" and send the
+	// probe to a different route.
+	testURL := fmt.Sprintf("%s://%s%s?%s", parsedURL.Scheme, parsedURL.Host, parsedURL.EscapedPath(), testParams.Encode())
+
+	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, testURL, http.NoBody)
+	if err != nil {
+		charmlog.Debugf("xss: build request for %s: %v", testURL, err)
+		return XSSFinding{}, false
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		charmlog.Debugf("xss: request %s: %v", testURL, err)
+		return XSSFinding{}, false
+	}
+	body, err := io.ReadAll(io.LimitReader(resp.Body, xssMaxBody))
+	resp.Body.Close()
+	if err != nil {
+		charmlog.Debugf("xss: read body of %s: %v", testURL, err)
+		return XSSFinding{}, false
+	}
+	bodyStr := string(body)
+
+	// no echo of the canary at all means the param isn't reflected; bail early.
+	if !strings.Contains(bodyStr, canaryToken) {
+		return XSSFinding{}, false
+	}
+
+	reflectCtx := classifyXSSContext(bodyStr)
+	survived := survivingBreakChars(bodyStr)
+
+	// a reflection that escaped every dangerous char can't break out, so it's not
+	// reported - only raw chars that matter in the detected context count.
+	survived = relevantForContext(reflectCtx, survived)
+	if len(survived) == 0 {
+		return XSSFinding{}, false
+	}
+
+	return XSSFinding{
+		URL:         testURL,
+		Parameter:   param,
+		Context:     reflectCtx,
+		SurvivedRaw: survived,
+		Severity:    "high",
+	}, true
+}
+
+// classifyXSSContext guesses where the canary was reflected. We look at the
+// markup immediately around the token: a live tag means html text, a
+// reflection inside a is a script context
+ for {
+ open := strings.Index(body, ""):]
+ }
+
+ // default: echoed inside an html attribute value
+ return "attribute"
+}
+
+// survivingBreakChars reports which dangerous chars came back next to the canary
+// unescaped. Only occurrences directly adjacent to the token are trusted, so
+// unrelated chars elsewhere on the page don't create false positives. The
+// result follows the order of xssBreakChars and is never nil.
+func survivingBreakChars(body string) []string {
+	// each probe pairs the literal to look for with the break char it proves
+	probes := [...]struct{ needle, char string }{
+		{"<" + canaryToken, "<"},   // leading < survived
+		{canaryToken + ">", ">"},   // trailing > survived
+		{"\"" + canaryToken, "\""}, // leading " survived
+		{canaryToken + "'", "'"},   // trailing ' survived
+		{"`" + canaryToken, "`"},   // backtick on either side of the token
+		{canaryToken + "`", "`"},
+	}
+
+	hit := make(map[string]bool, len(xssBreakChars))
+	for _, p := range probes {
+		if strings.Contains(body, p.needle) {
+			hit[p.char] = true
+		}
+	}
+
+	// emit in canonical order for stable output
+	out := make([]string, 0, len(xssBreakChars))
+	for _, ch := range xssBreakChars {
+		if hit[ch] {
+			out = append(out, ch)
+		}
+	}
+	return out
+}
+
+// relevantForContext narrows the surviving chars to those that actually enable
+// a breakout in the detected context: angle brackets in html, quotes inside an
+// attribute, and any of the five break chars inside a script. An unrecognised
+// context keeps nothing. Input order is preserved and the result is never nil.
+func relevantForContext(reflectCtx string, survived []string) []string {
+	var keep func(c string) bool
+	switch reflectCtx {
+	case "html":
+		keep = func(c string) bool { return c == "<" || c == ">" }
+	case "attribute":
+		// only the delimiting quote breaks out of an attribute value; a bare
+		// backtick isn't a delimiter in html, so it doesn't count here.
+		keep = func(c string) bool { return c == "\"" || c == "'" }
+	case "script":
+		// a quote, backtick, or angle bracket all let you close/escape the script
+		keep = func(c string) bool {
+			switch c {
+			case "\"", "'", "`", "<", ">":
+				return true
+			}
+			return false
+		}
+	default:
+		keep = func(string) bool { return false }
+	}
+
+	kept := make([]string, 0, len(survived))
+	for _, c := range survived {
+		if keep(c) {
+			kept = append(kept, c)
+		}
+	}
+	return kept
+}
+
+// ResultType returns the key that identifies reflected-xss findings for the
+// result registry.
+func (r *XSSResult) ResultType() string {
+	return "xss"
+}
+
+// compile-time check that *XSSResult satisfies ScanResult.
+var _ ScanResult = (*XSSResult)(nil)
diff --git a/internal/scan/xss_test.go b/internal/scan/xss_test.go
new file mode 100644
index 0000000..66ade5a
--- /dev/null
+++ b/internal/scan/xss_test.go
@@ -0,0 +1,153 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+: :
+: █▀ █ █▀▀ · Blazing-fast pentesting suite :
+: ▄█ █ █▀ · BSD 3-Clause License :
+: :
+: (c) 2022-2026 vmfunc, xyzeva, :
+: lunchcat alumni & contributors :
+: :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package scan
+
+import (
+ "html"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+)
+
+// reflectsRaw echoes the named param straight into html text, so the breaking
+// chars survive unescaped - a reflected xss sink.
+func reflectsRaw(param string) *httptest.Server {
+ return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ v := r.URL.Query().Get(param)
+ w.Header().Set("Content-Type", "text/html")
+ w.WriteHeader(http.StatusOK)
+ //nolint:gosec // deliberate reflected-xss fixture for the probe under test
+ w.Write([]byte("" + v + "
"))
+ }))
+}
+
+// TestXSS_DetectsRawHTMLReflection runs the probe against a server that echoes
+// the "q" param unescaped and expects an html-context finding on that param
+// with at least one surviving break char.
+func TestXSS_DetectsRawHTMLReflection(t *testing.T) {
+	srv := reflectsRaw("q")
+	defer srv.Close()
+
+	result, err := XSS(srv.URL, 5*time.Second, 4, "")
+	switch {
+	case err != nil:
+		t.Fatalf("XSS: %v", err)
+	case result == nil || len(result.Findings) == 0:
+		t.Fatalf("expected reflected xss findings, got %+v", result)
+	}
+
+	var hit *XSSFinding
+	for i := range result.Findings {
+		candidate := &result.Findings[i]
+		if candidate.Parameter != "q" {
+			continue
+		}
+		hit = candidate
+	}
+	if hit == nil {
+		t.Fatalf("expected a finding on param 'q', got %+v", result.Findings)
+	}
+
+	if hit.Context != "html" {
+		t.Errorf("expected html context, got %s", hit.Context)
+	}
+	if len(hit.SurvivedRaw) < 1 {
+		t.Errorf("expected surviving breaking chars, got none")
+	}
+}
+
+func TestXSS_NoFalsePositiveWhenEscaped(t *testing.T) {
+ // the server html-escapes the reflection, so no breaking char survives raw.
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ v := r.URL.Query().Get("q")
+ w.Header().Set("Content-Type", "text/html")
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte("" + html.EscapeString(v) + "
"))
+ }))
+ defer srv.Close()
+
+ result, err := XSS(srv.URL, 5*time.Second, 4, "")
+ if err != nil {
+ t.Fatalf("XSS: %v", err)
+ }
+ if result != nil && len(result.Findings) > 0 {
+ t.Errorf("expected no findings when reflection is escaped, got %+v", result.Findings)
+ }
+}
+
+// TestXSS_NoFalsePositiveWhenNotReflected serves a fixed page that never
+// echoes the input back, so nothing is injectable and no finding may appear.
+func TestXSS_NoFalsePositiveWhenNotReflected(t *testing.T) {
+	static := func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/html")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("static page"))
+	}
+	srv := httptest.NewServer(http.HandlerFunc(static))
+	defer srv.Close()
+
+	result, err := XSS(srv.URL, 5*time.Second, 4, "")
+	if err != nil {
+		t.Fatalf("XSS: %v", err)
+	}
+	if result == nil || len(result.Findings) == 0 {
+		return
+	}
+	t.Errorf("expected no findings on static page, got %+v", result.Findings)
+}
+
+func TestClassifyXSSContext(t *testing.T) {
+ tests := []struct {
+ name string
+ body string
+ want string
+ }{
+ {
+ name: "live html tag",
+ body: "<" + canaryToken + ">
",
+ want: "html",
+ },
+ {
+ name: "inside script block",
+ body: "",
+ want: "script",
+ },
+ {
+ name: "attribute value",
+ body: ``,
+ want: "attribute",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := classifyXSSContext(tt.body); got != tt.want {
+ t.Errorf("classifyXSSContext() = %q, want %q", got, tt.want)
+ }
+ })
+ }
+}
+
+// TestSurvivingBreakChars feeds the canary wrapped exactly as the probe injects
+// it and expects all five break chars to be reported as surviving.
+func TestSurvivingBreakChars(t *testing.T) {
+	body := "<" + canaryToken + ">\"" + canaryToken + "'`" + canaryToken + "`"
+	expected := map[string]bool{"<": true, ">": true, "\"": true, "'": true, "`": true}
+
+	survivors := survivingBreakChars(body)
+	if len(survivors) != len(expected) {
+		t.Fatalf("expected %d surviving chars, got %v", len(expected), survivors)
+	}
+	for i := range survivors {
+		if expected[survivors[i]] {
+			continue
+		}
+		t.Errorf("unexpected surviving char %q", survivors[i])
+	}
+}
+
+// TestXSSResult_ResultType pins the registry key of XSSResult.
+func TestXSSResult_ResultType(t *testing.T) {
+	got := (&XSSResult{}).ResultType()
+	if got != "xss" {
+		t.Errorf("expected result type 'xss', got %q", got)
+	}
+}
diff --git a/man/sif.1 b/man/sif.1
index f70e3cd..bb160a7 100644
--- a/man/sif.1
+++ b/man/sif.1
@@ -51,7 +51,7 @@ vulnerability scanning with nuclei templates.
automated google dorking.
.TP
.B \-js
-javascript analysis.
+javascript analysis + secret and endpoint extraction.
.TP
.B \-c3
cloud storage misconfiguration scan.
@@ -86,9 +86,27 @@ sql reconnaissance (admin panels, error disclosure).
.B \-lfi
local file inclusion reconnaissance.
.TP
+.B \-cors
+cors misconfiguration probe (reflected/permissive origins).
+.TP
+.B \-redirect
+open redirect probe.
+.TP
+.B \-xss
+reflected xss probe.
+.TP
.B \-framework
framework detection with cve lookup.
.TP
+.B \-crawl
+web crawler; spiders same\-host links, scripts and forms, respecting robots.txt.
+.TP
+.BR \-crawl\-depth " \fIn\fR"
+max crawl recursion depth (default 2).
+.TP
+.B \-passive
+passive subdomain and historical url discovery from third\-party feeds (zero traffic to the target).
+.TP
.B \-noscan
skip the base url scan (robots.txt, etc).
.SH OPTIONS
diff --git a/sif.go b/sif.go
index cee06fd..bb6b705 100644
--- a/sif.go
+++ b/sif.go
@@ -391,6 +391,56 @@ func (app *App) Run() error {
}
}
+ if app.settings.CORS {
+ result, err := scan.CORS(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
+ if err != nil {
+ log.Errorf("Error while running CORS probe: %s", err)
+ } else if result != nil {
+ moduleResults = append(moduleResults, NewModuleResult(result))
+ scansRun = append(scansRun, "CORS")
+ }
+ }
+
+ if app.settings.Redirect {
+ result, err := scan.Redirect(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
+ if err != nil {
+ log.Errorf("Error while running open redirect probe: %s", err)
+ } else if result != nil {
+ moduleResults = append(moduleResults, NewModuleResult(result))
+ scansRun = append(scansRun, "Open Redirect")
+ }
+ }
+
+ if app.settings.XSS {
+ result, err := scan.XSS(url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
+ if err != nil {
+ log.Errorf("Error while running reflected XSS probe: %s", err)
+ } else if result != nil {
+ moduleResults = append(moduleResults, NewModuleResult(result))
+ scansRun = append(scansRun, "Reflected XSS")
+ }
+ }
+
+ if app.settings.Crawl {
+ result, err := scan.Crawl(url, app.settings.CrawlDepth, app.settings.Timeout, app.settings.LogDir)
+ if err != nil {
+ log.Errorf("Error while running web crawl: %s", err)
+ } else if result != nil {
+ moduleResults = append(moduleResults, NewModuleResult(result))
+ scansRun = append(scansRun, "Crawl")
+ }
+ }
+
+ if app.settings.Passive {
+ result, err := scan.Passive(url, app.settings.Timeout, app.settings.LogDir)
+ if err != nil {
+ log.Errorf("Error while running passive discovery: %s", err)
+ } else if result != nil {
+ moduleResults = append(moduleResults, NewModuleResult(result))
+ scansRun = append(scansRun, "Passive")
+ }
+ }
+
// Load and run modules
if app.settings.AllModules || app.settings.Modules != "" || app.settings.ModuleTags != "" {
loader, err := modules.NewLoader()