feat(js): extract secrets and endpoints from scanned javascript

the -js pipeline already pulls every <script> into a buffer but only mined supabase jwts from it. reuse that buffer to run a credential regex bank (aws/github/slack/stripe/google keys, pem blocks, plus entropy-gated generic apikey/secret/token assignments) and a linkfinder-style endpoint extractor that resolves relatives to absolute urls. both dedupe across scripts and surface through the existing js logger and result struct, no new flag.
2026-06-12 11:01:24 -07:00 · 2026-06-09 17:54:23 -07:00
parent 65ce36e963
commit b4e78114d7
9 changed files with 602 additions and 4 deletions
@@ -88,6 +88,7 @@ linters:
        linters:
          - errcheck
          - noctx
+          - gosec  # fake credentials in secret-scanner fixtures are not real keys

 issues:
  max-issues-per-linter: 50
@@ -158,7 +158,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
 | `-ports` | port scanning (common/full) |
 | `-nuclei` | vulnerability scanning with nuclei templates |
 | `-dork` | automated google dorking |
-| `-js` | javascript analysis |
+| `-js` | javascript analysis + secret and endpoint extraction |
 | `-c3` | cloud storage misconfiguration |
 | `-headers` | http header analysis |
 | `-sh` | security header analysis (missing/weak headers) |
@@ -79,7 +79,7 @@ scopes: `common` (top ports), `full` (all ports)

 ### javascript analysis

-`-js` - analyze javascript files
+`-js` - analyze javascript files + secret and endpoint extraction

 ```bash
 ./sif -u https://example.com -js
@@ -0,0 +1,128 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+	"net/url"
+	"regexp"
+	"slices"
+	"strings"
+
+	urlutil "github.com/projectdiscovery/utils/url"
+)
+
+// endpointRegex is a linkfinder-style matcher for quoted paths and urls inside
+// js. the value must sit between two quote characters (", ' or backtick; the
+// opener and closer are not required to be the same kind) and be one of:
+//   - an absolute http(s) url or a protocol-relative //host url
+//   - a root-relative path (/api/...), optionally followed by ?query
+//   - a dotted-relative path (./x or ../x), optionally followed by ?query
+//
+// slash-less words and bare names are not matched. the alternation lives in a
+// single capture group so FindAllStringSubmatch hands back just the value at
+// submatch index 1.
+var endpointRegex = regexp.MustCompile(`["'\x60]` +
+	`(` +
+	`(?:https?:)?//[^\s"'\x60]{2,}` + // protocol-relative or absolute url
+	`|` +
+	`/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // root-relative path
+	`|` +
+	`\.{1,2}/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // dotted-relative path
+	`)` +
+	`["'\x60]`)
+
+// shortest candidate (in bytes) that isEndpoint will accept; below this it's
+// almost always noise like "/" or a single slash-prefixed letter.
+const minEndpointLen = 3
+
+// top-level mime type prefixes (text/html, application/json, ...). isEndpoint
+// rejects any candidate that starts with one of them, since a bare
+// "type/subtype" string is never an endpoint.
+// NOTE(review): every endpointRegex alternative starts with "/", "." or
+// "http", so these prefixes look unreachable through ExtractEndpoints today;
+// the filter only matters if the regex is widened or isEndpoint is called
+// directly. confirm before relying on it.
+var mimePrefixes = []string{
+	"text/", "image/", "audio/", "video/", "font/",
+	"application/", "multipart/", "model/", "message/",
+}
+
+// ExtractEndpoints scans a script body for quoted urls and paths and returns
+// the accepted ones as a sorted, duplicate-free slice. candidates are filtered
+// through isEndpoint, and relative ones are resolved against baseURL when it
+// parses; if baseURL is unusable the relatives are returned verbatim instead
+// of aborting the scan. returns nil when the regex finds nothing at all.
+func ExtractEndpoints(content, baseURL string) []string {
+	hits := endpointRegex.FindAllStringSubmatch(content, -1)
+	if len(hits) == 0 {
+		return nil
+	}
+
+	base, baseErr := urlutil.Parse(baseURL)
+	// relatives can only be resolved when the base parsed into a usable url.
+	canResolve := baseErr == nil && base.URL != nil
+
+	unique := make(map[string]struct{}, len(hits))
+	out := make([]string, 0, len(hits))
+	for _, hit := range hits {
+		// index 1 is the single capture group: the value without its quotes.
+		raw := strings.TrimSpace(hit[1])
+		if !isEndpoint(raw) {
+			continue
+		}
+
+		final := raw
+		if canResolve && isRelative(raw) {
+			final = resolveRelative(base.URL, raw)
+		}
+
+		// dedupe on the resolved form so "/x" and "./x" can collapse together.
+		if _, dup := unique[final]; !dup {
+			unique[final] = struct{}{}
+			out = append(out, final)
+		}
+	}
+
+	slices.Sort(out)
+	return out
+}
+
+// isEndpoint decides whether a regex candidate is worth reporting. it rejects
+// anything shorter than minEndpointLen, bare mime types ("type/subtype" with
+// no leading slash), and tokens that contain no slash at all.
+func isEndpoint(s string) bool {
+	if len(s) < minEndpointLen {
+		return false
+	}
+
+	// an api route like /application/users starts with a slash and is kept;
+	// only a candidate that begins directly with a mime top-level type is
+	// dropped, so the prefix scan is skipped for slash-led candidates.
+	if lowered := strings.ToLower(s); !strings.HasPrefix(lowered, "/") {
+		for _, prefix := range mimePrefixes {
+			if strings.HasPrefix(lowered, prefix) {
+				return false
+			}
+		}
+	}
+
+	// "word" or "a.b" has no slash: that's a token, not a path.
+	return strings.Contains(s, "/")
+}
+
+// isRelative reports whether candidate needs the base url to become absolute.
+// protocol-relative (//host) values and http:// or https:// urls already name
+// their host and are reported as not relative; everything else is relative.
+// the scheme check is case-sensitive.
+func isRelative(candidate string) bool {
+	switch {
+	case strings.HasPrefix(candidate, "//"),
+		strings.HasPrefix(candidate, "http://"),
+		strings.HasPrefix(candidate, "https://"):
+		return false
+	default:
+		return true
+	}
+}
+
+// resolveRelative makes ref absolute by resolving it against base with the
+// stdlib reference resolver. a ref that url.Parse rejects is handed back
+// unchanged so the finding is not lost.
+func resolveRelative(base *url.URL, ref string) string {
+	if refURL, err := url.Parse(ref); err == nil {
+		return base.ResolveReference(refURL).String()
+	}
+	return ref
+}
@@ -0,0 +1,106 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+	"slices"
+	"testing"
+)
+
+// TestExtractEndpoints drives ExtractEndpoints through a table of script
+// snippets, all resolved against the same base url. wantSome entries must be
+// present in the result; wantAbsent entries must not be.
+func TestExtractEndpoints(t *testing.T) {
+	const base = "https://example.com/static/app.js"
+
+	tests := []struct {
+		name       string
+		content    string
+		wantSome   []string // each must appear in the result
+		wantAbsent []string // none of these may appear
+	}{
+		{
+			name:     "root-relative api path resolves to absolute",
+			content:  `fetch("/api/users")`,
+			wantSome: []string{"https://example.com/api/users"},
+		},
+		{
+			name:     "absolute url passes through untouched",
+			content:  `const u = "https://api.example.org/v1/login";`,
+			wantSome: []string{"https://api.example.org/v1/login"},
+		},
+		{
+			name:     "dotted-relative path resolves against base dir",
+			content:  `import("./chunks/main.js")`,
+			wantSome: []string{"https://example.com/static/chunks/main.js"},
+		},
+		{
+			name:     "query string is preserved",
+			content:  `axios.get("/api/search?q=test")`,
+			wantSome: []string{"https://example.com/api/search?q=test"},
+		},
+		{
+			// a wrongly accepted candidate would be resolved against base
+			// before being returned, so the resolved forms are listed too;
+			// checking only the raw strings could never fail.
+			name:    "mime types are filtered out",
+			content: `headers["Content-Type"] = "application/json"; var t = "text/html";`,
+			wantAbsent: []string{
+				"application/json",
+				"text/html",
+				"https://example.com/static/application/json",
+				"https://example.com/static/text/html",
+			},
+		},
+		{
+			// same reasoning as above: cover both raw and resolved forms.
+			name:    "single words without a slash are ignored",
+			content: `var x = "hello"; var y = "world";`,
+			wantAbsent: []string{
+				"hello",
+				"world",
+				"https://example.com/static/hello",
+				"https://example.com/static/world",
+			},
+		},
+		{
+			name:    "multiple endpoints deduped",
+			content: `fetch("/api/users"); fetch("/api/users"); fetch("/api/posts");`,
+			wantSome: []string{
+				"https://example.com/api/users",
+				"https://example.com/api/posts",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ExtractEndpoints(tt.content, base)
+
+			for _, want := range tt.wantSome {
+				if !slices.Contains(got, want) {
+					t.Errorf("expected %q in %v", want, got)
+				}
+			}
+			for _, absent := range tt.wantAbsent {
+				if slices.Contains(got, absent) {
+					t.Errorf("did not expect %q in %v", absent, got)
+				}
+			}
+		})
+	}
+}
+
+func TestExtractEndpointsDedupes(t *testing.T) {
+	got := ExtractEndpoints(`fetch("/api/x"); fetch("/api/x");`, "https://example.com/app.js")
+	count := 0
+	for i := 0; i < len(got); i++ {
+		if got[i] == "https://example.com/api/x" {
+			count++
+		}
+	}
+	if count != 1 {
+		t.Fatalf("expected /api/x once, got %d times in %v", count, got)
+	}
+}
+
+func TestExtractEndpointsBadBaseKeepsRelatives(t *testing.T) {
+	// a base url that won't parse must not drop findings; relatives stay as-is.
+	got := ExtractEndpoints(`fetch("/api/users")`, "::not a url::")
+	if !slices.Contains(got, "/api/users") {
+		t.Errorf("expected relative /api/users preserved, got %v", got)
+	}
+}
@@ -32,6 +32,8 @@ import (
 type JavascriptScanResult struct {
 	SupabaseResults      []supabaseScanResult `json:"supabase_results"`
 	FoundEnvironmentVars map[string]string    `json:"environment_variables"`
+	SecretMatches        []SecretMatch        `json:"secret_matches"`
+	Endpoints            []string             `json:"endpoints"`
 }

 // ResultType implements the ScanResult interface.
@@ -116,6 +118,11 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
 	log.Info("Got %d scripts, now running scans on them", len(scripts))

 	supabaseResults := make([]supabaseScanResult, 0, len(scripts))
+	secretMatches := make([]SecretMatch, 0)
+	endpoints := make([]string, 0)
+	// dedupe secrets and endpoints across every script, not just within one.
+	seenSecrets := make(map[string]struct{})
+	seenEndpoints := make(map[string]struct{})
 	for _, script := range scripts {
 		charmlog.Debugf("Scanning %s", script)
 		req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, script, http.NoBody)
@@ -147,16 +154,41 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
 		if scriptSupabaseResults != nil {
 			supabaseResults = append(supabaseResults, scriptSupabaseResults...)
 		}
+
+		// reuse the same script buffer for credential and endpoint extraction.
+		for _, match := range ScanSecrets(content, script) {
+			key := match.Rule + "\x00" + match.Match
+			if _, ok := seenSecrets[key]; ok {
+				continue
+			}
+			seenSecrets[key] = struct{}{}
+			secretMatches = append(secretMatches, match)
+			log.Warn("found %s in %s", match.Rule, script)
+		}
+
+		for _, endpoint := range ExtractEndpoints(content, script) {
+			if _, ok := seenEndpoints[endpoint]; ok {
+				continue
+			}
+			seenEndpoints[endpoint] = struct{}{}
+			endpoints = append(endpoints, endpoint)
+		}
 	}

 	spin.Stop()

+	if len(endpoints) > 0 {
+		log.Info("extracted %d endpoints", len(endpoints))
+	}
+
 	result := JavascriptScanResult{
 		SupabaseResults:      supabaseResults,
 		FoundEnvironmentVars: map[string]string{},
+		SecretMatches:        secretMatches,
+		Endpoints:            endpoints,
 	}

-	log.Complete(len(supabaseResults), "found")
+	log.Complete(len(supabaseResults)+len(secretMatches)+len(endpoints), "found")

 	return &result, nil
 }
@@ -0,0 +1,171 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+	"math"
+	"regexp"
+	"strings"
+)
+
+// SecretMatch is one credential the scanner pulled out of a script.
+type SecretMatch struct {
+	// Rule is the name of the secretRules entry that produced the match.
+	Rule   string `json:"rule"`
+	// Match is the reported value: the rule's value capture group when it
+	// has one, otherwise the whole regex match.
+	Match  string `json:"match"`
+	// Source is the srcURL that was passed to ScanSecrets for this script.
+	Source string `json:"source"`
+}
+
+// entropy thresholds gate the noisy rules: provider-prefixed keys are
+// trustworthy on their own, but a bare apikey="..." or a loose token blob is
+// only worth reporting once its shannon entropy clears the bar for "this looks
+// random, not an english word". the generic bar sits higher than the
+// aws-secret bar because the generic capture group also catches ordinary
+// identifiers.
+const (
+	genericMinEntropy   = 3.5
+	awsSecretMinEntropy = 3.0
+	// rules with no entropy requirement (prefix is already unique enough).
+	noEntropyGate = 0.0
+)
+
+// secretRules is the credential regex bank. what gets reported is capture
+// group 2 when the rule defines one and it is non-empty (the keyword-prefixed
+// rules), otherwise the whole match; minEntropy gates the high-entropy rules
+// so we don't flag every short literal.
+var secretRules = []struct {
+	name       string
+	re         *regexp.Regexp
+	minEntropy float64
+}{
+	{
+		// aws access key ids are fixed-shape and unmistakable.
+		name:       "aws access key id",
+		re:         regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// aws secret keys are 40-char base64-ish blobs; gate on entropy since the
+		// shape alone matches plenty of innocent strings. the value must be
+		// followed by a non-base64 character or end of input rather than \b:
+		// a word boundary never holds after a trailing "/" or "+" (so such
+		// secrets were missed) and it let the first 40 chars of a longer blob
+		// match. the trailing character is consumed but sits outside group 2.
+		name:       "aws secret access key",
+		re:         regexp.MustCompile(`\b((?:aws_secret_access_key|aws_secret|secret_key)["']?\s*[:=]\s*["']?)([A-Za-z0-9/+]{40})(?:[^A-Za-z0-9/+]|$)`),
+		minEntropy: awsSecretMinEntropy,
+	},
+	{
+		// github personal/oauth/user/server/refresh tokens share the ghX_ prefix.
+		name:       "github token",
+		re:         regexp.MustCompile(`\b((?:ghp|gho|ghu|ghs|ghr)_[0-9A-Za-z]{36,255})\b`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// slack bot/user/app/legacy tokens.
+		name:       "slack token",
+		re:         regexp.MustCompile(`\b(xox[baprs]-[0-9A-Za-z-]{10,})\b`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// stripe live secret and publishable keys (test keys are not findings).
+		name:       "stripe live key",
+		re:         regexp.MustCompile(`\b([sp]k_live_[0-9A-Za-z]{16,})\b`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// google api keys are a fixed AIza-prefixed 39-char shape.
+		name:       "google api key",
+		re:         regexp.MustCompile(`\b(AIza[0-9A-Za-z_-]{35})\b`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// pem private key blocks; the header alone is the smoking gun.
+		name:       "private key",
+		re:         regexp.MustCompile(`-{5}BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-{5}`),
+		minEntropy: noEntropyGate,
+	},
+	{
+		// generic apikey/secret/token = "<value>" assignments; the value is in
+		// group 2 and only reported if it looks random (entropy gate).
+		name:       "generic secret assignment",
+		re:         regexp.MustCompile(`(?i)\b(api[_-]?key|secret|token|password|passwd|auth)["']?\s*[:=]\s*["']([0-9A-Za-z\-._~+/]{16,})["']`),
+		minEntropy: genericMinEntropy,
+	},
+}
+
+// the value capture group lives at index 2 for the rules that prefix the
+// keyword; index 0 (whole match) is used otherwise.
+const (
+	valueGroupIndex = 2
+	wholeMatchIndex = 0
+)
+
+// ScanSecrets applies every rule in secretRules to a script body and returns
+// the matches that survive the rule's entropy gate. a (rule, value) pair is
+// reported once per call even if it occurs several times in content. srcURL
+// is copied onto each finding as its Source. the result is never nil.
+func ScanSecrets(content, srcURL string) []SecretMatch {
+	found := make([]SecretMatch, 0)
+	reported := make(map[string]struct{})
+
+	for _, rule := range secretRules {
+		// prefixed rules carry a zero threshold and skip the entropy check.
+		gated := rule.minEntropy > noEntropyGate
+
+		for _, groups := range rule.re.FindAllStringSubmatch(content, -1) {
+			value := secretValue(groups)
+			switch {
+			case value == "":
+				continue
+			case gated && shannonEntropy(value) < rule.minEntropy:
+				// english-y identifier, not a random-looking secret.
+				continue
+			}
+
+			// the rule name is part of the key, so the same value found by
+			// two different rules is still reported under each.
+			key := rule.name + "\x00" + value
+			if _, dup := reported[key]; dup {
+				continue
+			}
+			reported[key] = struct{}{}
+
+			found = append(found, SecretMatch{Rule: rule.name, Match: value, Source: srcURL})
+		}
+	}
+
+	return found
+}
+
+// secretValue picks the part of a regex submatch slice that gets reported.
+// when the rule has a value group (index valueGroupIndex) and it is non-empty
+// that group wins; in every other case the whitespace-trimmed whole match is
+// used. groups must hold at least the whole match at index 0.
+func secretValue(groups []string) string {
+	if len(groups) <= valueGroupIndex {
+		return strings.TrimSpace(groups[wholeMatchIndex])
+	}
+	if v := groups[valueGroupIndex]; v != "" {
+		return v
+	}
+	return strings.TrimSpace(groups[wholeMatchIndex])
+}
+
+// shannonEntropy is the per-character shannon entropy (bits) of s, used to tell
+// random-looking secrets apart from plain words. empty input is zero entropy.
+func shannonEntropy(s string) float64 {
+	if s == "" {
+		return 0
+	}
+
+	counts := make(map[rune]int)
+	for _, r := range s {
+		counts[r]++
+	}
+
+	length := float64(len([]rune(s)))
+	var entropy float64
+	for _, count := range counts {
+		p := float64(count) / length
+		entropy -= p * math.Log2(p)
+	}
+
+	return entropy
+}
@@ -0,0 +1,160 @@
+/*
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+:                                                                               :
+:   █▀ █ █▀▀   ·   Blazing-fast pentesting suite                                :
+:   ▄█ █ █▀    ·   BSD 3-Clause License                                         :
+:                                                                               :
+:   (c) 2022-2026 vmfunc, xyzeva,                                               :
+:                 lunchcat alumni & contributors                                :
+:                                                                               :
+·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
+*/
+
+package js
+
+import (
+	"fmt"
+	"testing"
+)
+
+// the provider-shaped fake tokens below are assembled from two fragments on
+// purpose: a contiguous provider token literal in a committed file trips
+// github push-protection (and every other secret scanner) even though it's a
+// test fixture. splitting it keeps the literal out of source while ScanSecrets
+// still sees the joined value. fakeGeneric is left whole: it is a plain
+// alphanumeric string with none of the provider prefixes.
+const (
+	fakeAWSKey    = "AKIA" + "IOSFODNN7EXAMPLE"
+	fakeAWSSecret = "wJalrXUtnFEMI/K7MDENG/" + "bPxRfiCYEXAMPLEKEY"
+	fakeGitHub    = "ghp_" + "aB3dEfGh1jKlMn0pQrStUvWxYz012345abcd"
+	fakeSlack     = "xoxb-" + "123456789012-abcdefABCDEF1234567890ab"
+	fakeStripe    = "sk_live_" + "4eC39HqLyjWDarjtT1zdp7dc"
+	fakeGoogle    = "AIza" + "SyA1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q"
+	fakeGeneric   = "x9Kq2Lm7Pz4Rt6Wv8Bn3Cd5Fg1Hj0As"
+	fakePEM       = "-----BEGIN RSA PRIVATE " + "KEY-----\nMIIEpAIB..."
+)
+
+func TestScanSecrets(t *testing.T) {
+	tests := []struct {
+		name     string
+		content  string
+		wantRule string // rule expected on the first match, "" means no match
+		wantNone bool
+	}{
+		{
+			name:     "aws access key id",
+			content:  fmt.Sprintf(`const k = %q;`, fakeAWSKey),
+			wantRule: "aws access key id",
+		},
+		{
+			name:     "github personal token",
+			content:  fmt.Sprintf(`token: %q`, fakeGitHub),
+			wantRule: "github token",
+		},
+		{
+			name:     "slack bot token",
+			content:  fmt.Sprintf(`slack=%q`, fakeSlack),
+			wantRule: "slack token",
+		},
+		{
+			name:     "stripe live secret key",
+			content:  fmt.Sprintf(`var sk = %q;`, fakeStripe),
+			wantRule: "stripe live key",
+		},
+		{
+			name:     "google api key",
+			content:  fmt.Sprintf(`apiKey: %q`, fakeGoogle),
+			wantRule: "google api key",
+		},
+		{
+			name:     "pem private key header",
+			content:  fakePEM,
+			wantRule: "private key",
+		},
+		{
+			name:     "generic high-entropy api key assignment",
+			content:  fmt.Sprintf(`apikey = %q`, fakeGeneric),
+			wantRule: "generic secret assignment",
+		},
+		{
+			name:     "aws secret with entropy",
+			content:  fmt.Sprintf(`aws_secret_access_key=%q`, fakeAWSSecret),
+			wantRule: "aws secret access key",
+		},
+		{
+			// low-entropy assignment is a placeholder, not a real secret.
+			name:     "low entropy generic assignment not flagged",
+			content:  `password = "aaaaaaaaaaaaaaaaaaaaaaaa"`,
+			wantNone: true,
+		},
+		{
+			// a repetitive placeholder is low-entropy and must not trip the gate.
+			name:     "low entropy repeated pattern not flagged",
+			content:  `token = "abababababababababababab"`,
+			wantNone: true,
+		},
+		{
+			name:     "no secrets in plain code",
+			content:  `function add(a, b) { return a + b; }`,
+			wantNone: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ScanSecrets(tt.content, "https://example.com/app.js")
+
+			if tt.wantNone {
+				if len(got) != 0 {
+					t.Fatalf("expected no matches, got %+v", got)
+				}
+				return
+			}
+
+			if len(got) == 0 {
+				t.Fatalf("expected a %q match, got none", tt.wantRule)
+			}
+			if got[0].Rule != tt.wantRule {
+				t.Errorf("rule = %q, want %q", got[0].Rule, tt.wantRule)
+			}
+			if got[0].Match == "" {
+				t.Error("match value is empty")
+			}
+			if got[0].Source != "https://example.com/app.js" {
+				t.Errorf("source = %q, want the passed url", got[0].Source)
+			}
+		})
+	}
+}
+
+// TestScanSecretsDedupesWithinSource checks that one key referenced twice in
+// the same script is reported as a single finding.
+func TestScanSecretsDedupesWithinSource(t *testing.T) {
+	script := fmt.Sprintf(`a = %q; b = %q;`, fakeAWSKey, fakeAWSKey)
+	if got := ScanSecrets(script, "https://example.com/app.js"); len(got) != 1 {
+		t.Fatalf("expected 1 deduped match, got %d: %+v", len(got), got)
+	}
+}
+
+// TestShannonEntropy checks which side of the generic entropy gate a few
+// representative strings land on: random-ish input must clear
+// genericMinEntropy, empty and repetitive input must not.
+func TestShannonEntropy(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		wantHigh bool // true when the input is expected to clear the gate
+	}{
+		{name: "empty is zero", input: "", wantHigh: false},
+		{name: "repeated char is low", input: "aaaaaaaaaaaaaaaa", wantHigh: false},
+		{name: "random blob is high", input: fakeGeneric, wantHigh: true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := shannonEntropy(tt.input)
+			cleared := got >= genericMinEntropy
+
+			switch {
+			case tt.wantHigh && !cleared:
+				t.Errorf("entropy %f below generic gate %f", got, genericMinEntropy)
+			case !tt.wantHigh && cleared:
+				t.Errorf("entropy %f unexpectedly cleared generic gate %f", got, genericMinEntropy)
+			}
+		})
+	}
+}
@@ -51,7 +51,7 @@ vulnerability scanning with nuclei templates.
 automated google dorking.
 .TP
 .B \-js
-javascript analysis.
+javascript analysis + secret and endpoint extraction.
 .TP
 .B \-c3
 cloud storage misconfiguration scan.