feat(js): extract secrets and endpoints from scanned javascript

the -js pipeline already pulls every <script> into a buffer but only
mines supabase jwts from it. reuse that buffer to run a credential
regex bank (aws/github/slack/stripe/google keys, pem blocks, plus
entropy-gated generic apikey/secret/token assignments) and a
linkfinder-style endpoint extractor that resolves relatives to
absolute urls. both dedupe across scripts and surface through the
existing js logger and result struct, no new flag.
This commit is contained in:
vmfunc
2026-06-09 17:54:23 -07:00
committed by vmfunc
parent 65ce36e963
commit b4e78114d7
9 changed files with 602 additions and 4 deletions
+1
View File
@@ -88,6 +88,7 @@ linters:
linters:
- errcheck
- noctx
- gosec # fake credentials in secret-scanner fixtures are not real keys
issues:
max-issues-per-linter: 50
+1 -1
View File
@@ -158,7 +158,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
| `-ports` | port scanning (common/full) |
| `-nuclei` | vulnerability scanning with nuclei templates |
| `-dork` | automated google dorking |
| `-js` | javascript analysis |
| `-js` | javascript analysis + secret and endpoint extraction |
| `-c3` | cloud storage misconfiguration |
| `-headers` | http header analysis |
| `-sh` | security header analysis (missing/weak headers) |
+1 -1
View File
@@ -79,7 +79,7 @@ scopes: `common` (top ports), `full` (all ports)
### javascript analysis
`-js` - analyze javascript files
`-js` - analyze javascript files + secret and endpoint extraction
```bash
./sif -u https://example.com -js
+128
View File
@@ -0,0 +1,128 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package js
import (
"net/url"
"regexp"
"slices"
"strings"
urlutil "github.com/projectdiscovery/utils/url"
)
// endpointRegex is a linkfinder-style matcher for quoted paths and urls inside
// js. between an opening and a closing quote character (", ' or backtick, not
// necessarily the same one) it recognises three shapes: absolute or
// protocol-relative urls (http://, https://, //host), root-relative paths
// (/api/...), and dotted-relative paths (./x, ../x); the two path shapes may
// carry a query string. bare words with no leading slash, dot or scheme are
// not matched. the inner alternation lives in a single capture group so
// FindAllStringSubmatch hands back just the value at index 1.
var endpointRegex = regexp.MustCompile(`["'\x60]` +
`(` +
`(?:https?:)?//[^\s"'\x60]{2,}` + // protocol-relative or absolute url
`|` +
`/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // root-relative path
`|` +
`\.{1,2}/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // dotted-relative path
`)` +
`["'\x60]`)
// minEndpointLen is the shortest candidate, measured in bytes, that isEndpoint
// will accept; below this it's almost always noise like "/" or a single
// slash-prefixed letter.
const minEndpointLen = 3
// mimePrefixes lists top-level mime types (text/html, application/json, ...).
// isEndpoint rejects any candidate that starts with one of them, compared
// case-insensitively, because a mime type is never an endpoint.
// NOTE(review): every alternative in endpointRegex starts with a slash, a dot
// or a scheme, so it looks like such candidates cannot reach this filter
// today; presumably it is a guard for a wider regex — confirm before removing.
var mimePrefixes = []string{
"text/", "image/", "audio/", "video/", "font/",
"application/", "multipart/", "model/", "message/",
}
// ExtractEndpoints scans a script body for quoted paths and urls and returns
// the surviving candidates sorted and free of duplicates. anything isEndpoint
// rejects is dropped, and relative candidates are made absolute against
// baseURL. when baseURL cannot be parsed the relatives are reported unchanged
// rather than aborting the scan. the result is nil when the regex finds
// nothing at all.
func ExtractEndpoints(content, baseURL string) []string {
	found := endpointRegex.FindAllStringSubmatch(content, -1)
	if len(found) == 0 {
		return nil
	}

	parsedBase, parseErr := urlutil.Parse(baseURL)

	unique := make(map[string]struct{}, len(found))
	out := make([]string, 0, len(found))
	for _, groups := range found {
		// index 1 is the single capture group holding the quoted value.
		raw := strings.TrimSpace(groups[1])
		if !isEndpoint(raw) {
			continue
		}

		target := raw
		// resolution needs a cleanly parsed base and a candidate that is
		// actually relative; everything else is reported verbatim.
		if parseErr == nil && parsedBase.URL != nil && isRelative(raw) {
			target = resolveRelative(parsedBase.URL, raw)
		}

		if _, dup := unique[target]; dup {
			continue
		}
		unique[target] = struct{}{}
		out = append(out, target)
	}

	slices.Sort(out)
	return out
}
// isEndpoint reports whether s is worth keeping as an endpoint candidate. it
// rejects fragments shorter than minEndpointLen, strings with no slash at all
// (plain tokens such as "word" or "a.b"), and anything that begins with one of
// the top-level mime types in mimePrefixes, compared case-insensitively.
func isEndpoint(s string) bool {
	switch {
	case len(s) < minEndpointLen:
		return false
	case !strings.Contains(s, "/"):
		return false
	}

	folded := strings.ToLower(s)
	for _, prefix := range mimePrefixes {
		// a string that starts with "type/" cannot also start with "/", so a
		// slash-led route such as /application/users is never caught here.
		if strings.HasPrefix(folded, prefix) {
			return false
		}
	}
	return true
}
// isRelative reports whether candidate has no scheme or host of its own and
// therefore needs the base url to become absolute. protocol-relative values
// (//host/...) and http(s) urls count as already absolute and yield false.
func isRelative(candidate string) bool {
	for _, lead := range []string{"//", "http://", "https://"} {
		if strings.HasPrefix(candidate, lead) {
			return false
		}
	}
	return true
}
// resolveRelative makes ref absolute by resolving it against base with the
// stdlib reference resolver. a ref that url.Parse rejects is handed back
// unchanged so the finding is not lost.
func resolveRelative(base *url.URL, ref string) string {
	if refURL, err := url.Parse(ref); err == nil {
		return base.ResolveReference(refURL).String()
	}
	return ref
}
+106
View File
@@ -0,0 +1,106 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package js
import (
"slices"
"testing"
)
// TestExtractEndpoints is a table test over ExtractEndpoints with a fixed base
// of https://example.com/static/app.js. each case lists values that must show
// up in the result and values that must not.
func TestExtractEndpoints(t *testing.T) {
	const base = "https://example.com/static/app.js"

	cases := []struct {
		name     string
		script   string
		mustHave []string // every entry has to be present in the result
		mustLack []string // no entry may be present in the result
	}{
		{
			name:     "root-relative api path resolves to absolute",
			script:   `fetch("/api/users")`,
			mustHave: []string{"https://example.com/api/users"},
		},
		{
			name:     "absolute url passes through untouched",
			script:   `const u = "https://api.example.org/v1/login";`,
			mustHave: []string{"https://api.example.org/v1/login"},
		},
		{
			name:     "dotted-relative path resolves against base dir",
			script:   `import("./chunks/main.js")`,
			mustHave: []string{"https://example.com/static/chunks/main.js"},
		},
		{
			name:     "query string is preserved",
			script:   `axios.get("/api/search?q=test")`,
			mustHave: []string{"https://example.com/api/search?q=test"},
		},
		{
			name:     "mime types are filtered out",
			script:   `headers["Content-Type"] = "application/json"; var t = "text/html";`,
			mustLack: []string{"application/json", "text/html"},
		},
		{
			name:     "single words without a slash are ignored",
			script:   `var x = "hello"; var y = "world";`,
			mustLack: []string{"hello", "world"},
		},
		{
			name:   "multiple endpoints deduped",
			script: `fetch("/api/users"); fetch("/api/users"); fetch("/api/posts");`,
			mustHave: []string{
				"https://example.com/api/users",
				"https://example.com/api/posts",
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := ExtractEndpoints(tc.script, base)
			for _, want := range tc.mustHave {
				if slices.Contains(got, want) {
					continue
				}
				t.Errorf("expected %q in %v", want, got)
			}
			for _, absent := range tc.mustLack {
				if !slices.Contains(got, absent) {
					continue
				}
				t.Errorf("did not expect %q in %v", absent, got)
			}
		})
	}
}
// TestExtractEndpointsDedupes checks that a path referenced twice in one
// script is reported exactly once after resolution.
func TestExtractEndpointsDedupes(t *testing.T) {
	const resolved = "https://example.com/api/x"

	got := ExtractEndpoints(`fetch("/api/x"); fetch("/api/x");`, "https://example.com/app.js")

	hits := 0
	for _, endpoint := range got {
		if endpoint == resolved {
			hits++
		}
	}
	if hits != 1 {
		t.Fatalf("expected /api/x once, got %d times in %v", hits, got)
	}
}
// TestExtractEndpointsBadBaseKeepsRelatives pins the fallback for a base url
// that won't parse: the finding must survive, reported as the raw relative.
func TestExtractEndpointsBadBaseKeepsRelatives(t *testing.T) {
	const relative = "/api/users"

	got := ExtractEndpoints(`fetch("/api/users")`, "::not a url::")
	if slices.Contains(got, relative) {
		return
	}
	t.Errorf("expected relative /api/users preserved, got %v", got)
}
+33 -1
View File
@@ -32,6 +32,8 @@ import (
type JavascriptScanResult struct {
// supabase findings appended from every scanned script.
SupabaseResults []supabaseScanResult `json:"supabase_results"`
// NOTE(review): the visible part of JavascriptScan always sets this to an
// empty map — confirm whether anything populates it.
FoundEnvironmentVars map[string]string `json:"environment_variables"`
// credentials returned by ScanSecrets, deduped on rule+match across scripts.
SecretMatches []SecretMatch `json:"secret_matches"`
// values returned by ExtractEndpoints, deduped across scripts.
Endpoints []string `json:"endpoints"`
}
// ResultType implements the ScanResult interface.
@@ -116,6 +118,11 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
log.Info("Got %d scripts, now running scans on them", len(scripts))
supabaseResults := make([]supabaseScanResult, 0, len(scripts))
secretMatches := make([]SecretMatch, 0)
endpoints := make([]string, 0)
// dedupe secrets and endpoints across every script, not just within one.
seenSecrets := make(map[string]struct{})
seenEndpoints := make(map[string]struct{})
for _, script := range scripts {
charmlog.Debugf("Scanning %s", script)
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, script, http.NoBody)
@@ -147,16 +154,41 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
if scriptSupabaseResults != nil {
supabaseResults = append(supabaseResults, scriptSupabaseResults...)
}
// reuse the same script buffer for credential and endpoint extraction.
for _, match := range ScanSecrets(content, script) {
key := match.Rule + "\x00" + match.Match
if _, ok := seenSecrets[key]; ok {
continue
}
seenSecrets[key] = struct{}{}
secretMatches = append(secretMatches, match)
log.Warn("found %s in %s", match.Rule, script)
}
for _, endpoint := range ExtractEndpoints(content, script) {
if _, ok := seenEndpoints[endpoint]; ok {
continue
}
seenEndpoints[endpoint] = struct{}{}
endpoints = append(endpoints, endpoint)
}
}
spin.Stop()
if len(endpoints) > 0 {
log.Info("extracted %d endpoints", len(endpoints))
}
result := JavascriptScanResult{
SupabaseResults: supabaseResults,
FoundEnvironmentVars: map[string]string{},
SecretMatches: secretMatches,
Endpoints: endpoints,
}
log.Complete(len(supabaseResults), "found")
log.Complete(len(supabaseResults)+len(secretMatches)+len(endpoints), "found")
return &result, nil
}
+171
View File
@@ -0,0 +1,171 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package js
import (
"math"
"regexp"
"strings"
)
// SecretMatch is one credential the scanner pulled out of a script.
type SecretMatch struct {
// name of the secretRules entry that produced the match.
Rule string `json:"rule"`
// the reported portion of the regex match, as chosen by secretValue.
Match string `json:"match"`
// the srcURL that was passed to ScanSecrets for the script being scanned.
Source string `json:"source"`
}
// entropy thresholds gate the noisy rules: provider-prefixed keys are
// trustworthy on their own, but a bare apikey="..." or a loose 40-char blob is
// only worth reporting once its shannon entropy clears the bar for "this looks
// random, not an english word". the generic bar sits higher than the
// aws-secret bar because the generic capture group also catches ordinary
// identifiers.
const (
	// genericMinEntropy is the minimum per-character entropy (bits) for a value
	// captured by the generic keyword-assignment rule.
	genericMinEntropy = 3.5
	// awsSecretMinEntropy is the minimum per-character entropy (bits) for a
	// value captured by the aws secret access key rule.
	awsSecretMinEntropy = 3.0
	// noEntropyGate marks rules with no entropy requirement (the prefix or
	// header is already unique enough).
	noEntropyGate = 0.0
)

// secretRules is the credential regex bank. the reported value is capture
// group 2 when a rule has one, otherwise the whole match; minEntropy gates the
// noisy rules so we don't flag every short literal.
//
// go's regexp has no lookahead and \b is an ascii word boundary, so a trailing
// \b can never match after a value that ends in a non-word character such as
// '/', '+' or '-'. rules whose value alphabet contains such characters end in
// an explicit, consumed delimiter instead and keep the value in group 2 so the
// delimiter is not part of what gets reported.
var secretRules = []struct {
	name       string
	re         *regexp.Regexp
	minEntropy float64
}{
	{
		// aws access key ids are fixed-shape and unmistakable.
		name:       "aws access key id",
		re:         regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// aws secret keys are 40-char base64-ish blobs; gate on entropy since the
		// shape alone matches plenty of innocent strings. the value (group 2)
		// must be followed by a character outside its alphabet or by end of
		// input: that accepts keys ending in '/' or '+' and refuses to report
		// the first 40 characters of a longer blob.
		name:       "aws secret access key",
		re:         regexp.MustCompile(`\b((?:aws_secret_access_key|aws_secret|secret_key)["']?\s*[:=]\s*["']?)([A-Za-z0-9/+]{40})(?:[^A-Za-z0-9/+]|$)`),
		minEntropy: awsSecretMinEntropy,
	},
	{
		// github personal/oauth/server/refresh/app tokens share the ghX_ prefix.
		name:       "github token",
		re:         regexp.MustCompile(`\b((?:ghp|gho|ghu|ghs|ghr)_[0-9A-Za-z]{36,255})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// slack bot/user/app/legacy tokens.
		name:       "slack token",
		re:         regexp.MustCompile(`\b(xox[baprs]-[0-9A-Za-z-]{10,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// stripe live secret and publishable keys (test keys are not findings).
		name:       "stripe live key",
		re:         regexp.MustCompile(`\b([sp]k_live_[0-9A-Za-z]{16,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// google api keys are a fixed AIza-prefixed 39-char shape whose alphabet
		// includes '-', so the key may end in one. the empty first group only
		// holds the leading word boundary; it exists to put the key in group 2,
		// keeping the consumed trailing delimiter out of the reported value.
		name:       "google api key",
		re:         regexp.MustCompile(`(\b)(AIza[0-9A-Za-z_-]{35})(?:[^0-9A-Za-z_-]|$)`),
		minEntropy: noEntropyGate,
	},
	{
		// pem private key blocks; the header alone is the smoking gun.
		name:       "private key",
		re:         regexp.MustCompile(`-{5}BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-{5}`),
		minEntropy: noEntropyGate,
	},
	{
		// generic apikey/secret/token = "<value>" assignments; the value is in
		// group 2 and only reported if it looks random (entropy gate).
		name:       "generic secret assignment",
		re:         regexp.MustCompile(`(?i)\b(api[_-]?key|secret|token|password|passwd|auth)["']?\s*[:=]\s*["']([0-9A-Za-z\-._~+/]{16,})["']`),
		minEntropy: genericMinEntropy,
	},
}
// submatch indexes used by secretValue: rules with two capture groups keep the
// secret itself in group 2 (group 1 is the leading keyword/context); for every
// other rule the whole match at index 0 is reported.
const (
valueGroupIndex = 2
wholeMatchIndex = 0
)
// ScanSecrets applies every rule in secretRules to a script body and returns
// the matches that survive the rule's entropy gate, with repeats of the same
// rule+value collapsed to one finding for this source. srcURL is copied onto
// each finding. the result is an empty, non-nil slice when nothing matches.
func ScanSecrets(content, srcURL string) []SecretMatch {
	found := make([]SecretMatch, 0)
	reported := make(map[string]struct{})

	for _, rule := range secretRules {
		// a zero threshold means the rule is trusted without an entropy check.
		gated := rule.minEntropy > noEntropyGate

		for _, groups := range rule.re.FindAllStringSubmatch(content, -1) {
			value := secretValue(groups)
			switch {
			case value == "":
				continue
			case gated && shannonEntropy(value) < rule.minEntropy:
				// english-y identifier rather than a random-looking secret.
				continue
			}

			// the same key referenced twice in one source is a single finding.
			id := rule.name + "\x00" + value
			if _, dup := reported[id]; dup {
				continue
			}
			reported[id] = struct{}{}

			found = append(found, SecretMatch{
				Rule:   rule.name,
				Match:  value,
				Source: srcURL,
			})
		}
	}
	return found
}
// secretValue picks what gets reported from one regex submatch slice: the
// capture at valueGroupIndex when it exists and is non-empty, otherwise the
// whole match with surrounding whitespace trimmed.
func secretValue(groups []string) string {
	if valueGroupIndex < len(groups) {
		if captured := groups[valueGroupIndex]; captured != "" {
			return captured
		}
	}
	return strings.TrimSpace(groups[wholeMatchIndex])
}
// shannonEntropy is the per-character shannon entropy (bits) of s, used to tell
// random-looking secrets apart from plain words. characters are runes, not
// bytes, and empty input is zero entropy.
//
// the result is reproducible for a given input: terms are summed in order of
// each rune's first appearance in s rather than in map iteration order, which
// go randomises and which could otherwise shift the float sum by a rounding
// step between runs.
func shannonEntropy(s string) float64 {
	if s == "" {
		return 0
	}

	// tally occurrences and the rune count in one pass; no []rune copy needed.
	counts := make(map[rune]int)
	total := 0
	for _, r := range s {
		counts[r]++
		total++
	}

	length := float64(total)
	var entropy float64
	for _, r := range s {
		n := counts[r]
		if n == 0 {
			// this rune's term was already added at its first appearance.
			continue
		}
		counts[r] = 0
		p := float64(n) / length
		entropy -= p * math.Log2(p)
	}
	return entropy
}
+160
View File
@@ -0,0 +1,160 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package js
import (
"fmt"
"testing"
)
// the provider-shaped fake tokens below are assembled from two fragments on
// purpose: a contiguous provider token literal in a committed file trips
// github push-protection (and every other secret scanner) even though it's a
// test fixture. splitting it keeps the literal out of source while ScanSecrets
// still sees the joined value. fakeGeneric has no provider prefix and is left
// as a single literal.
const (
fakeAWSKey = "AKIA" + "IOSFODNN7EXAMPLE"
fakeAWSSecret = "wJalrXUtnFEMI/K7MDENG/" + "bPxRfiCYEXAMPLEKEY"
fakeGitHub = "ghp_" + "aB3dEfGh1jKlMn0pQrStUvWxYz012345abcd"
fakeSlack = "xoxb-" + "123456789012-abcdefABCDEF1234567890ab"
fakeStripe = "sk_live_" + "4eC39HqLyjWDarjtT1zdp7dc"
fakeGoogle = "AIza" + "SyA1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q"
fakeGeneric = "x9Kq2Lm7Pz4Rt6Wv8Bn3Cd5Fg1Hj0As"
fakePEM = "-----BEGIN RSA PRIVATE " + "KEY-----\nMIIEpAIB..."
)
// TestScanSecrets is a table test over ScanSecrets. a case either names the
// rule expected on the first finding, or is marked clean and must produce no
// findings at all. every finding must carry a non-empty value and the source
// url that was passed in.
func TestScanSecrets(t *testing.T) {
	const src = "https://example.com/app.js"

	cases := []struct {
		name   string
		script string
		rule   string // rule expected on the first finding
		clean  bool   // true when the script must yield nothing
	}{
		{
			name:   "aws access key id",
			script: fmt.Sprintf(`const k = %q;`, fakeAWSKey),
			rule:   "aws access key id",
		},
		{
			name:   "github personal token",
			script: fmt.Sprintf(`token: %q`, fakeGitHub),
			rule:   "github token",
		},
		{
			name:   "slack bot token",
			script: fmt.Sprintf(`slack=%q`, fakeSlack),
			rule:   "slack token",
		},
		{
			name:   "stripe live secret key",
			script: fmt.Sprintf(`var sk = %q;`, fakeStripe),
			rule:   "stripe live key",
		},
		{
			name:   "google api key",
			script: fmt.Sprintf(`apiKey: %q`, fakeGoogle),
			rule:   "google api key",
		},
		{
			name:   "pem private key header",
			script: fakePEM,
			rule:   "private key",
		},
		{
			name:   "generic high-entropy api key assignment",
			script: fmt.Sprintf(`apikey = %q`, fakeGeneric),
			rule:   "generic secret assignment",
		},
		{
			name:   "aws secret with entropy",
			script: fmt.Sprintf(`aws_secret_access_key=%q`, fakeAWSSecret),
			rule:   "aws secret access key",
		},
		{
			// a single repeated character is a placeholder, not a secret.
			name:   "low entropy generic assignment not flagged",
			script: `password = "aaaaaaaaaaaaaaaaaaaaaaaa"`,
			clean:  true,
		},
		{
			// a two-character repeating pattern stays under the gate too.
			name:   "low entropy repeated pattern not flagged",
			script: `token = "abababababababababababab"`,
			clean:  true,
		},
		{
			name:   "no secrets in plain code",
			script: `function add(a, b) { return a + b; }`,
			clean:  true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := ScanSecrets(tc.script, src)

			if tc.clean {
				if len(got) != 0 {
					t.Fatalf("expected no matches, got %+v", got)
				}
				return
			}

			if len(got) == 0 {
				t.Fatalf("expected a %q match, got none", tc.rule)
			}
			first := got[0]
			if first.Rule != tc.rule {
				t.Errorf("rule = %q, want %q", first.Rule, tc.rule)
			}
			if first.Match == "" {
				t.Error("match value is empty")
			}
			if first.Source != src {
				t.Errorf("source = %q, want the passed url", first.Source)
			}
		})
	}
}
// TestScanSecretsDedupesWithinSource checks that one key referenced twice in
// the same script is reported as a single finding.
func TestScanSecretsDedupesWithinSource(t *testing.T) {
	script := fmt.Sprintf(`a = %q; b = %q;`, fakeAWSKey, fakeAWSKey)

	if got := ScanSecrets(script, "https://example.com/app.js"); len(got) != 1 {
		t.Fatalf("expected 1 deduped match, got %d: %+v", len(got), got)
	}
}
// TestShannonEntropy checks shannonEntropy against the generic gate: the
// random-looking fixture has to reach genericMinEntropy, while empty and
// repeated-character inputs have to stay below it.
func TestShannonEntropy(t *testing.T) {
	cases := []struct {
		name  string
		input string
		// true when the input is expected to reach the generic gate.
		clearsGate bool
	}{
		{name: "empty is zero", input: "", clearsGate: false},
		{name: "repeated char is low", input: "aaaaaaaaaaaaaaaa", clearsGate: false},
		{name: "random blob is high", input: fakeGeneric, clearsGate: true},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			entropy := shannonEntropy(tc.input)
			if tc.clearsGate {
				if entropy < genericMinEntropy {
					t.Errorf("entropy %f below generic gate %f", entropy, genericMinEntropy)
				}
				return
			}
			if entropy >= genericMinEntropy {
				t.Errorf("entropy %f unexpectedly cleared generic gate %f", entropy, genericMinEntropy)
			}
		})
	}
}
+1 -1
View File
@@ -51,7 +51,7 @@ vulnerability scanning with nuclei templates.
automated google dorking.
.TP
.B \-js
javascript analysis.
javascript analysis + secret and endpoint extraction.
.TP
.B \-c3
cloud storage misconfiguration scan.