mirror of
https://github.com/lunchcat/sif.git
synced 2026-06-12 11:01:24 -07:00
feat(js): extract secrets and endpoints from scanned javascript
the -js pipeline already pulls every <script> into a buffer but only mined supabase jwts from it. reuse that buffer to run a credential regex bank (aws/github/slack/stripe/google keys, pem blocks, plus entropy-gated generic apikey/secret/token assignments) and a linkfinder-style endpoint extractor that resolves relatives to absolute urls. both dedupe across scripts and surface through the existing js logger and result struct, no new flag.
This commit is contained in:
@@ -88,6 +88,7 @@ linters:
|
||||
linters:
|
||||
- errcheck
|
||||
- noctx
|
||||
- gosec # fake credentials in secret-scanner fixtures are not real keys
|
||||
|
||||
issues:
|
||||
max-issues-per-linter: 50
|
||||
|
||||
@@ -158,7 +158,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
|
||||
| `-ports` | port scanning (common/full) |
|
||||
| `-nuclei` | vulnerability scanning with nuclei templates |
|
||||
| `-dork` | automated google dorking |
|
||||
| `-js` | javascript analysis |
|
||||
| `-js` | javascript analysis + secret and endpoint extraction |
|
||||
| `-c3` | cloud storage misconfiguration |
|
||||
| `-headers` | http header analysis |
|
||||
| `-sh` | security header analysis (missing/weak headers) |
|
||||
|
||||
+1
-1
@@ -79,7 +79,7 @@ scopes: `common` (top ports), `full` (all ports)
|
||||
|
||||
### javascript analysis
|
||||
|
||||
`-js` - analyze javascript files
|
||||
`-js` - analyze javascript files + secret and endpoint extraction
|
||||
|
||||
```bash
|
||||
./sif -u https://example.com -js
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
: :
|
||||
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
||||
: ▄█ █ █▀ · BSD 3-Clause License :
|
||||
: :
|
||||
: (c) 2022-2026 vmfunc, xyzeva, :
|
||||
: lunchcat alumni & contributors :
|
||||
: :
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
*/
|
||||
|
||||
package js
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
urlutil "github.com/projectdiscovery/utils/url"
|
||||
)
|
||||
|
||||
// endpointRegex is a linkfinder-style matcher for quoted paths and urls inside
// js. the value must sit between a pair of quote characters (", ' or backtick)
// and take one of three shapes:
//
//   - an absolute or protocol-relative url (https://host/..., //host/...)
//   - a root-relative path (/api/...), optionally followed by a query string
//   - a dotted-relative path (./x, ../x), optionally followed by a query string
//
// all three shapes share a single capture group, so index 1 of every
// FindAllStringSubmatch result is the bare value without its quotes.
var endpointRegex = regexp.MustCompile(
	`["'\x60]` + // opening quote
		`(` +
		`(?:https?:)?//[^\s"'\x60]{2,}` + // protocol-relative or absolute url
		`|` + `/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // root-relative path
		`|` + `\.{1,2}/[A-Za-z0-9_\-./]+(?:\?[^\s"'\x60]*)?` + // dotted-relative path
		`)` +
		`["'\x60]`, // closing quote
)

// minEndpointLen is the shortest candidate still treated as an endpoint;
// anything shorter is almost always noise like "/" or a slash plus one letter.
const minEndpointLen = 3

// mimePrefixes lists top-level mime types (text/html, application/json, ...).
// a candidate that starts with one of these is a content type, not an endpoint.
var mimePrefixes = []string{
	"text/",
	"image/",
	"audio/",
	"video/",
	"font/",
	"application/",
	"multipart/",
	"model/",
	"message/",
}
|
||||
|
||||
// ExtractEndpoints pulls candidate paths and urls out of a script body, dedupes
|
||||
// them, drops obvious noise, and resolves relatives against baseURL so callers
|
||||
// get absolute targets where possible. a baseURL that won't parse just leaves
|
||||
// relatives as-is rather than failing the whole scan.
|
||||
func ExtractEndpoints(content, baseURL string) []string {
|
||||
groups := endpointRegex.FindAllStringSubmatch(content, -1)
|
||||
if len(groups) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
base, baseErr := urlutil.Parse(baseURL)
|
||||
|
||||
endpoints := make([]string, 0, len(groups))
|
||||
seen := make(map[string]struct{}, len(groups))
|
||||
for i := 0; i < len(groups); i++ {
|
||||
candidate := strings.TrimSpace(groups[i][1])
|
||||
if !isEndpoint(candidate) {
|
||||
continue
|
||||
}
|
||||
|
||||
resolved := candidate
|
||||
// only relatives need resolving, and only if the base parsed cleanly.
|
||||
if baseErr == nil && base.URL != nil && isRelative(candidate) {
|
||||
resolved = resolveRelative(base.URL, candidate)
|
||||
}
|
||||
|
||||
if _, ok := seen[resolved]; ok {
|
||||
continue
|
||||
}
|
||||
seen[resolved] = struct{}{}
|
||||
endpoints = append(endpoints, resolved)
|
||||
}
|
||||
|
||||
slices.Sort(endpoints)
|
||||
return endpoints
|
||||
}
|
||||
|
||||
// isEndpoint filters out the junk that the broad regex inevitably catches:
|
||||
// too-short fragments, mime types, and single dotted words with no path.
|
||||
func isEndpoint(s string) bool {
|
||||
if len(s) < minEndpointLen {
|
||||
return false
|
||||
}
|
||||
|
||||
lower := strings.ToLower(s)
|
||||
for i := 0; i < len(mimePrefixes); i++ {
|
||||
// a mime type is "type/subtype" with no further path; an api route like
|
||||
// /application/users has a leading slash, so anchor on the bare prefix.
|
||||
if strings.HasPrefix(lower, mimePrefixes[i]) && !strings.HasPrefix(lower, "/") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// reject "word" or "a.b" with no slash at all: not a path, just a token.
|
||||
if !strings.Contains(s, "/") {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// isRelative reports whether candidate has neither scheme nor host and so
// needs a base url to become absolute. protocol-relative values (//host) and
// http:// or https:// urls are not relative. the prefix comparison is
// case-sensitive.
func isRelative(candidate string) bool {
	for _, prefix := range [...]string{"//", "http://", "https://"} {
		if strings.HasPrefix(candidate, prefix) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// resolveRelative makes ref absolute by resolving it against base with the
// stdlib reference resolver. a ref that url.Parse rejects is returned
// unchanged.
func resolveRelative(base *url.URL, ref string) string {
	if parsed, err := url.Parse(ref); err == nil {
		return base.ResolveReference(parsed).String()
	}
	return ref
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
: :
|
||||
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
||||
: ▄█ █ █▀ · BSD 3-Clause License :
|
||||
: :
|
||||
: (c) 2022-2026 vmfunc, xyzeva, :
|
||||
: lunchcat alumni & contributors :
|
||||
: :
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
*/
|
||||
|
||||
package js
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractEndpoints(t *testing.T) {
|
||||
const base = "https://example.com/static/app.js"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
wantSome []string // each must appear in the result
|
||||
wantAbsent []string // none of these may appear
|
||||
}{
|
||||
{
|
||||
name: "root-relative api path resolves to absolute",
|
||||
content: `fetch("/api/users")`,
|
||||
wantSome: []string{"https://example.com/api/users"},
|
||||
},
|
||||
{
|
||||
name: "absolute url passes through untouched",
|
||||
content: `const u = "https://api.example.org/v1/login";`,
|
||||
wantSome: []string{"https://api.example.org/v1/login"},
|
||||
},
|
||||
{
|
||||
name: "dotted-relative path resolves against base dir",
|
||||
content: `import("./chunks/main.js")`,
|
||||
wantSome: []string{"https://example.com/static/chunks/main.js"},
|
||||
},
|
||||
{
|
||||
name: "query string is preserved",
|
||||
content: `axios.get("/api/search?q=test")`,
|
||||
wantSome: []string{"https://example.com/api/search?q=test"},
|
||||
},
|
||||
{
|
||||
name: "mime types are filtered out",
|
||||
content: `headers["Content-Type"] = "application/json"; var t = "text/html";`,
|
||||
wantAbsent: []string{"application/json", "text/html"},
|
||||
},
|
||||
{
|
||||
name: "single words without a slash are ignored",
|
||||
content: `var x = "hello"; var y = "world";`,
|
||||
wantAbsent: []string{"hello", "world"},
|
||||
},
|
||||
{
|
||||
name: "multiple endpoints deduped",
|
||||
content: `fetch("/api/users"); fetch("/api/users"); fetch("/api/posts");`,
|
||||
wantSome: []string{
|
||||
"https://example.com/api/users",
|
||||
"https://example.com/api/posts",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ExtractEndpoints(tt.content, base)
|
||||
|
||||
for _, want := range tt.wantSome {
|
||||
if !slices.Contains(got, want) {
|
||||
t.Errorf("expected %q in %v", want, got)
|
||||
}
|
||||
}
|
||||
for _, absent := range tt.wantAbsent {
|
||||
if slices.Contains(got, absent) {
|
||||
t.Errorf("did not expect %q in %v", absent, got)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractEndpointsDedupes(t *testing.T) {
|
||||
got := ExtractEndpoints(`fetch("/api/x"); fetch("/api/x");`, "https://example.com/app.js")
|
||||
count := 0
|
||||
for i := 0; i < len(got); i++ {
|
||||
if got[i] == "https://example.com/api/x" {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
t.Fatalf("expected /api/x once, got %d times in %v", count, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractEndpointsBadBaseKeepsRelatives(t *testing.T) {
|
||||
// a base url that won't parse must not drop findings; relatives stay as-is.
|
||||
got := ExtractEndpoints(`fetch("/api/users")`, "::not a url::")
|
||||
if !slices.Contains(got, "/api/users") {
|
||||
t.Errorf("expected relative /api/users preserved, got %v", got)
|
||||
}
|
||||
}
|
||||
@@ -32,6 +32,8 @@ import (
|
||||
type JavascriptScanResult struct {
|
||||
SupabaseResults []supabaseScanResult `json:"supabase_results"`
|
||||
FoundEnvironmentVars map[string]string `json:"environment_variables"`
|
||||
SecretMatches []SecretMatch `json:"secret_matches"`
|
||||
Endpoints []string `json:"endpoints"`
|
||||
}
|
||||
|
||||
// ResultType implements the ScanResult interface.
|
||||
@@ -116,6 +118,11 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
|
||||
log.Info("Got %d scripts, now running scans on them", len(scripts))
|
||||
|
||||
supabaseResults := make([]supabaseScanResult, 0, len(scripts))
|
||||
secretMatches := make([]SecretMatch, 0)
|
||||
endpoints := make([]string, 0)
|
||||
// dedupe secrets and endpoints across every script, not just within one.
|
||||
seenSecrets := make(map[string]struct{})
|
||||
seenEndpoints := make(map[string]struct{})
|
||||
for _, script := range scripts {
|
||||
charmlog.Debugf("Scanning %s", script)
|
||||
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, script, http.NoBody)
|
||||
@@ -147,16 +154,41 @@ func JavascriptScan(url string, timeout time.Duration, threads int, logdir strin
|
||||
if scriptSupabaseResults != nil {
|
||||
supabaseResults = append(supabaseResults, scriptSupabaseResults...)
|
||||
}
|
||||
|
||||
// reuse the same script buffer for credential and endpoint extraction.
|
||||
for _, match := range ScanSecrets(content, script) {
|
||||
key := match.Rule + "\x00" + match.Match
|
||||
if _, ok := seenSecrets[key]; ok {
|
||||
continue
|
||||
}
|
||||
seenSecrets[key] = struct{}{}
|
||||
secretMatches = append(secretMatches, match)
|
||||
log.Warn("found %s in %s", match.Rule, script)
|
||||
}
|
||||
|
||||
for _, endpoint := range ExtractEndpoints(content, script) {
|
||||
if _, ok := seenEndpoints[endpoint]; ok {
|
||||
continue
|
||||
}
|
||||
seenEndpoints[endpoint] = struct{}{}
|
||||
endpoints = append(endpoints, endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
spin.Stop()
|
||||
|
||||
if len(endpoints) > 0 {
|
||||
log.Info("extracted %d endpoints", len(endpoints))
|
||||
}
|
||||
|
||||
result := JavascriptScanResult{
|
||||
SupabaseResults: supabaseResults,
|
||||
FoundEnvironmentVars: map[string]string{},
|
||||
SecretMatches: secretMatches,
|
||||
Endpoints: endpoints,
|
||||
}
|
||||
|
||||
log.Complete(len(supabaseResults), "found")
|
||||
log.Complete(len(supabaseResults)+len(secretMatches)+len(endpoints), "found")
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
: :
|
||||
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
||||
: ▄█ █ █▀ · BSD 3-Clause License :
|
||||
: :
|
||||
: (c) 2022-2026 vmfunc, xyzeva, :
|
||||
: lunchcat alumni & contributors :
|
||||
: :
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
*/
|
||||
|
||||
package js
|
||||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// SecretMatch describes a single credential found in a script.
type SecretMatch struct {
	// Rule is the name of the rule that produced the finding.
	Rule string `json:"rule"`
	// Match is the reported value.
	Match string `json:"match"`
	// Source is the url of the script the value was found in.
	Source string `json:"source"`
}
|
||||
|
||||
// entropy thresholds gate the noisy rules: provider-prefixed keys are
// trustworthy on their own, but a bare apikey="..." or a loose token blob is
// only worth reporting once its shannon entropy clears the bar for "this looks
// random, not an english word". the generic bar sits higher than the
// aws-secret bar because the generic capture group also catches ordinary
// identifiers.
const (
	genericMinEntropy   = 3.5
	awsSecretMinEntropy = 3.0
	// rules with no entropy requirement (prefix is already unique enough).
	noEntropyGate = 0.0
)

// secretRules is the credential regex bank. rules that prefix a keyword put
// the value in capture group 2 and that group is what gets reported; every
// other rule reports its whole match. minEntropy gates the noisy rules so we
// don't flag every short literal.
var secretRules = []struct {
	name       string
	re         *regexp.Regexp
	minEntropy float64
}{
	{
		// aws access key ids are fixed-shape and unmistakable.
		name:       "aws access key id",
		re:         regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// aws secret keys are 40-char base64-ish blobs; gate on entropy since the
		// shape alone matches plenty of innocent strings. the value must be
		// followed by a character outside its own alphabet (or end of input)
		// rather than by \b: \b rejects a key whose last char is '/' or '+',
		// and it accepts a 40-char slice of a longer blob. the trailing
		// delimiter is consumed but sits outside group 2.
		name:       "aws secret access key",
		re:         regexp.MustCompile(`\b((?:aws_secret_access_key|aws_secret|secret_key)["']?\s*[:=]\s*["']?)([A-Za-z0-9/+]{40})(?:[^A-Za-z0-9/+]|$)`),
		minEntropy: awsSecretMinEntropy,
	},
	{
		// github personal/oauth/server/refresh/app tokens share the ghX_ prefix.
		name:       "github token",
		re:         regexp.MustCompile(`\b((?:ghp|gho|ghu|ghs|ghr)_[0-9A-Za-z]{36,255})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// slack bot/user/app/legacy tokens.
		name:       "slack token",
		re:         regexp.MustCompile(`\b(xox[baprs]-[0-9A-Za-z-]{10,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// stripe live secret and publishable keys (test keys are not findings).
		name:       "stripe live key",
		re:         regexp.MustCompile(`\b([sp]k_live_[0-9A-Za-z]{16,})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// google api keys are a fixed AIza-prefixed 39-char shape.
		name:       "google api key",
		re:         regexp.MustCompile(`\b(AIza[0-9A-Za-z_-]{35})\b`),
		minEntropy: noEntropyGate,
	},
	{
		// pem private key blocks; the header alone is the smoking gun. pgp
		// armor spells its header "PGP PRIVATE KEY BLOCK", hence the optional
		// " BLOCK" suffix, and pkcs#8 encrypted keys use "ENCRYPTED PRIVATE KEY".
		name:       "private key",
		re:         regexp.MustCompile(`-{5}BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |PGP )?PRIVATE KEY(?: BLOCK)?-{5}`),
		minEntropy: noEntropyGate,
	},
	{
		// generic apikey/secret/token = "<value>" assignments; the value is in
		// group 2 and only reported if it looks random (entropy gate).
		name:       "generic secret assignment",
		re:         regexp.MustCompile(`(?i)\b(api[_-]?key|secret|token|password|passwd|auth)["']?\s*[:=]\s*["']([0-9A-Za-z\-._~+/]{16,})["']`),
		minEntropy: genericMinEntropy,
	},
}

// the value capture group lives at index 2 for the rules that prefix the
// keyword; index 0 (whole match) is used otherwise.
const (
	valueGroupIndex = 2
	wholeMatchIndex = 0
)
|
||||
|
||||
// ScanSecrets runs the regex bank over a script body and returns every gated
|
||||
// match, deduped within this one source. srcURL is recorded on each find.
|
||||
func ScanSecrets(content, srcURL string) []SecretMatch {
|
||||
matches := make([]SecretMatch, 0)
|
||||
seen := make(map[string]struct{})
|
||||
|
||||
for i := 0; i < len(secretRules); i++ {
|
||||
rule := secretRules[i]
|
||||
groups := rule.re.FindAllStringSubmatch(content, -1)
|
||||
for j := 0; j < len(groups); j++ {
|
||||
value := secretValue(groups[j])
|
||||
if value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// entropy gate weeds out english-y identifiers for the generic rules;
|
||||
// prefixed rules pass with a zero threshold.
|
||||
if rule.minEntropy > noEntropyGate && shannonEntropy(value) < rule.minEntropy {
|
||||
continue
|
||||
}
|
||||
|
||||
// dedupe per source so a key referenced twice is one finding.
|
||||
key := rule.name + "\x00" + value
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
|
||||
matches = append(matches, SecretMatch{Rule: rule.name, Match: value, Source: srcURL})
|
||||
}
|
||||
}
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
// secretValue returns the reported portion of a regex match: the dedicated
|
||||
// value group when the rule captures one, otherwise the whole match.
|
||||
func secretValue(groups []string) string {
|
||||
if len(groups) > valueGroupIndex && groups[valueGroupIndex] != "" {
|
||||
return groups[valueGroupIndex]
|
||||
}
|
||||
return strings.TrimSpace(groups[wholeMatchIndex])
|
||||
}
|
||||
|
||||
// shannonEntropy returns the shannon entropy of s in bits per character,
// computed over runes. it is used to tell random-looking secrets apart from
// plain words; the empty string has zero entropy.
func shannonEntropy(s string) float64 {
	if s == "" {
		return 0
	}

	// tally each rune and the overall rune count in a single pass.
	freq := make(map[rune]int)
	total := 0
	for _, r := range s {
		freq[r]++
		total++
	}

	n := float64(total)
	var bits float64
	for _, occurrences := range freq {
		p := float64(occurrences) / n
		bits -= p * math.Log2(p)
	}

	return bits
}
|
||||
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
: :
|
||||
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
||||
: ▄█ █ █▀ · BSD 3-Clause License :
|
||||
: :
|
||||
: (c) 2022-2026 vmfunc, xyzeva, :
|
||||
: lunchcat alumni & contributors :
|
||||
: :
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
*/
|
||||
|
||||
package js
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// fixture credentials for the scanner tests. each provider token is written
// as two concatenated fragments on purpose: a contiguous token literal in a
// committed file trips github push-protection (and every other secret
// scanner) even though it is only a fixture. the split keeps the literal out
// of source while ScanSecrets still sees the joined value.
const (
	fakeAWSKey    = "AKIA" + "IOSFODNN7EXAMPLE"
	fakeAWSSecret = "wJalrXUtnFEMI/K7MDENG/" + "bPxRfiCYEXAMPLEKEY"
	fakeGitHub    = "ghp_" + "aB3dEfGh1jKlMn0pQrStUvWxYz012345abcd"
	fakeSlack     = "xoxb-" + "123456789012-abcdefABCDEF1234567890ab"
	fakeStripe    = "sk_live_" + "4eC39HqLyjWDarjtT1zdp7dc"
	fakeGoogle    = "AIza" + "SyA1B2C3D4E5F6G7H8I9J0K1L2M3N4O5P6Q"
	fakeGeneric   = "x9Kq2Lm7Pz4Rt6Wv8Bn3Cd5Fg1Hj0As"
	fakePEM       = "-----BEGIN RSA PRIVATE " + "KEY-----\nMIIEpAIB..."
)
|
||||
|
||||
func TestScanSecrets(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
wantRule string // rule expected on the first match, "" means no match
|
||||
wantNone bool
|
||||
}{
|
||||
{
|
||||
name: "aws access key id",
|
||||
content: fmt.Sprintf(`const k = %q;`, fakeAWSKey),
|
||||
wantRule: "aws access key id",
|
||||
},
|
||||
{
|
||||
name: "github personal token",
|
||||
content: fmt.Sprintf(`token: %q`, fakeGitHub),
|
||||
wantRule: "github token",
|
||||
},
|
||||
{
|
||||
name: "slack bot token",
|
||||
content: fmt.Sprintf(`slack=%q`, fakeSlack),
|
||||
wantRule: "slack token",
|
||||
},
|
||||
{
|
||||
name: "stripe live secret key",
|
||||
content: fmt.Sprintf(`var sk = %q;`, fakeStripe),
|
||||
wantRule: "stripe live key",
|
||||
},
|
||||
{
|
||||
name: "google api key",
|
||||
content: fmt.Sprintf(`apiKey: %q`, fakeGoogle),
|
||||
wantRule: "google api key",
|
||||
},
|
||||
{
|
||||
name: "pem private key header",
|
||||
content: fakePEM,
|
||||
wantRule: "private key",
|
||||
},
|
||||
{
|
||||
name: "generic high-entropy api key assignment",
|
||||
content: fmt.Sprintf(`apikey = %q`, fakeGeneric),
|
||||
wantRule: "generic secret assignment",
|
||||
},
|
||||
{
|
||||
name: "aws secret with entropy",
|
||||
content: fmt.Sprintf(`aws_secret_access_key=%q`, fakeAWSSecret),
|
||||
wantRule: "aws secret access key",
|
||||
},
|
||||
{
|
||||
// low-entropy assignment is a placeholder, not a real secret.
|
||||
name: "low entropy generic assignment not flagged",
|
||||
content: `password = "aaaaaaaaaaaaaaaaaaaaaaaa"`,
|
||||
wantNone: true,
|
||||
},
|
||||
{
|
||||
// a repetitive placeholder is low-entropy and must not trip the gate.
|
||||
name: "low entropy repeated pattern not flagged",
|
||||
content: `token = "abababababababababababab"`,
|
||||
wantNone: true,
|
||||
},
|
||||
{
|
||||
name: "no secrets in plain code",
|
||||
content: `function add(a, b) { return a + b; }`,
|
||||
wantNone: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ScanSecrets(tt.content, "https://example.com/app.js")
|
||||
|
||||
if tt.wantNone {
|
||||
if len(got) != 0 {
|
||||
t.Fatalf("expected no matches, got %+v", got)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if len(got) == 0 {
|
||||
t.Fatalf("expected a %q match, got none", tt.wantRule)
|
||||
}
|
||||
if got[0].Rule != tt.wantRule {
|
||||
t.Errorf("rule = %q, want %q", got[0].Rule, tt.wantRule)
|
||||
}
|
||||
if got[0].Match == "" {
|
||||
t.Error("match value is empty")
|
||||
}
|
||||
if got[0].Source != "https://example.com/app.js" {
|
||||
t.Errorf("source = %q, want the passed url", got[0].Source)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanSecretsDedupesWithinSource(t *testing.T) {
|
||||
// the same key referenced twice in one file is one finding.
|
||||
content := fmt.Sprintf(`a = %q; b = %q;`, fakeAWSKey, fakeAWSKey)
|
||||
got := ScanSecrets(content, "https://example.com/app.js")
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("expected 1 deduped match, got %d: %+v", len(got), got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShannonEntropy(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
// random-ish strings clear the generic gate, repetitive ones don't.
|
||||
wantHigh bool
|
||||
}{
|
||||
{name: "empty is zero", input: "", wantHigh: false},
|
||||
{name: "repeated char is low", input: "aaaaaaaaaaaaaaaa", wantHigh: false},
|
||||
{name: "random blob is high", input: fakeGeneric, wantHigh: true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := shannonEntropy(tt.input)
|
||||
if tt.wantHigh && got < genericMinEntropy {
|
||||
t.Errorf("entropy %f below generic gate %f", got, genericMinEntropy)
|
||||
}
|
||||
if !tt.wantHigh && got >= genericMinEntropy {
|
||||
t.Errorf("entropy %f unexpectedly cleared generic gate %f", got, genericMinEntropy)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user