Files
sif/internal/scan/frameworks/detector.go
T
Tigah 6575c2e5f7 fix(frameworks): stop false positives and version mis-extraction (#247)
a detector accuracy audit surfaced two classes of bug in the framework
detectors.

bare-brand header false positives: header-only signatures matched a
brand name as a substring across every header name and value, so a
detector fired on any response that merely referenced the brand (a
vendor cdn named in a link or csp value, a cookie sharing the prefix).
add an optional Header field to Signature that scopes a header-only
match to one named header's value, and apply it (or a structural
anchor) per detector:

- express: "Express" scoped to x-powered-by, was firing on an
  express_checkout cookie.
- flask: "Werkzeug" scoped to the server header.
- symfony: dropped the bare "symfony" word (symfony sets no such
  header, it fired on symfony.com links); the x-debug-token header is
  the marker.
- shopify: key on the x-shopify response headers instead of the bare
  "Shopify" word, which fired on a cdn.shopify.com link.
- remix: dropped the bare "remix"/"_remix" substrings that fired on a
  track_remix.mp3 asset; window.__remixContext is the definitive
  marker.
- spring boot: anchor the whitelabel title in its h1 tag context so a
  tutorial discussing the error does not fire.

the gin and fastapi detectors are removed: gin keyed on the
"gin-gonic" import-path string (appears in tutorials, never in a real
gin response) and fastapi on bare words matching the projects' doc
domains. neither framework advertises itself in a response header or a
non-prose body marker, so there is no clean passive signal to anchor
on.

version mis-extraction: drop the low-confidence ".*?" version
fallbacks (rails, django, laravel, spring), whose unbounded gap
grabbed the first version-shaped number after the framework word and
reported an unrelated asset's cache-buster when no real version was
present. let isValidVersionString accept a single integer so a bare
major such as drupal's "Drupal 10" is no longer rejected as "unknown".

each false positive and version bug is covered by a regression test.
2026-07-02 12:55:34 -07:00

166 lines
4.8 KiB
Go

/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
/*
BSD 3-Clause License
(c) 2022-2026 vmfunc, xyzeva & contributors
*/
package frameworks
import (
"net/http"
"strings"
"sync"
)
// Signature represents a pattern to match for framework detection.
//
// BaseDetector.MatchSignatures treats each Signature as a plain substring
// test and adds Weight to the score when the test succeeds.
type Signature struct {
// Pattern is the substring to look for. It is compared case-sensitively
// against the body and case-insensitively against headers.
Pattern string
// Weight is this signature's share of the score; the final score is the
// sum of matched weights divided by the sum of all weights.
Weight float32
// HeaderOnly, when true, tests Pattern against the response headers
// instead of the body.
HeaderOnly bool
// Header, when set, scopes a HeaderOnly match to the named header's value
// (canonical form, e.g. "X-Powered-By"). When empty, a HeaderOnly match is
// tested against every header name and every header value. Header is not
// consulted when HeaderOnly is false.
Header string
}
// Detector is the interface for framework detection plugins.
//
// Implementations are stored in the package registry via Register, keyed by
// the value Name returns.
type Detector interface {
// Name returns the unique framework name. It is used as the registry key,
// so registering two detectors with the same name keeps only the later one.
Name() string
// Signatures returns patterns to search for this framework.
Signatures() []Signature
// Detect performs detection and returns confidence (0.0-1.0) and version.
// The version can be empty if not detectable.
Detect(body string, headers http.Header) (confidence float32, version string)
}
// registry holds all registered detectors, keyed by Detector.Name().
// registryMu guards every access to registry: Register takes the write lock,
// GetDetectors and GetDetector take the read lock.
var (
registryMu sync.RWMutex
registry = make(map[string]Detector)
)
// Register stores d in the package registry under the key d.Name(),
// replacing any detector previously stored under that name. The write happens
// while the registry write lock is held. Should be called from init().
func Register(d Detector) {
	registryMu.Lock()
	defer registryMu.Unlock()

	key := d.Name()
	registry[key] = d
}
// GetDetectors returns a snapshot of every registered detector, keyed by name.
// The map is freshly allocated while the read lock is held, so callers may
// modify it without touching the registry itself.
func GetDetectors() map[string]Detector {
	registryMu.RLock()
	defer registryMu.RUnlock()

	snapshot := make(map[string]Detector, len(registry))
	for name, det := range registry {
		snapshot[name] = det
	}
	return snapshot
}
// GetDetector looks up a single detector by its registered name. The boolean
// result reports whether an entry exists under that name; the lookup is done
// under the registry read lock.
func GetDetector(name string) (Detector, bool) {
	registryMu.RLock()
	defer registryMu.RUnlock()

	if det, found := registry[name]; found {
		return det, true
	}
	return nil, false
}
// BaseDetector provides common functionality for detector implementations:
// it stores a framework name and its signatures and supplies Name,
// Signatures and MatchSignatures. It defines no Detect method in this part of
// the file, so on its own it does not appear to satisfy Detector.
type BaseDetector struct {
// name is the framework name returned by Name.
name string
// signatures is the pattern list returned by Signatures and scored by
// MatchSignatures.
signatures []Signature
}
// NewBaseDetector builds a BaseDetector value holding the given framework
// name and signature list. The signatures slice is stored as passed, without
// copying.
func NewBaseDetector(name string, signatures []Signature) BaseDetector {
	var det BaseDetector
	det.name = name
	det.signatures = signatures
	return det
}
// Name returns the framework name supplied when the detector was built.
func (b BaseDetector) Name() string {
return b.name
}
// Signatures returns the detection signatures.
// The stored slice is returned directly rather than copied, so writes to its
// elements are visible to this detector.
func (b BaseDetector) Signatures() []Signature {
return b.signatures
}
// MatchSignatures scores body and headers against the detector's signatures.
//
// Every signature adds its Weight to the total. A signature also adds its
// Weight to the matched sum when its Pattern is found:
//   - not HeaderOnly: Pattern is a case-sensitive substring of body;
//   - HeaderOnly with no Header: Pattern appears in any header name or value;
//   - HeaderOnly with Header set: Pattern appears in that header's value.
//
// The result is matched weight divided by total weight, or 0 when the total
// weight is zero (including when there are no signatures).
func (b BaseDetector) MatchSignatures(body string, headers http.Header) float32 {
	var hit, total float32

	for _, sig := range b.signatures {
		total += sig.Weight

		var matched bool
		switch {
		case !sig.HeaderOnly:
			matched = strings.Contains(body, sig.Pattern)
		case sig.Header == "":
			matched = containsHeader(headers, sig.Pattern)
		default:
			matched = headerValueContains(headers, sig.Header, sig.Pattern)
		}

		if matched {
			hit += sig.Weight
		}
	}

	if total == 0 {
		return 0
	}
	return hit / total
}
// headerValueContains reports whether any value of the header called name
// contains signature, ignoring case in both the header name and the value.
//
// The name is compared with strings.EqualFold against every key in headers
// instead of being looked up through headers.Values. Values only finds keys
// stored in canonical MIME form ("X-Powered-By"), so a map whose keys were
// set without canonicalization (e.g. by direct map assignment) would never
// match. Every key equal to name under case folding is inspected.
//
// It returns false for a nil or empty header map, or when no such header
// exists.
func headerValueContains(headers http.Header, name, signature string) bool {
	needle := strings.ToLower(signature)
	for key, values := range headers {
		if !strings.EqualFold(key, name) {
			continue
		}
		for _, value := range values {
			if strings.Contains(strings.ToLower(value), needle) {
				return true
			}
		}
	}
	return false
}
// containsHeader reports whether signature occurs, ignoring case, anywhere in
// the headers: either as a substring of a header name or as a substring of
// any header value. It returns false for a nil or empty header map.
func containsHeader(headers http.Header, signature string) bool {
	needle := strings.ToLower(signature)
	has := func(s string) bool {
		return strings.Contains(strings.ToLower(s), needle)
	}

	// One pass covers both the name and its values; the result is a plain
	// "any match" so the visiting order does not matter.
	for name, values := range headers {
		if has(name) {
			return true
		}
		for _, value := range values {
			if has(value) {
				return true
			}
		}
	}
	return false
}