Files
trivy/pkg/licensing/classifier.go
2024-12-24 04:47:21 +00:00

88 lines
2.0 KiB
Go

package licensing
import (
"fmt"
"io"
"sort"
"sync"
classifier "github.com/google/licenseclassifier/v2"
"github.com/google/licenseclassifier/v2/assets"
"golang.org/x/xerrors"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/set"
)
var (
cf *classifier.Classifier
classifierOnce sync.Once
m sync.Mutex
)
func initGoogleClassifier() error {
// Initialize the default classifier once.
// This loading is expensive and should be called only when the license classification is needed.
var err error
classifierOnce.Do(func() {
log.Debug("Loading the default license classifier...")
cf, err = assets.DefaultClassifier()
})
return err
}
// Classify detects and classifies the license found in a file
func Classify(filePath string, r io.Reader, confidenceLevel float64) (*types.LicenseFile, error) {
content, err := io.ReadAll(r)
if err != nil {
return nil, xerrors.Errorf("unable to read a license file %q: %w", filePath, err)
}
if err = initGoogleClassifier(); err != nil {
return nil, err
}
var findings types.LicenseFindings
var matchType types.LicenseType
seen := set.New[string]()
// cf.Match is not thread safe
m.Lock()
// Use 'github.com/google/licenseclassifier' to find licenses
result := cf.Match(cf.Normalize(content))
m.Unlock()
for _, match := range result.Matches {
if match.Confidence <= confidenceLevel {
continue
}
if seen.Contains(match.Name) {
continue
}
seen.Append(match.Name)
switch match.MatchType {
case "Header":
matchType = types.LicenseTypeHeader
case "License":
matchType = types.LicenseTypeFile
}
licenseLink := fmt.Sprintf("https://spdx.org/licenses/%s.html", match.Name)
findings = append(findings, types.LicenseFinding{
Name: match.Name,
Confidence: match.Confidence,
Link: licenseLink,
})
}
sort.Sort(findings)
return &types.LicenseFile{
Type: matchType,
FilePath: filePath,
Findings: findings,
}, nil
}