mirror of
https://github.com/aquasecurity/trivy.git
synced 2025-12-05 20:40:16 -08:00
159 lines
3.3 KiB
Go
159 lines
3.3 KiB
Go
package licensing
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
|
|
"github.com/aquasecurity/trivy/pkg/fanal/types"
|
|
"github.com/aquasecurity/trivy/pkg/licensing"
|
|
"github.com/aquasecurity/trivy/pkg/log"
|
|
"github.com/aquasecurity/trivy/pkg/set"
|
|
xio "github.com/aquasecurity/trivy/pkg/x/io"
|
|
)
|
|
|
|
// version is the value returned by licenseFileAnalyzer.Version.
const version = 1
|
|
|
|
var (
|
|
skipDirs = []string{
|
|
"node_modules/", // node scan will pick these up
|
|
"usr/share/doc/", // dpkg will pick these up
|
|
|
|
// Some heuristic exclusion
|
|
"usr/lib",
|
|
"usr/local/include",
|
|
"usr/include",
|
|
"usr/lib/python",
|
|
"usr/local/go",
|
|
"opt/yarn",
|
|
"usr/lib/gems",
|
|
"usr/src/wordpress",
|
|
}
|
|
|
|
acceptedExtensions = set.NewCaseInsensitive(
|
|
".asp",
|
|
".aspx",
|
|
".bas",
|
|
".bat",
|
|
".b",
|
|
".c",
|
|
".cue",
|
|
".cgi",
|
|
".cs",
|
|
".css",
|
|
".fish",
|
|
".html",
|
|
".h",
|
|
".ini",
|
|
".java",
|
|
".js",
|
|
".jsx",
|
|
".markdown",
|
|
".md",
|
|
".py",
|
|
".php",
|
|
".pl",
|
|
".r",
|
|
".rb",
|
|
".sh",
|
|
".sql",
|
|
".ts",
|
|
".tsx",
|
|
".txt",
|
|
".vue",
|
|
".zsh",
|
|
)
|
|
|
|
acceptedFileNames = set.NewCaseInsensitive(
|
|
"license",
|
|
"licence",
|
|
"copyright",
|
|
)
|
|
)
|
|
|
|
func init() {
|
|
analyzer.RegisterAnalyzer(newLicenseFileAnalyzer())
|
|
}
|
|
|
|
// licenseFileAnalyzer is an analyzer for file headers and license files.
type licenseFileAnalyzer struct {
	// classifierConfidenceLevel is set by Init from the analyzer options and
	// handed to licensing.Classify by Analyze.
	classifierConfidenceLevel float64
}
|
|
|
|
func newLicenseFileAnalyzer() *licenseFileAnalyzer {
|
|
return &licenseFileAnalyzer{}
|
|
}
|
|
|
|
func (a *licenseFileAnalyzer) Analyze(ctx context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
|
|
ctx = log.WithContextPrefix(ctx, "license")
|
|
log.DebugContext(ctx, "License scanning", log.FilePath(input.FilePath))
|
|
|
|
// need files to be text based, readable files
|
|
readable, err := isHumanReadable(input.Content, input.Info.Size())
|
|
if err != nil || !readable {
|
|
return nil, nil
|
|
}
|
|
lf, err := licensing.Classify(input.FilePath, input.Content, a.classifierConfidenceLevel)
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("license classification error: %w", err)
|
|
} else if len(lf.Findings) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
return &analyzer.AnalysisResult{
|
|
Licenses: []types.LicenseFile{*lf},
|
|
}, nil
|
|
}
|
|
|
|
func (a *licenseFileAnalyzer) Init(opt analyzer.AnalyzerOptions) error {
|
|
a.classifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel
|
|
return nil
|
|
}
|
|
|
|
func (a *licenseFileAnalyzer) Required(filePath string, _ os.FileInfo) bool {
|
|
for _, skipDir := range skipDirs {
|
|
if strings.Contains(filePath, skipDir) {
|
|
return false
|
|
}
|
|
}
|
|
if acceptedExtensions.Contains(filepath.Ext(filePath)) {
|
|
return true
|
|
}
|
|
|
|
return acceptedFileNames.Contains(filepath.Base(filePath))
|
|
}
|
|
|
|
func isHumanReadable(content xio.ReadSeekerAt, fileSize int64) (bool, error) {
|
|
headSize := int(math.Min(float64(fileSize), 300))
|
|
head := make([]byte, headSize)
|
|
if _, err := content.Read(head); err != nil {
|
|
return false, err
|
|
}
|
|
if _, err := content.Seek(0, io.SeekStart); err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// cf. https://github.com/file/file/blob/f2a6e7cb7db9b5fd86100403df6b2f830c7f22ba/src/encoding.c#L151-L228
|
|
for _, b := range head {
|
|
if b < 7 || b == 11 || (13 < b && b < 27) || (27 < b && b < 0x20) || b == 0x7f {
|
|
return false, nil
|
|
}
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
func (a *licenseFileAnalyzer) Type() analyzer.Type {
|
|
return analyzer.TypeLicenseFile
|
|
}
|
|
|
|
func (a *licenseFileAnalyzer) Version() int {
|
|
return version
|
|
}
|