Files
trivy/pkg/fanal/analyzer/licensing/license.go

159 lines
3.3 KiB
Go

package licensing
import (
"context"
"io"
"math"
"os"
"path/filepath"
"strings"
"golang.org/x/xerrors"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/set"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)
// version is the value reported by (*licenseFileAnalyzer).Version.
// NOTE(review): presumably meant to be bumped whenever this analyzer's output
// changes — confirm against the analyzer framework's use of Version().
const version = 1
var (
// skipDirs lists path fragments that exclude a file from license scanning.
// Required rejects any file whose path contains one of these strings anywhere
// in it (a substring match, not a prefix match). Under that matching rule
// "usr/lib" already covers "usr/lib/python" and "usr/lib/gems".
skipDirs = []string{
"node_modules/", // node scan will pick these up
"usr/share/doc/", // dpkg will pick these up
// Some heuristic exclusion
"usr/lib",
"usr/local/include",
"usr/include",
"usr/lib/python",
"usr/local/go",
"opt/yarn",
"usr/lib/gems",
"usr/src/wordpress",
}
// acceptedExtensions holds the file extensions (leading dot included) that
// make Required accept a file; it is checked against filepath.Ext of the path.
// NOTE(review): built with set.NewCaseInsensitive, so lookups presumably
// ignore case — confirm against pkg/set.
acceptedExtensions = set.NewCaseInsensitive(
".asp",
".aspx",
".bas",
".bat",
".b",
".c",
".cue",
".cgi",
".cs",
".css",
".fish",
".html",
".h",
".ini",
".java",
".js",
".jsx",
".markdown",
".md",
".py",
".php",
".pl",
".r",
".rb",
".sh",
".sql",
".ts",
".tsx",
".txt",
".vue",
".zsh",
)
// acceptedFileNames holds file names that make Required accept a file whose
// extension is not in acceptedExtensions; it is checked against
// filepath.Base of the path.
// NOTE(review): built with set.NewCaseInsensitive, so lookups presumably
// ignore case — confirm against pkg/set.
acceptedFileNames = set.NewCaseInsensitive(
"license",
"licence",
"copyright",
)
)
func init() {
analyzer.RegisterAnalyzer(newLicenseFileAnalyzer())
}
// licenseFileAnalyzer is an analyzer for file headers and license files.
// Its methods in this file (Analyze, Init, Required, Type, Version) provide
// the behavior registered with analyzer.RegisterAnalyzer in init.
type licenseFileAnalyzer struct {
// classifierConfidenceLevel is copied from the analyzer options in Init and
// forwarded unchanged to licensing.Classify by Analyze. It keeps its zero
// value until Init has been called.
classifierConfidenceLevel float64
}
// newLicenseFileAnalyzer returns a zero-valued analyzer. The classifier
// confidence level stays at 0 until Init is called.
func newLicenseFileAnalyzer() *licenseFileAnalyzer {
	return new(licenseFileAnalyzer)
}
// Analyze runs license classification on a single file.
//
// It returns (nil, nil) when the content cannot be checked for readability,
// when it does not look like text, or when the classifier reports no findings.
// A classifier failure is returned as a wrapped error. Otherwise the result
// carries exactly one LicenseFile describing the findings.
func (a *licenseFileAnalyzer) Analyze(ctx context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
	ctx = log.WithContextPrefix(ctx, "license")
	log.DebugContext(ctx, "License scanning", log.FilePath(input.FilePath))

	// Only text-like content is classified; a failed readability check is
	// treated the same as binary content and the file is skipped quietly.
	if isText, err := isHumanReadable(input.Content, input.Info.Size()); err != nil || !isText {
		return nil, nil
	}

	licenseFile, err := licensing.Classify(input.FilePath, input.Content, a.classifierConfidenceLevel)
	switch {
	case err != nil:
		return nil, xerrors.Errorf("license classification error: %w", err)
	case len(licenseFile.Findings) == 0:
		// Nothing recognized in this file.
		return nil, nil
	}

	result := &analyzer.AnalysisResult{
		Licenses: []types.LicenseFile{*licenseFile},
	}
	return result, nil
}
// Init stores the classifier confidence level from the license scanner
// options on the analyzer. It always returns nil.
func (a *licenseFileAnalyzer) Init(opt analyzer.AnalyzerOptions) error {
	confidence := opt.LicenseScannerOption.ClassifierConfidenceLevel
	a.classifierConfidenceLevel = confidence
	return nil
}
// Required reports whether filePath should be handed to Analyze.
//
// A path containing any skipDirs entry as a substring is rejected outright.
// Otherwise the file is accepted when its extension is in acceptedExtensions
// or its base name is in acceptedFileNames. The os.FileInfo argument is not
// consulted.
func (a *licenseFileAnalyzer) Required(filePath string, _ os.FileInfo) bool {
	for i := range skipDirs {
		if strings.Contains(filePath, skipDirs[i]) {
			return false
		}
	}

	ext := filepath.Ext(filePath)
	base := filepath.Base(filePath)
	return acceptedExtensions.Contains(ext) || acceptedFileNames.Contains(base)
}
// isHumanReadable reports whether content looks like text rather than binary
// data, judging by a sample of at most its first 300 bytes.
//
// fileSize is the expected length of content and bounds the sample size; a
// non-positive size samples nothing, and an empty sample is reported as
// readable. Content that ends before the expected sample is full is judged on
// the bytes that were actually read. On success content is rewound to its
// start so the caller can read it from the beginning.
//
// An error is returned when nothing can be read although bytes were expected,
// when reading fails for another reason, or when the rewind fails.
func isHumanReadable(content xio.ReadSeekerAt, fileSize int64) (bool, error) {
	// Clamp the sample size to [0, 300]; a negative size would make the
	// allocation below panic.
	headSize := int(math.Max(0, math.Min(float64(fileSize), 300)))
	head := make([]byte, headSize)

	// A single Read may legally return fewer bytes than requested, so keep
	// reading until the sample is full. io.ErrUnexpectedEOF only means the
	// content is shorter than fileSize claimed, which is not a failure here.
	n, err := io.ReadFull(content, head)
	if err != nil && err != io.ErrUnexpectedEOF {
		return false, err
	}
	// Look only at what was read: the unread tail of the buffer is zero-filled
	// and would otherwise be mistaken for binary data.
	head = head[:n]

	if _, err := content.Seek(0, io.SeekStart); err != nil {
		return false, err
	}

	// cf. https://github.com/file/file/blob/f2a6e7cb7db9b5fd86100403df6b2f830c7f22ba/src/encoding.c#L151-L228
	// Control bytes other than 7-10, 12, 13 and 27 (BEL, BS, TAB, LF, FF, CR,
	// ESC) mark the sample as binary, as does DEL (0x7f). Bytes >= 0x80 are
	// accepted.
	for _, b := range head {
		if b < 7 || b == 11 || (13 < b && b < 27) || (27 < b && b < 0x20) || b == 0x7f {
			return false, nil
		}
	}
	return true, nil
}
// Type returns analyzer.TypeLicenseFile, the analyzer type this
// implementation reports for itself.
func (a *licenseFileAnalyzer) Type() analyzer.Type {
return analyzer.TypeLicenseFile
}
// Version returns the package-level version constant for this analyzer.
func (a *licenseFileAnalyzer) Version() int {
return version
}