fix(sbom): fix non-compliant license format for SPDX (#3335)

This commit is contained in:
Masahiro331
2023-03-13 00:21:25 +09:00
committed by GitHub
parent f8307635ad
commit aaf265881e
12 changed files with 842 additions and 22 deletions

View File

@@ -87,6 +87,10 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens
// Machine-readable format
// cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual.
l := strings.TrimSpace(line[8:])
// Very rarely has below phrases
l = strings.TrimPrefix(l, "The main library is licensed under ")
l = strings.TrimSuffix(l, " license")
if len(l) > 0 {
// Split licenses without considering "and"/"or"
// examples:

View File

@@ -0,0 +1,74 @@
package expression
import (
"fmt"
"strings"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
"github.com/aquasecurity/trivy/pkg/licensing/expression/parser"
)
// Operator is a keyword used to combine licenses inside a license expression.
type Operator string

// Operators that can be used as a separator when building an expression.
const (
	AND  Operator = "AND"
	OR   Operator = "OR"
	WITH Operator = "WITH"
)

// String renders the operator with exactly one space on each side, which is
// the form used both for joining elements and for searching inside a license
// string.
func (o Operator) String() string {
	keyword := string(o)
	return fmt.Sprintf(" %s ", keyword)
}
// Normalize parses license as a license expression and renders it back as a
// string, passing every license name through the given normalize functions in
// the order they were supplied.
//
// When the expression cannot be parsed, the input is returned unchanged.
func Normalize(license string, fn ...parser.NormalizeFunc) string {
	p := parser.New(lexer.New(license)).RegisterNormalizeFunc(fn...)

	parsed, err := p.Parse()
	if err != nil {
		return license
	}
	return p.Normalize(parsed)
}
// Join concatenates license expressions with the operator sep.
//
// When more than one element is joined, every element that itself contains the
// opposite operator (OR inside an AND-join, AND inside an OR-join) is wrapped
// in parentheses so that operator precedence cannot change its meaning. This
// applies to the first element as well: joining "A OR B" and "C" with AND must
// yield "(A OR B) AND C", not "A OR B AND C".
//
// A single element is returned as is, and separators other than AND/OR never
// add parentheses.
func Join(elems []string, sep Operator) string {
	// The operator whose presence inside an element requires grouping.
	var mid Operator
	switch sep {
	case AND:
		mid = OR
	case OR:
		mid = AND
	}
	needGrouping := mid != "" && len(elems) > 1

	licenses := make([]string, 0, len(elems))
	for _, license := range elems {
		if needGrouping && strings.Contains(strings.ToUpper(license), mid.String()) {
			license = fmt.Sprintf("(%s)", license)
		}
		licenses = append(licenses, license)
	}
	return strings.Join(licenses, sep.String())
}
// NormalizeForSPDX rewrites a license name so that it contains no white space
// inside a license-id, replacing every ' ' with '-'.
// An SPDX license-id MUST NOT contain white space, while there MUST be white
// space on either side of the operator "WITH".
// ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions
//
// The first " with " (matched case-insensitively) is emitted as " WITH ", and
// the parts before and after it are hyphenated separately.
// examples:
//
//	AFL 2.0                                  => AFL-2.0
//	GPL-2+ with distribution exception       => GPL-2+ WITH distribution-exception
//	GPL-2 with Linux-syscall-note exception  => GPL-2 WITH Linux-syscall-note-exception
//	AFL 2.0 with Linux-syscall-note exception => AFL-2.0 WITH Linux-syscall-note-exception
//
// A name that starts with the operator (nothing precedes " with ") is
// returned unchanged.
func NormalizeForSPDX(name string) string {
	with := WITH.String()

	// Search in the original string rather than in an upper-cased copy:
	// upper-casing may change the byte length of non-ASCII characters, which
	// would make the found index point at the wrong place in name.
	i := indexFold(name, with)
	switch {
	case i < 0:
		return strings.ReplaceAll(name, " ", "-")
	case i == 0:
		return name
	}

	licenseID := strings.ReplaceAll(name[:i], " ", "-")
	exception := strings.ReplaceAll(name[i+len(with):], " ", "-")
	return licenseID + with + exception
}

// indexFold returns the byte index of the first case-insensitive occurrence of
// substr in s, or -1 when there is none. Candidate windows have the same byte
// length as substr, so the returned index is always valid for slicing s.
func indexFold(s, substr string) int {
	for i := 0; i+len(substr) <= len(s); i++ {
		if strings.EqualFold(s[i:i+len(substr)], substr) {
			return i
		}
	}
	return -1
}

View File

@@ -0,0 +1,83 @@
package expression
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestNormalizeForSPDX checks that white space inside a license name is
// replaced with '-' while the WITH operator keeps its surrounding spaces.
func TestNormalizeForSPDX(t *testing.T) {
	tests := []struct {
		name    string
		license string
		want    string
	}{
		{
			name:    "happy path",
			license: "AFL 2.0",
			want:    "AFL-2.0",
		},
		{
			name:    "happy path with WITH section",
			license: "AFL 2.0 with Linux-syscall-note exception",
			want:    "AFL-2.0 WITH Linux-syscall-note-exception",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The failure message names the function that is actually under test.
			assert.Equalf(t, tt.want, NormalizeForSPDX(tt.license), "NormalizeForSPDX(%v)", tt.license)
		})
	}
}
// TestJoin verifies that Join separates elements with the requested operator
// and adds parentheses around an element that uses the opposite operator.
func TestJoin(t *testing.T) {
	type testCase struct {
		name     string
		elems    []string
		operator Operator
		want     string
	}

	cases := []testCase{
		{
			name:     "happy path single license",
			elems:    []string{"MIT"},
			operator: AND,
			want:     "MIT",
		},
		{
			name:     "happy path multi license",
			elems:    []string{"MIT", "GPL1.0"},
			operator: AND,
			want:     "MIT AND GPL1.0",
		},
		{
			name:     "happy path multi license with AND operator",
			elems:    []string{"MIT", "GPL1.0 AND GPL2.0"},
			operator: AND,
			want:     "MIT AND GPL1.0 AND GPL2.0",
		},
		{
			name:     "happy path multi license with OR operator",
			elems:    []string{"MIT", "GPL1.0 OR GPL2.0"},
			operator: OR,
			want:     "MIT OR GPL1.0 OR GPL2.0",
		},
		{
			name:     "happy path multi license with OR operator, separator AND",
			elems:    []string{"MIT", "GPL1.0 OR GPL2.0"},
			operator: AND,
			want:     "MIT AND (GPL1.0 OR GPL2.0)",
		},
		{
			name:     "happy path multi license with AND operator, separator OR",
			elems:    []string{"MIT", "GPL1.0 AND GPL2.0"},
			operator: OR,
			want:     "MIT OR (GPL1.0 AND GPL2.0)",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, Join(tc.elems, tc.operator))
		})
	}
}

View File

@@ -0,0 +1,85 @@
package lexer
import (
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
// Lexer walks over a license expression byte by byte and produces tokens.
type Lexer struct {
// input is the expression being tokenized.
input string
// position is the index of the byte currently held in ch.
position int
// readPosition is the index of the next byte readChar will load.
readPosition int
// ch is the byte under examination; readChar sets it to 0 once the input is exhausted.
ch byte
}
// New creates a Lexer for input with its first byte already loaded, so the
// returned Lexer is ready for NextToken.
func New(input string) *Lexer {
	lx := new(Lexer)
	lx.input = input
	lx.readChar()
	return lx
}
// NextToken skips white space and returns the token that starts at the current
// byte.
//
// A run of identifier bytes becomes a single token whose type is decided by
// token.LookupIdent. Every other byte yields a one-byte token: EOF for the NUL
// byte that marks the end of the input, LPAREN / RPAREN for parentheses, and
// ILLEGAL for anything else.
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	if isLetter(l.ch) {
		// readIdentifier already leaves the lexer on the byte after the word.
		literal := l.readIdentifier()
		return token.Token{
			Type:    token.LookupIdent(literal),
			Literal: literal,
		}
	}

	var kind token.TokenType
	switch l.ch {
	case 0:
		kind = token.EOF
	case '(':
		kind = token.LPAREN
	case ')':
		kind = token.RPAREN
	default:
		kind = token.ILLEGAL
	}

	tok := newToken(kind, l.ch)
	l.readChar()
	return tok
}
// isLetter reports whether ch may appear inside an identifier: ASCII letters,
// digits and the symbols '_', '+', '.', '-', '/', ':' and '='.
func isLetter(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z',
		'A' <= ch && ch <= 'Z',
		'0' <= ch && ch <= '9':
		return true
	}

	switch ch {
	case '_', '+', '.', '-', '/', ':', '=':
		return true
	}
	return false
}
// readIdentifier consumes bytes for as long as isLetter accepts them and
// returns the consumed part of the input. The lexer is left on the first byte
// that does not belong to the identifier.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for ; isLetter(l.ch); l.readChar() {
	}
	end := l.position
	return l.input[start:end]
}
// newToken builds a token of the given type whose literal is the single byte ch.
func newToken(tokenType token.TokenType, ch byte) token.Token {
	var tok token.Token
	tok.Type = tokenType
	tok.Literal = string(ch)
	return tok
}
// skipWhitespace advances the lexer past spaces, tabs, line feeds and carriage
// returns, stopping on the first byte that is none of those.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readChar loads the byte at readPosition into ch and moves both positions one
// step forward. Past the end of the input, ch becomes 0 (ASCII NUL), which the
// lexer treats as end of input.
func (l *Lexer) readChar() {
	var next byte // stays NUL when nothing is left to read
	if l.readPosition < len(l.input) {
		next = l.input[l.readPosition]
	}
	l.ch = next
	l.position = l.readPosition
	l.readPosition++
}

View File

@@ -0,0 +1,143 @@
package lexer
import (
"testing"
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
"github.com/stretchr/testify/assert"
)
// TestNextToken feeds license expressions to the lexer and compares the
// produced tokens with the expected ones.
//
// ILLEGAL tokens are skipped without consuming an expectation, so the tokens
// that follow illegal bytes are still verified. Each case also checks that the
// lexer reports EOF once all expected tokens have been read.
func TestNextToken(t *testing.T) {
	tests := []struct {
		name              string
		licenseExpression string
		expectTokens      []token.Token
	}{
		{
			name:              "empty input",
			licenseExpression: "",
			expectTokens: []token.Token{
				{
					Type:    token.EOF,
					Literal: string(byte(0)),
				},
			},
		},
		{
			name:              "single ident",
			licenseExpression: "GPL1.0+",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
			},
		},
		{
			name:              "multi ident",
			licenseExpression: "Public Domain",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "Public",
				},
				{
					Type:    token.IDENT,
					Literal: "Domain",
				},
			},
		},
		{
			name:              "AND OR operator",
			licenseExpression: "Public Domain AND GPL1.0+ OR GPL2.0_or_later",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "Public",
				},
				{
					Type:    token.IDENT,
					Literal: "Domain",
				},
				{
					Type:    token.AND,
					Literal: "AND",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.OR,
					Literal: "OR",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL2.0_or_later",
				},
			},
		},
		{
			name:              "PAREN operator",
			licenseExpression: "(GPL1.0+ OR GPL2.0)",
			expectTokens: []token.Token{
				{
					Type:    token.LPAREN,
					Literal: "(",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.OR,
					Literal: "OR",
				},
				{
					Type:    token.IDENT,
					Literal: "GPL2.0",
				},
				{
					Type:    token.RPAREN,
					Literal: ")",
				},
			},
		},
		{
			name:              "illegal string",
			licenseExpression: "GPL1.0+" + string(byte(0x20)) + "あ" + "🇯🇵" + "AND LGPL1.0",
			expectTokens: []token.Token{
				{
					Type:    token.IDENT,
					Literal: "GPL1.0+",
				},
				{
					Type:    token.AND,
					Literal: "AND",
				},
				{
					Type:    token.IDENT,
					Literal: "LGPL1.0",
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			l := New(tt.licenseExpression)

			// nextLegal returns the next token that is not ILLEGAL. It always
			// terminates because the lexer ends every input with EOF.
			nextLegal := func() token.Token {
				tok := l.NextToken()
				for tok.Type == token.ILLEGAL {
					tok = l.NextToken()
				}
				return tok
			}

			for _, expect := range tt.expectTokens {
				tok := nextLegal()
				assert.Equal(t, expect.Type, tok.Type)
				assert.Equal(t, expect.Literal, tok.Literal)
			}

			// Nothing but EOF may remain after the expected tokens.
			assert.Equal(t, token.TokenType(token.EOF), nextLegal().Type)
		})
	}
}

View File

@@ -0,0 +1,102 @@
package parser
import (
"fmt"
"strings"
"golang.org/x/xerrors"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
var (
// ErrInvalidExpression is returned by Parse for a malformed expression,
// for example when an opening parenthesis is never closed.
ErrInvalidExpression = xerrors.New("invalid expression error")
)
// Parser turns the tokens produced by a lexer into a LicenseExpression and
// renders such an expression back into a string.
type Parser struct {
// lex is the token source consumed by Parse.
lex *lexer.Lexer
// normalizeFn holds the functions applied, in order, to every license name
// when the expression is rendered.
normalizeFn []NormalizeFunc
}
// LicenseExpression is one element of a singly linked list that represents
// "<node> <operator> <next> ...".
type LicenseExpression struct {
// Node is the operand at this position.
Node Node
// Operator is the token type ("AND" or "OR") that followed Node; it is
// left empty when no operator followed.
Operator string
// Next is the element after Operator, or nil at the end of the list.
Next *LicenseExpression
}
// Node is an operand of an expression: a license name, or a parenthesized
// sub-expression when LicenseExpression is not nil.
type Node struct {
// License is the license name; consecutive identifiers are joined by a space.
License string
// LicenseExpression is the content of a parenthesized group.
LicenseExpression *LicenseExpression
}
// NormalizeFunc converts a license name into its normalized form.
type NormalizeFunc func(n string) string
// New returns a Parser that reads its tokens from lex. No normalize function
// is registered yet.
func New(lex *lexer.Lexer) *Parser {
	p := new(Parser)
	p.lex = lex
	return p
}
// RegisterNormalizeFunc appends fn to the functions that are applied to each
// license name, keeping the order in which they were registered. It returns
// the parser itself so that calls can be chained.
func (p *Parser) RegisterNormalizeFunc(fn ...NormalizeFunc) *Parser {
	if len(fn) > 0 {
		p.normalizeFn = append(p.normalizeFn, fn...)
	}
	return p
}
// Parse reads all tokens from the lexer and builds the expression they
// describe.
//
// Consecutive identifiers are merged into one license name separated by a
// single space. AND / OR close the current element and start the next one.
// Parentheses open a nested expression that becomes the Node of the element
// that was current when the group was opened. Tokens of any other type are
// ignored.
//
// ErrInvalidExpression is returned when parentheses are not balanced, i.e. a
// closing parenthesis has no matching opening one or a group is never closed.
func (p *Parser) Parse() (*LicenseExpression, error) {
	root := &LicenseExpression{}
	cursor := root
	stack := Stack{}

	for tok := p.lex.NextToken(); tok.Type != token.EOF; tok = p.lex.NextToken() {
		switch tok.Type {
		case token.IDENT:
			if cursor.Node.License == "" {
				cursor.Node = Node{License: tok.Literal}
			} else {
				cursor.Node.License = fmt.Sprintf("%s %s", cursor.Node.License, tok.Literal)
			}
		case token.AND, token.OR:
			cursor.Operator = string(tok.Type)
			cursor.Next = &LicenseExpression{}
			cursor = cursor.Next
		case token.LPAREN:
			// Remember where the group is attached and start a fresh list for
			// its content.
			stack.Push(Pair{root: root, cursor: cursor, bracket: tok.Type})
			root = &LicenseExpression{}
			cursor = root
		case token.RPAREN:
			// A closing parenthesis without an opening one must be reported
			// as an error; popping an empty stack would panic.
			if stack.IsEmpty() {
				return nil, ErrInvalidExpression
			}
			outer := stack.Pop()
			outer.cursor.Node.LicenseExpression = root
			cursor = outer.cursor
			root = outer.root
		}
	}

	// Anything left on the stack is a group that was never closed.
	if !stack.IsEmpty() {
		return nil, ErrInvalidExpression
	}
	return root, nil
}
// Normalize renders the expression starting at l as a string. Every element
// contributes its normalized node followed by its operator; the pieces are
// separated by single spaces and surrounding white space is trimmed.
func (p *Parser) Normalize(l *LicenseExpression) string {
	var parts []string
	for elem := l; elem != nil; elem = elem.Next {
		parts = append(parts, p.normalize(elem.Node), elem.Operator)
	}
	return strings.TrimSpace(strings.Join(parts, " "))
}
// normalize renders a single node. A parenthesized group is rendered
// recursively and wrapped in "( " and " )"; a plain license name is passed
// through every registered normalize function in registration order.
func (p *Parser) normalize(n Node) string {
	if group := n.LicenseExpression; group != nil {
		return fmt.Sprintf("( %s )", p.Normalize(group))
	}

	name := n.License
	for _, apply := range p.normalizeFn {
		name = apply(name)
	}
	return name
}

View File

@@ -0,0 +1,189 @@
package parser
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
)
// TestParse checks both the expression tree built by Parse and the string
// rendered from that tree by Normalize.
func TestParse(t *testing.T) {
	tests := []struct {
		name      string
		input     string
		normFunc  []NormalizeFunc
		expect    *LicenseExpression
		expectStr string
		expectErr string
	}{
		{
			name:  "happy path single license",
			input: "Public Domain",
			expect: &LicenseExpression{
				Node: Node{
					License: "Public Domain",
				},
			},
			expectStr: "Public Domain",
		},
		{
			name:  "happy path tag:value license",
			input: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
			expect: &LicenseExpression{
				Node: Node{
					License: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
				},
			},
			expectStr: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
		},
		{
			name:  "happy path single license with norm func",
			input: "Public Domain with exception",
			expect: &LicenseExpression{
				Node: Node{
					License: "Public Domain with exception",
				},
			},
			normFunc: []NormalizeFunc{
				func(n string) string {
					return strings.Replace(n, " ", "_", -1)
				},
				func(n string) string {
					if n == "Public_Domain_with_exception" {
						return "Unlicense"
					}
					return n
				},
			},
			expectStr: "Unlicense",
		},
		{
			name:  "happy path 2",
			input: "Public ._+-",
			expect: &LicenseExpression{
				Node: Node{
					License: "Public ._+-",
				},
			},
			expectStr: "Public ._+-",
		},
		{
			name:  "happy path multi license",
			input: "Public Domain AND ( GPLv2+ or AFL ) AND LGPLv2+ with distribution exceptions",
			expect: &LicenseExpression{
				Node: Node{
					License: "Public Domain",
				},
				Operator: "AND",
				Next: &LicenseExpression{
					Node: Node{
						LicenseExpression: &LicenseExpression{
							Node: Node{
								License: "GPLv2+",
							},
							Operator: "OR",
							Next: &LicenseExpression{
								Node: Node{
									License: "AFL",
								},
							},
						},
					},
					Operator: "AND",
					Next: &LicenseExpression{
						Node: Node{
							License: "LGPLv2+ with distribution exceptions",
						},
					},
				},
			},
			expectStr: "Public Domain AND ( GPLv2+ OR AFL ) AND LGPLv2+ with distribution exceptions",
		},
		{
			name:  "happy path nested license",
			input: "Public Domain AND ( GPLv2+ or AFL AND ( CC0 or LGPL1.0) )",
			expect: &LicenseExpression{
				Node: Node{
					License: "Public Domain",
				},
				Operator: "AND",
				Next: &LicenseExpression{
					Node: Node{
						LicenseExpression: &LicenseExpression{
							Node: Node{
								License: "GPLv2+",
							},
							Operator: "OR",
							Next: &LicenseExpression{
								Node: Node{
									License: "AFL",
								},
								Operator: "AND",
								Next: &LicenseExpression{
									Node: Node{
										LicenseExpression: &LicenseExpression{
											Node: Node{
												License: "CC0",
											},
											Operator: "OR",
											Next: &LicenseExpression{
												Node: Node{
													License: "LGPL1.0",
												},
											},
										},
									},
								},
							},
						},
					},
				},
			},
			expectStr: "Public Domain AND ( GPLv2+ OR AFL AND ( CC0 OR LGPL1.0 ) )",
		},
		{
			// Renamed so that every subtest has a unique name.
			name:  "happy path top-level parentheses",
			input: "( GPLv2+ or CC0 )",
			expect: &LicenseExpression{
				Node: Node{
					LicenseExpression: &LicenseExpression{
						Node: Node{
							License: "GPLv2+",
						},
						Operator: "OR",
						Next: &LicenseExpression{
							Node: Node{
								License: "CC0",
							},
						},
					},
				},
			},
			expectStr: "( GPLv2+ OR CC0 )",
		},
		{
			name:      "bad path close bracket not found",
			input:     "Public Domain AND ( GPLv2+ ",
			expectErr: "invalid expression error",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			l := lexer.New(tt.input)
			p := New(l).RegisterNormalizeFunc(tt.normFunc...)

			got, err := p.Parse()
			if tt.expectErr != "" {
				// EqualError fails cleanly, instead of panicking, when no
				// error is returned at all.
				require.EqualError(t, err, tt.expectErr)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tt.expect, got)
			assert.Equal(t, tt.expectStr, p.Normalize(got))
		})
	}
}

View File

@@ -0,0 +1,28 @@
package parser
import (
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
// Pair is the parser state saved when a parenthesized group is opened.
type Pair struct {
// root is the head of the expression that encloses the group.
root *LicenseExpression
// cursor is the element of the enclosing expression that receives the group.
cursor *LicenseExpression
// bracket is the type of the token that opened the group.
bracket token.TokenType
}
// Stack is a last-in, first-out collection of Pair values.
type Stack []Pair
// Push places x on top of the stack.
func (s *Stack) Push(x Pair) {
	grown := append(*s, x)
	*s = grown
}
// Pop removes the element on top of the stack and returns it.
// The stack must not be empty: indexing an empty stack panics, so callers
// should check IsEmpty first.
func (s *Stack) Pop() Pair {
	last := len(*s) - 1
	top := (*s)[last]
	*s = (*s)[:last]
	return top
}
// IsEmpty reports whether the stack holds no elements.
func (s *Stack) IsEmpty() bool {
	depth := len(*s)
	return depth == 0
}

View File

@@ -0,0 +1,37 @@
package token
import (
"strings"
)
// Token types. They are untyped string constants that are used as TokenType values.
const (
// ILLEGAL marks a byte that is not part of any other token.
ILLEGAL = "ILLEGAL"
// EOF marks the end of the input.
EOF = "EOF"
// IDENT is an identifier that is not a keyword, e.g. a word of a license name.
IDENT = "IDENT"
// LPAREN and RPAREN are the opening and closing parentheses.
LPAREN = "("
RPAREN = ")"
// AND and OR are the operator keywords.
AND = "AND"
OR = "OR"
)
// keywords maps an upper-cased identifier to its keyword token type.
var keywords = map[string]TokenType{
"AND": AND,
"OR": OR,
}
// TokenType identifies the kind of a token.
type TokenType string
// Token is a lexical unit of a license expression.
type Token struct {
// Type is the kind of the token.
Type TokenType
// Literal is the text of the token.
Literal string
}
// LookupIdent returns the keyword token type for ident, compared without
// regard to case, or IDENT when ident is not a keyword.
func LookupIdent(ident string) TokenType {
	tok, isKeyword := keywords[strings.ToUpper(ident)]
	if !isKeyword {
		return IDENT
	}
	return tok
}

View File

@@ -29,24 +29,27 @@ var mapping = map[string]string{
"GPL3+": GPL30,
"GPL-3+": GPL30,
"GPL-3.0-OR-LATER": GPL30,
"GPL-3+ WITH AUTOCONF EXCEPTION": GPL30withautoconfexception,
"GPL-3+-WITH-BISON-EXCEPTION": GPL20withbisonexception,
"GPL": GPL30, // 2? 3?
// LGPL
"LGPL2": LGPL20,
"LGPL 2": LGPL20,
"LGPL 2.0": LGPL20,
"LGPL-2": LGPL20,
"LGPL2+": LGPL20,
"LGPL-2+": LGPL20,
"LGPL-2.0+": LGPL20,
"LGPL-2.1": LGPL21,
"LGPL 2.1": LGPL21,
"LGPL-2.1+": LGPL21,
"LGPLV2.1+": LGPL21,
"LGPL-3": LGPL30,
"LGPL 3": LGPL30,
"LGPL-3+": LGPL30,
"LGPL": LGPL30, // 2? 3?
"LGPL2": LGPL20,
"LGPL 2": LGPL20,
"LGPL 2.0": LGPL20,
"LGPL-2": LGPL20,
"LGPL2+": LGPL20,
"LGPL-2+": LGPL20,
"LGPL-2.0+": LGPL20,
"LGPL-2.1": LGPL21,
"LGPL 2.1": LGPL21,
"LGPL-2.1+": LGPL21,
"LGPLV2.1+": LGPL21,
"LGPL-3": LGPL30,
"LGPL 3": LGPL30,
"LGPL-3+": LGPL30,
"LGPL": LGPL30, // 2? 3?
"GNU LESSER": LGPL30, // 2? 3?
// MPL
"MPL1.0": MPL10,
@@ -68,6 +71,9 @@ var mapping = map[string]string{
"APACHE 2.0": Apache20,
"RUBY": Ruby,
"ZLIB": Zlib,
// Public Domain
"PUBLIC DOMAIN": Unlicense,
}
func Normalize(name string) string {

View File

@@ -13,6 +13,8 @@ import (
"k8s.io/utils/clock"
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/licensing/expression"
"github.com/aquasecurity/trivy/pkg/purl"
"github.com/aquasecurity/trivy/pkg/scanner/utils"
"github.com/aquasecurity/trivy/pkg/types"
@@ -263,7 +265,7 @@ func (m *Marshaler) langPackage(target, appType string) (spdx.Package2_2, error)
}
func (m *Marshaler) pkgToSpdxPackage(t string, class types.ResultClass, metadata types.Metadata, pkg ftypes.Package) (spdx.Package2_2, error) {
license := getLicense(pkg)
license := GetLicense(pkg)
pkgID, err := calcPkgID(m.hasher, pkg)
if err != nil {
@@ -355,12 +357,16 @@ func purlExternalReference(packageURL string) *spdx.PackageExternalReference2_2
}
}
func getLicense(p ftypes.Package) string {
func GetLicense(p ftypes.Package) string {
if len(p.Licenses) == 0 {
return "NONE"
}
return strings.Join(p.Licenses, ", ")
return expression.Normalize(
expression.Join(p.Licenses, expression.AND),
licensing.Normalize,
expression.NormalizeForSPDX,
)
}
func getDocumentNamespace(r types.Report, m *Marshaler) string {

View File

@@ -176,8 +176,8 @@ func TestMarshaler_Marshal(t *testing.T) {
PackageSPDXIdentifier: spdx.ElementID("Package-fd0dc3cf913d5bc3"),
PackageName: "binutils",
PackageVersion: "2.30",
PackageLicenseConcluded: "GPLv3+",
PackageLicenseDeclared: "GPLv3+",
PackageLicenseConcluded: "GPL-3.0",
PackageLicenseDeclared: "GPL-3.0",
PackageExternalReferences: []*spdx.PackageExternalReference2_2{
{
Category: tspdx.CategoryPackageManager,
@@ -338,8 +338,8 @@ func TestMarshaler_Marshal(t *testing.T) {
PackageSPDXIdentifier: spdx.ElementID("Package-d8dccb186bafaf37"),
PackageName: "acl",
PackageVersion: "2.2.53",
PackageLicenseConcluded: "GPLv2+",
PackageLicenseDeclared: "GPLv2+",
PackageLicenseConcluded: "GPL-2.0",
PackageLicenseDeclared: "GPL-2.0",
PackageExternalReferences: []*spdx.PackageExternalReference2_2{
{
Category: tspdx.CategoryPackageManager,
@@ -686,3 +686,66 @@ func TestMarshaler_Marshal(t *testing.T) {
})
}
}
// Test_GetLicense checks the SPDX license expression that GetLicense builds
// from the licenses of a package.
func Test_GetLicense(t *testing.T) {
	tests := []struct {
		name  string
		input ftypes.Package
		want  string
	}{
		{
			name: "happy path",
			input: ftypes.Package{
				Licenses: []string{
					"GPLv2+",
				},
			},
			want: "GPL-2.0",
		},
		{
			name: "happy path with multi license",
			input: ftypes.Package{
				Licenses: []string{
					"GPLv2+",
					"GPLv3+",
				},
			},
			want: "GPL-2.0 AND GPL-3.0",
		},
		{
			name: "happy path with OR operator",
			input: ftypes.Package{
				Licenses: []string{
					"GPLv2+",
					"LGPL 2.0 or GNU LESSER",
				},
			},
			want: "GPL-2.0 AND ( LGPL-2.0 OR LGPL-3.0 )",
		},
		{
			name: "happy path with AND operator",
			input: ftypes.Package{
				Licenses: []string{
					"GPLv2+",
					"LGPL 2.0 and GNU LESSER",
				},
			},
			want: "GPL-2.0 AND LGPL-2.0 AND LGPL-3.0",
		},
		{
			name: "happy path with WITH operator",
			input: ftypes.Package{
				Licenses: []string{
					"AFL 2.0",
					"AFL 3.0 with distribution exception",
				},
			},
			want: "AFL-2.0 AND AFL-3.0 WITH distribution-exception",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The failure message uses the exported name of the function under test.
			assert.Equalf(t, tt.want, tspdx.GetLicense(tt.input), "GetLicense(%v)", tt.input)
		})
	}
}