From 2bb25e766bc1afe035ea3204f54444774f1ca8ab Mon Sep 17 00:00:00 2001 From: Teppei Fukuda Date: Tue, 14 Mar 2023 09:27:17 +0200 Subject: [PATCH] refactor(license): use goyacc for license parser (#3824) --- Makefile | 8 + pkg/fanal/analyzer/pkg/dpkg/copyright.go | 16 +- pkg/licensing/category.go | 9 + pkg/licensing/expression/expression.go | 104 ++-- pkg/licensing/expression/expression_test.go | 83 +-- pkg/licensing/expression/lexer.go | 119 ++++ pkg/licensing/expression/lexer/lexer.go | 85 --- pkg/licensing/expression/lexer/lexer_test.go | 143 ----- pkg/licensing/expression/lexer_test.go | 239 +++++++++ pkg/licensing/expression/parser.go | 2 + pkg/licensing/expression/parser.go.y | 73 +++ pkg/licensing/expression/parser/parser.go | 102 ---- .../expression/parser/parser_test.go | 189 ------- pkg/licensing/expression/parser/stack.go | 28 - pkg/licensing/expression/parser_gen.go | 507 ++++++++++++++++++ pkg/licensing/expression/parser_test.go | 156 ++++++ pkg/licensing/expression/token/token.go | 37 -- pkg/licensing/expression/types.go | 83 +++ pkg/licensing/expression/y.output | 172 ++++++ pkg/licensing/normalize.go | 6 +- pkg/sbom/spdx/marshal.go | 21 +- pkg/sbom/spdx/marshal_test.go | 16 +- 22 files changed, 1493 insertions(+), 705 deletions(-) create mode 100644 pkg/licensing/expression/lexer.go delete mode 100644 pkg/licensing/expression/lexer/lexer.go delete mode 100644 pkg/licensing/expression/lexer/lexer_test.go create mode 100644 pkg/licensing/expression/lexer_test.go create mode 100644 pkg/licensing/expression/parser.go create mode 100644 pkg/licensing/expression/parser.go.y delete mode 100644 pkg/licensing/expression/parser/parser.go delete mode 100644 pkg/licensing/expression/parser/parser_test.go delete mode 100644 pkg/licensing/expression/parser/stack.go create mode 100644 pkg/licensing/expression/parser_gen.go create mode 100644 pkg/licensing/expression/parser_test.go delete mode 100644 pkg/licensing/expression/token/token.go create mode 100644 
pkg/licensing/expression/types.go create mode 100644 pkg/licensing/expression/y.output diff --git a/Makefile b/Makefile index cde76bc5b4..4be79dac2b 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,9 @@ $(GOBIN)/labeler: $(GOBIN)/easyjson: go install github.com/mailru/easyjson/...@v0.7.7 +$(GOBIN)/goyacc: + go install golang.org/x/tools/cmd/goyacc@latest + .PHONY: wire wire: $(GOBIN)/wire wire gen ./pkg/commands/... ./pkg/rpc/... @@ -133,3 +136,8 @@ mkdocs-serve: .PHONY: easyjson easyjson: $(GOBIN)/easyjson easyjson pkg/module/serialize/types.go + +# Generate license parser with goyacc +.PHONY: yacc +yacc: $(GOBIN)/goyacc + go generate ./pkg/licensing/expression/... diff --git a/pkg/fanal/analyzer/pkg/dpkg/copyright.go b/pkg/fanal/analyzer/pkg/dpkg/copyright.go index e7fd492b88..e8f8947a0a 100644 --- a/pkg/fanal/analyzer/pkg/dpkg/copyright.go +++ b/pkg/fanal/analyzer/pkg/dpkg/copyright.go @@ -88,9 +88,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens // cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual. 
// normalizeLicense returns a normalized license identifier in a heuristic way.
//
// It drops everything from the first "(" onwards, strips the rarely seen
// "The main library is licensed under " prefix and " license" suffix, and
// returns the result with surrounding whitespace removed.
func normalizeLicense(s string) string {
	// "The MIT License (MIT)" => "The MIT License"
	s, _, _ = strings.Cut(s, "(")

	// Cutting at "(" usually leaves a trailing space behind. Remove it before
	// the suffix check, otherwise "GPL-2 license (v2)" would keep " license".
	s = strings.TrimSpace(s)

	// Very rarely has below phrases
	s = strings.TrimPrefix(s, "The main library is licensed under ")
	s = strings.TrimSuffix(s, " license")

	return strings.TrimSpace(s)
}
Operator string - -const ( - AND Operator = "AND" - OR Operator = "OR" - WITH Operator = "WITH" +var ( + ErrInvalidExpression = xerrors.New("invalid expression error") ) -func (o Operator) String() string { - return fmt.Sprintf(" %s ", string(o)) +type NormalizeFunc func(license string) string + +func parse(license string) (Expression, error) { + l := NewLexer(strings.NewReader(license)) + if yyParse(l) != 0 { + return nil, xerrors.Errorf("license parse error: %w", l.Err()) + } else if err := l.Err(); err != nil { + return nil, err + } + + return l.result, nil } -func Normalize(license string, fn ...parser.NormalizeFunc) string { - lex := lexer.New(license) - licenseParser := parser.New(lex).RegisterNormalizeFunc( - fn..., - ) - expression, err := licenseParser.Parse() +func Normalize(license string, fn ...NormalizeFunc) (string, error) { + expr, err := parse(license) if err != nil { - return license + return "", xerrors.Errorf("license (%s) parse error: %w", license, err) } - return licenseParser.Normalize(expression) + expr = normalize(expr, fn...) + + return expr.String(), nil } -func Join(elems []string, sep Operator) string { - var licenses []string - for i, license := range elems { - var mid Operator - if sep == AND { - mid = OR - } else if sep == OR { - mid = AND +func normalize(expr Expression, fn ...NormalizeFunc) Expression { + switch e := expr.(type) { + case SimpleExpr: + for _, f := range fn { + e.license = f(e.license) } - - if i != 0 && strings.Contains(strings.ToUpper(license), mid.String()) { - license = fmt.Sprintf("(%s)", license) - } - licenses = append(licenses, license) + return e + case CompoundExpr: + e.left = normalize(e.left, fn...) + e.right = normalize(e.right, fn...) + e.conjunction.literal = strings.ToUpper(e.conjunction.literal) // e.g. "and" => "AND" + return e } - return strings.Join(licenses, sep.String()) + return expr } -// NormalizeForSPDX is normalized license-id replace ' ' to '-'. 
// NormalizeForSPDX rewrites a license name so that it only contains characters
// allowed in an SPDX license-id: every character other than an ASCII letter,
// an ASCII digit, '-', '.' or ':' is replaced with '-'.
// SPDX license MUST NOT be white space between a license-id.
// There MUST be white space on either side of the operator "WITH".
// ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions
func NormalizeForSPDX(s string) string {
	var b strings.Builder
	// Every input rune yields either itself or a single '-', so the output
	// never needs more bytes than the input.
	b.Grow(len(s))

	for _, c := range s {
		switch {
		// idstring = 1*(ALPHA / DIGIT / "-" / "." )
		// DIGIT is ASCII only; non-ASCII numerals such as '٣' or '②' are invalid.
		case isAlphabet(c) || (c <= unicode.MaxASCII && unicode.IsDigit(c)) || c == '-' || c == '.':
			_, _ = b.WriteRune(c)
		case c == ':':
			// TODO: Support DocumentRef
			_, _ = b.WriteRune(c)
		default:
			// Replace invalid characters with '-'
			_, _ = b.WriteRune('-')
		}
	}
	return b.String()
}

// isAlphabet reports whether r is an ASCII letter (a-z or A-Z).
func isAlphabet(r rune) bool {
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}
want string + wantErr string }{ { - name: "happy path", + name: "SPDX, space", license: "AFL 2.0", + fn: NormalizeForSPDX, want: "AFL-2.0", }, { - name: "happy path with WITH section", + name: "SPDX, exception", license: "AFL 2.0 with Linux-syscall-note exception", + fn: NormalizeForSPDX, want: "AFL-2.0 WITH Linux-syscall-note-exception", }, + { + name: "SPDX, invalid chars", + license: "LGPL_2.1_only or MIT OR BSD-3>Clause", + fn: NormalizeForSPDX, + want: "LGPL-2.1-only OR MIT OR BSD-3-Clause", + }, + { + name: "upper", + license: "LGPL-2.1-only OR MIT", + fn: strings.ToUpper, + want: "LGPL-2.1-ONLY OR MIT", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, NormalizeForSPDX(tt.license), "NormalizeWithExpression(%v)", tt.license) - }) - } -} + got, err := Normalize(tt.license, tt.fn) + if tt.wantErr != "" { + assert.ErrorContains(t, err, tt.wantErr) + return + } -func TestJoin(t *testing.T) { - tests := []struct { - name string - inputElements []string - inputOperator Operator - expect string - }{ - { - name: "happy path single license", - inputElements: []string{"MIT"}, - inputOperator: AND, - expect: "MIT", - }, - { - name: "happy path multi license", - inputElements: []string{"MIT", "GPL1.0"}, - inputOperator: AND, - expect: "MIT AND GPL1.0", - }, - { - name: "happy path multi license with AND operator", - inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"}, - inputOperator: AND, - expect: "MIT AND GPL1.0 AND GPL2.0", - }, - { - name: "happy path multi license with OR operator", - inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"}, - inputOperator: OR, - expect: "MIT OR GPL1.0 OR GPL2.0", - }, - { - name: "happy path multi license with OR operator, separator AND", - inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"}, - inputOperator: AND, - expect: "MIT AND (GPL1.0 OR GPL2.0)", - }, - { - name: "happy path multi license with AND operator, separator OR", - inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"}, - 
inputOperator: OR, - expect: "MIT OR (GPL1.0 AND GPL2.0)", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := Join(tt.inputElements, tt.inputOperator) - assert.Equal(t, tt.expect, got) + require.NoError(t, err) + assert.Equalf(t, tt.want, got, "NormalizeWithExpression(%v)", tt.license) }) } } diff --git a/pkg/licensing/expression/lexer.go b/pkg/licensing/expression/lexer.go new file mode 100644 index 0000000000..64b8d17ef4 --- /dev/null +++ b/pkg/licensing/expression/lexer.go @@ -0,0 +1,119 @@ +package expression + +import ( + "bufio" + "errors" + "io" + "strings" + "unicode" + "unicode/utf8" + + multierror "github.com/hashicorp/go-multierror" +) + +type Lexer struct { + s *bufio.Scanner + result Expression + errs error +} + +func NewLexer(reader io.Reader) *Lexer { + scanner := bufio.NewScanner(reader) + scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { + // The implementation references bufio.ScanWords() + + // Skip leading spaces. + start := 0 + for width := 0; start < len(data); start += width { + var r rune + r, width = utf8.DecodeRune(data[start:]) + if !unicode.IsSpace(r) { + break + } + } + // Process terminal symbols + if len(data) > start && (data[start] == '(' || data[start] == ')' || data[start] == '+') { + return start + 1, data[start : start+1], nil + } + + // Scan until space or token, marking end of word. 
+ for width, i := 0, start; i < len(data); i += width { + var r rune + r, width = utf8.DecodeRune(data[i:]) + switch r { + case '(', ')': + return i, data[start:i], nil + case '+': + // Peek the next rune + if len(data) > i+width { + adv := i + i += width + r, width = utf8.DecodeRune(data[i:]) + if unicode.IsSpace(r) || r == '(' || r == ')' { + return adv, data[start:adv], nil + } + } else if atEOF { + return i, data[start:i], nil + } + default: + if unicode.IsSpace(r) { + return i + width, data[start:i], nil + } + } + } + // If we're at EOF, we have a final, non-empty, non-terminated word. Return it. + if atEOF && len(data) > start { + return len(data), data[start:], nil + } + // Request more data. + return start, nil, nil + }) + + return &Lexer{ + s: scanner, + } +} + +func (l *Lexer) Lex(lval *yySymType) int { + if !l.s.Scan() { + return 0 + } + + var token int + literal := l.s.Text() + switch literal { + case "(", ")", "+": + token = int(literal[0]) + default: + token = lookup(literal) + } + + lval.token = Token{ + token: token, + literal: literal, + } + + if err := l.s.Err(); err != nil { + l.errs = multierror.Append(l.errs, l.s.Err()) + } + + return lval.token.token +} + +func (l *Lexer) Error(e string) { + l.errs = multierror.Append(l.errs, errors.New(e)) +} + +func (l *Lexer) Err() error { + return l.errs +} + +func lookup(t string) int { + t = strings.ToUpper(t) + for i, name := range yyToknames { + if t == name { + return yyPrivate + (i - 1) + } + } + return IDENT +} diff --git a/pkg/licensing/expression/lexer/lexer.go b/pkg/licensing/expression/lexer/lexer.go deleted file mode 100644 index c44a191485..0000000000 --- a/pkg/licensing/expression/lexer/lexer.go +++ /dev/null @@ -1,85 +0,0 @@ -package lexer - -import ( - "github.com/aquasecurity/trivy/pkg/licensing/expression/token" -) - -type Lexer struct { - input string - position int - readPosition int - ch byte -} - -func New(input string) *Lexer { - l := &Lexer{input: input} - l.readChar() - return l -} 
- -func (l *Lexer) NextToken() token.Token { - var tok token.Token - - l.skipWhitespace() - - switch l.ch { - case 0: - tok = newToken(token.EOF, l.ch) - case '(': - tok = newToken(token.LPAREN, l.ch) - case ')': - tok = newToken(token.RPAREN, l.ch) - default: - if isLetter(l.ch) { - tok.Literal = l.readIdentifier() - tok.Type = token.LookupIdent(tok.Literal) - return tok - } else { - tok = newToken(token.ILLEGAL, l.ch) - } - } - l.readChar() - return tok -} - -func isLetter(ch byte) bool { - return 'a' <= ch && ch <= 'z' || - 'A' <= ch && ch <= 'Z' || - '0' <= ch && ch <= '9' || - ch == '_' || - ch == '+' || - ch == '.' || - ch == '-' || - ch == '/' || - ch == ':' || - ch == '=' -} - -func (l *Lexer) readIdentifier() string { - position := l.position - for isLetter(l.ch) { - l.readChar() - } - return l.input[position:l.position] -} - -func newToken(tokenType token.TokenType, ch byte) token.Token { - return token.Token{Type: tokenType, Literal: string(ch)} -} - -func (l *Lexer) skipWhitespace() { - for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { - l.readChar() - } -} - -func (l *Lexer) readChar() { - if l.readPosition >= len(l.input) { - // 0 is ASCII NUL - l.ch = 0 - } else { - l.ch = l.input[l.readPosition] - } - l.position = l.readPosition - l.readPosition++ -} diff --git a/pkg/licensing/expression/lexer/lexer_test.go b/pkg/licensing/expression/lexer/lexer_test.go deleted file mode 100644 index 6c2804abc7..0000000000 --- a/pkg/licensing/expression/lexer/lexer_test.go +++ /dev/null @@ -1,143 +0,0 @@ -package lexer - -import ( - "testing" - - "github.com/aquasecurity/trivy/pkg/licensing/expression/token" - - "github.com/stretchr/testify/assert" -) - -func TestNextToken(t *testing.T) { - tests := []struct { - name string - licenseExpression string - expectTokens []token.Token - }{ - { - name: "empty input", - licenseExpression: "", - expectTokens: []token.Token{ - { - Type: token.EOF, - Literal: string(byte(0)), - }, - }, - }, - { - name: "single 
ident", - licenseExpression: "GPL1.0+", - expectTokens: []token.Token{ - { - Type: token.IDENT, - Literal: "GPL1.0+", - }, - }, - }, - { - name: "multi ident", - licenseExpression: "Public Domain", - expectTokens: []token.Token{ - { - Type: token.IDENT, - Literal: "Public", - }, - { - Type: token.IDENT, - Literal: "Domain", - }, - }, - }, - { - name: "AND OR operator", - licenseExpression: "Public Domain AND GPL1.0+ OR GPL2.0_or_later", - expectTokens: []token.Token{ - { - Type: token.IDENT, - Literal: "Public", - }, - { - Type: token.IDENT, - Literal: "Domain", - }, - { - Type: token.AND, - Literal: "AND", - }, - { - Type: token.IDENT, - Literal: "GPL1.0+", - }, - { - Type: token.OR, - Literal: "OR", - }, - { - Type: token.IDENT, - Literal: "GPL2.0_or_later", - }, - }, - }, - { - name: "PAREN operator", - licenseExpression: "(GPL1.0+ OR GPL2.0)", - expectTokens: []token.Token{ - { - Type: token.LPAREN, - Literal: "(", - }, - { - Type: token.IDENT, - Literal: "GPL1.0+", - }, - { - Type: token.OR, - Literal: "OR", - }, - { - Type: token.IDENT, - Literal: "GPL2.0", - }, - { - Type: token.RPAREN, - Literal: ")", - }, - }, - }, - { - name: "illegal string", - licenseExpression: "GPL1.0+" + string(byte(0x20)) + "あ" + "🇯🇵" + "AND LGPL1.0", - expectTokens: []token.Token{ - { - Type: token.IDENT, - Literal: "GPL1.0+", - }, - { - Type: token.AND, - Literal: "AND", - }, - { - Type: token.IDENT, - Literal: "LGPL1.0+", - }, - }, - }, - } - - for _, tt := range tests { - l := New(tt.licenseExpression) - for _, expect := range tt.expectTokens { - tok := l.NextToken() - - // Skip literal - if tok.Type == token.ILLEGAL { - continue - } - - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, expect.Type, tok.Type) - assert.Equal(t, expect.Literal, tok.Literal) - }) - } - } -} diff --git a/pkg/licensing/expression/lexer_test.go b/pkg/licensing/expression/lexer_test.go new file mode 100644 index 0000000000..d9492fb804 --- /dev/null +++ b/pkg/licensing/expression/lexer_test.go @@ 
-0,0 +1,239 @@ +package expression + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLexer_Lex(t *testing.T) { + tests := []struct { + name string + input string + want []Token + }{ + { + name: "simple", + input: "GPL-2.0-only", + want: []Token{ + { + token: IDENT, + literal: "GPL-2.0-only", + }, + }, + }, + { + name: "with space", + input: "Public Domain", + want: []Token{ + { + token: IDENT, + literal: "Public", + }, + { + token: IDENT, + literal: "Domain", + }, + }, + }, + { + name: "and", + input: "Public Domain AND MIT", + want: []Token{ + { + token: IDENT, + literal: "Public", + }, + { + token: IDENT, + literal: "Domain", + }, + { + token: AND, + literal: "AND", + }, + { + token: IDENT, + literal: "MIT", + }, + }, + }, + { + name: "or", + input: "LGPL-2.1-only OR MIT OR BSD-3-Clause", + want: []Token{ + { + token: IDENT, + literal: "LGPL-2.1-only", + }, + { + token: OR, + literal: "OR", + }, + { + token: IDENT, + literal: "MIT", + }, + { + token: OR, + literal: "OR", + }, + { + token: IDENT, + literal: "BSD-3-Clause", + }, + }, + }, + { + name: "parenthesis", + input: "LGPL-2.1-only AND (MIT OR BSD-3-Clause)", + want: []Token{ + { + token: IDENT, + literal: "LGPL-2.1-only", + }, + { + token: AND, + literal: "AND", + }, + { + token: int('('), + literal: "(", + }, + { + token: IDENT, + literal: "MIT", + }, + { + token: OR, + literal: "OR", + }, + { + token: IDENT, + literal: "BSD-3-Clause", + }, + { + token: int(')'), + literal: ")", + }, + }, + }, + { + name: "exception", + input: "LGPL-2.1-only AND GPL-2.0-or-later WITH Bison-exception-2.2", + want: []Token{ + { + token: IDENT, + literal: "LGPL-2.1-only", + }, + { + token: AND, + literal: "AND", + }, + { + token: IDENT, + literal: "GPL-2.0-or-later", + }, + { + token: WITH, + literal: "WITH", + }, + { + token: IDENT, + literal: "Bison-exception-2.2", + }, + }, + }, + { + name: "plus", + input: "Public Domain+", + want: []Token{ 
+ { + token: IDENT, + literal: "Public", + }, + { + token: IDENT, + literal: "Domain", + }, + { + token: int('+'), + literal: "+", + }, + }, + }, + { + name: "plus in the middle", + input: "ISC+IBM", + want: []Token{ + { + token: IDENT, + literal: "ISC+IBM", + }, + }, + }, + { + name: "plus with the parenthesis", + input: "(GPL1.0+)", + want: []Token{ + { + token: int('('), + literal: "(", + }, + { + token: IDENT, + literal: "GPL1.0", + }, + { + token: int('+'), + literal: "+", + }, + { + token: int(')'), + literal: ")", + }, + }, + }, + { + name: "utf-8", + input: "GPL1.0+ " + string(byte(0x20)) + "あ🇯🇵" + " and LGPL1.0", + want: []Token{ + { + token: IDENT, + literal: "GPL1.0", + }, + { + token: int('+'), + literal: "+", + }, + { + token: IDENT, + literal: "あ🇯🇵", + }, + { + token: AND, + literal: "and", + }, + { + token: IDENT, + literal: "LGPL1.0", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + l := NewLexer(strings.NewReader(tt.input)) + var got []Token + var lval yySymType + for l.Lex(&lval) != 0 { + got = append(got, lval.token) + lval = yySymType{} + } + require.NoError(t, l.Err()) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/licensing/expression/parser.go b/pkg/licensing/expression/parser.go new file mode 100644 index 0000000000..ef80d948f9 --- /dev/null +++ b/pkg/licensing/expression/parser.go @@ -0,0 +1,2 @@ +//go:generate goyacc -o parser_gen.go parser.go.y +package expression diff --git a/pkg/licensing/expression/parser.go.y b/pkg/licensing/expression/parser.go.y new file mode 100644 index 0000000000..82bd110286 --- /dev/null +++ b/pkg/licensing/expression/parser.go.y @@ -0,0 +1,73 @@ +%{ +package expression +%} + +%union{ + token Token + expr Expression +} + +%type license +%type simple +%type plus +%type compound +%token IDENT OR AND WITH + +%left OR +%left AND +%right WITH +%right '+' + +%% + +license + : compound + { + $$ = $1 + if l, ok := yylex.(*Lexer); ok{ + l.result = $$ + } + } + +simple 
+ : IDENT + { + $$ = SimpleExpr{license: $1.literal} + } + | simple IDENT /* e.g. Public Domain */ + { + $$ = SimpleExpr{license: $1.String() + " " + $2.literal} + } + +plus + : simple '+' + { + $$ = SimpleExpr{license: $1.String(), hasPlus: true} + } + +compound + : simple { + $$ = $1 + } + | plus { + $$ = $1 + } + | compound AND compound /* compound-expression "AND" compound-expression */ + { + $$ = CompoundExpr{left: $1, conjunction: $2, right: $3} + } + | compound OR compound /* compound-expression "OR" compound-expression */ + { + $$ = CompoundExpr{left: $1, conjunction: $2, right: $3} + } + | compound WITH compound /* simple-expression "WITH" license-exception-id */ + { + $$ = CompoundExpr{left: $1, conjunction: $2, right: $3} + } + | '(' compound ')' + { + $$ = $2 + } + + +%% \ No newline at end of file diff --git a/pkg/licensing/expression/parser/parser.go b/pkg/licensing/expression/parser/parser.go deleted file mode 100644 index 777d956564..0000000000 --- a/pkg/licensing/expression/parser/parser.go +++ /dev/null @@ -1,102 +0,0 @@ -package parser - -import ( - "fmt" - "strings" - - "golang.org/x/xerrors" - - "github.com/aquasecurity/trivy/pkg/licensing/expression/lexer" - "github.com/aquasecurity/trivy/pkg/licensing/expression/token" -) - -var ( - ErrInvalidExpression = xerrors.New("invalid expression error") -) - -type Parser struct { - lex *lexer.Lexer - normalizeFn []NormalizeFunc -} - -type LicenseExpression struct { - Node Node - Operator string - Next *LicenseExpression -} - -type Node struct { - License string - LicenseExpression *LicenseExpression -} - -type NormalizeFunc func(n string) string - -func New(lex *lexer.Lexer) *Parser { - return &Parser{ - lex: lex, - } -} - -func (p *Parser) RegisterNormalizeFunc(fn ...NormalizeFunc) *Parser { - p.normalizeFn = append(p.normalizeFn, fn...) 
- return p -} - -func (p *Parser) Parse() (*LicenseExpression, error) { - root := &LicenseExpression{} - cursor := root - stack := Stack{} - - for tok := p.lex.NextToken(); tok.Type != token.EOF; tok = p.lex.NextToken() { - switch tok.Type { - case token.IDENT: - if cursor.Node.License == "" { - cursor.Node = Node{License: tok.Literal} - } else { - cursor.Node.License = fmt.Sprintf("%s %s", cursor.Node.License, tok.Literal) - } - case token.AND, token.OR: - cursor.Operator = string(tok.Type) - cursor.Next = &LicenseExpression{} - cursor = cursor.Next - case token.LPAREN: - p := Pair{root: root, cursor: cursor, bracket: tok.Type} - stack.Push(p) - root = &LicenseExpression{} - cursor = root - case token.RPAREN: - e := stack.Pop() - if e.bracket == token.LPAREN && tok.Type != token.RPAREN { - return nil, ErrInvalidExpression - } - e.cursor.Node.LicenseExpression = root - cursor = e.cursor - root = e.root - } - } - if !stack.IsEmpty() { - return nil, ErrInvalidExpression - } - return root, nil -} - -func (p *Parser) Normalize(l *LicenseExpression) string { - cursor := l - - var str string - for ; cursor != nil; cursor = cursor.Next { - str = strings.Join([]string{str, p.normalize(cursor.Node), cursor.Operator}, " ") - } - return strings.TrimSpace(str) -} - -func (p *Parser) normalize(n Node) string { - if n.LicenseExpression != nil { - return fmt.Sprintf("( %s )", p.Normalize(n.LicenseExpression)) - } - for _, fn := range p.normalizeFn { - n.License = fn(n.License) - } - return n.License -} diff --git a/pkg/licensing/expression/parser/parser_test.go b/pkg/licensing/expression/parser/parser_test.go deleted file mode 100644 index 267d0a9103..0000000000 --- a/pkg/licensing/expression/parser/parser_test.go +++ /dev/null @@ -1,189 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/aquasecurity/trivy/pkg/licensing/expression/lexer" -) - -func TestParse(t *testing.T) { - 
tests := []struct { - name string - input string - normFunc []NormalizeFunc - expect *LicenseExpression - expectStr string - expectErr string - }{ - { - name: "happy path single license", - input: "Public Domain", - expect: &LicenseExpression{ - Node: Node{ - License: "Public Domain", - }, - }, - expectStr: "Public Domain", - }, - { - name: "happy path tag:value license", - input: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", - expect: &LicenseExpression{ - Node: Node{ - License: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", - }, - }, - expectStr: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", - }, - { - name: "happy path single license with norm func", - input: "Public Domain with exception", - expect: &LicenseExpression{ - Node: Node{ - License: "Public Domain with exception", - }, - }, - normFunc: []NormalizeFunc{ - func(n string) string { - return strings.Replace(n, " ", "_", -1) - }, - func(n string) string { - if n == "Public_Domain_with_exception" { - return "Unlicense" - } - return n - }, - }, - expectStr: "Unlicense", - }, - { - name: "happy path 2", - input: "Public ._+-", - expect: &LicenseExpression{ - Node: Node{ - License: "Public ._+-", - }, - }, - expectStr: "Public ._+-", - }, - { - name: "happy path multi license", - input: "Public Domain AND ( GPLv2+ or AFL ) AND LGPLv2+ with distribution exceptions", - expect: &LicenseExpression{ - Node: Node{ - License: "Public Domain", - }, - Operator: "AND", - Next: &LicenseExpression{ - Node: Node{ - LicenseExpression: &LicenseExpression{ - Node: Node{ - License: "GPLv2+", - }, - Operator: "OR", - Next: &LicenseExpression{ - Node: Node{ - License: "AFL", - }, - }, - }, - }, - Operator: "AND", - Next: &LicenseExpression{ - Node: Node{ - License: "LGPLv2+ with distribution exceptions", - }, - }, - }, - }, - expectStr: "Public Domain AND ( GPLv2+ OR AFL ) AND LGPLv2+ with distribution exceptions", - }, - { - name: "happy path nested license", - input: "Public Domain AND ( GPLv2+ or AFL AND ( 
CC0 or LGPL1.0) )", - expect: &LicenseExpression{ - Node: Node{ - License: "Public Domain", - }, - Operator: "AND", - Next: &LicenseExpression{ - Node: Node{ - LicenseExpression: &LicenseExpression{ - Node: Node{ - License: "GPLv2+", - }, - Operator: "OR", - Next: &LicenseExpression{ - Node: Node{ - License: "AFL", - }, - Operator: "AND", - Next: &LicenseExpression{ - Node: Node{ - LicenseExpression: &LicenseExpression{ - Node: Node{ - License: "CC0", - }, - Operator: "OR", - Next: &LicenseExpression{ - Node: Node{ - License: "LGPL1.0", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - expectStr: "Public Domain AND ( GPLv2+ OR AFL AND ( CC0 OR LGPL1.0 ) )", - }, - { - name: "happy path 2", - input: "( GPLv2+ or CC0 )", - expect: &LicenseExpression{ - Node: Node{ - LicenseExpression: &LicenseExpression{ - Node: Node{ - License: "GPLv2+", - }, - Operator: "OR", - Next: &LicenseExpression{ - Node: Node{ - License: "CC0", - }, - }, - }, - }, - }, - expectStr: "( GPLv2+ OR CC0 )", - }, - { - name: "bad path close bracket not found", - input: "Public Domain AND ( GPLv2+ ", - expectErr: "invalid expression error", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - l := lexer.New(tt.input) - p := New(l).RegisterNormalizeFunc(tt.normFunc...) 
- - got, err := p.Parse() - if tt.expectErr != "" { - assert.Equal(t, err.Error(), tt.expectErr) - return - } - - require.NoError(t, err) - assert.Equal(t, tt.expect, got) - assert.Equal(t, tt.expectStr, p.Normalize(got)) - }) - } -} diff --git a/pkg/licensing/expression/parser/stack.go b/pkg/licensing/expression/parser/stack.go deleted file mode 100644 index ffb2983243..0000000000 --- a/pkg/licensing/expression/parser/stack.go +++ /dev/null @@ -1,28 +0,0 @@ -package parser - -import ( - "github.com/aquasecurity/trivy/pkg/licensing/expression/token" -) - -type Pair struct { - root *LicenseExpression - cursor *LicenseExpression - bracket token.TokenType -} - -type Stack []Pair - -func (s *Stack) Push(x Pair) { - *s = append(*s, x) -} - -func (s *Stack) Pop() Pair { - l := len(*s) - x := (*s)[l-1] - *s = (*s)[:l-1] - return x -} - -func (s *Stack) IsEmpty() bool { - return len(*s) == 0 -} diff --git a/pkg/licensing/expression/parser_gen.go b/pkg/licensing/expression/parser_gen.go new file mode 100644 index 0000000000..32f6527637 --- /dev/null +++ b/pkg/licensing/expression/parser_gen.go @@ -0,0 +1,507 @@ +// Code generated by goyacc -o parser_gen.go parser.go.y. DO NOT EDIT. 
+ +//line parser.go.y:2 +package expression + +import __yyfmt__ "fmt" + +//line parser.go.y:2 + +//line parser.go.y:5 +type yySymType struct { + yys int + token Token + expr Expression +} + +const IDENT = 57346 +const OR = 57347 +const AND = 57348 +const WITH = 57349 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "IDENT", + "OR", + "AND", + "WITH", + "'+'", + "'('", + "')'", +} + +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +//line parser.go.y:73 + +//line yacctab:1 +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} + +const yyPrivate = 57344 + +const yyLast = 22 + +var yyAct = [...]int8{ + 8, 7, 9, 2, 10, 16, 9, 4, 11, 12, + 6, 13, 14, 15, 3, 5, 8, 7, 9, 7, + 9, 1, +} + +var yyPact = [...]int16{ + 6, -1000, 11, 0, -1000, 6, -1000, 6, 6, 6, + -1000, -1000, -5, -1, 13, -1, -1000, +} + +var yyPgo = [...]int8{ + 0, 21, 14, 7, 3, +} + +var yyR1 = [...]int8{ + 0, 1, 2, 2, 3, 4, 4, 4, 4, 4, + 4, +} + +var yyR2 = [...]int8{ + 0, 1, 1, 2, 2, 1, 1, 3, 3, 3, + 3, +} + +var yyChk = [...]int16{ + -1000, -1, -4, -2, -3, 9, 4, 6, 5, 7, + 4, 8, -4, -4, -4, -4, 10, +} + +var yyDef = [...]int8{ + 0, -2, 1, 5, 6, 0, 2, 0, 0, 0, + 3, 4, 0, 7, 8, 9, 10, +} + +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 9, 10, 3, 8, +} + +var yyTok2 = [...]int8{ + 2, 3, 4, 5, 6, 7, +} + +var yyTok3 = [...]int8{ + 0, +} + +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} + +//line yaccpar:1 + +/* parser for yacc output */ + +var ( + yyDebug = 0 + yyErrorVerbose = false +) + +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} + +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} + +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} + +func (p *yyParserImpl) Lookahead() int { + return p.char +} + 
+func yyNewParser() yyParser { + return &yyParserImpl{} +} + +const yyFlag = -1000 + +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !yyErrorVerbose { + return "syntax error" + } + + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + yyTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. + base := int(yyPact[state]) + for tok := TOKSTART; tok-1 < len(yyToknames); tok++ { + if n := base + tok; n >= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } + + // Look for tokens that we accept or reduce. + for i += 2; yyExca[i] >= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. 
+ if yyExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) + } + return res +} + +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out + } + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out + } + } + +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token +} + +func yyParse(yylex yyLexer) int { + return yyNewParser().Parse(yylex) +} + +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. 
+ yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) + } + + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate + +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- + } + goto yystack + } + +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } + } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... 
try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } + + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } + } + + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } + + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. 
+ if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) + } + } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.go.y:25 + { + yyVAL.expr = yyDollar[1].expr + if l, ok := yylex.(*Lexer); ok { + l.result = yyVAL.expr + } + } + case 2: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.go.y:34 + { + yyVAL.expr = SimpleExpr{license: yyDollar[1].token.literal} + } + case 3: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:38 + { + yyVAL.expr = SimpleExpr{license: yyDollar[1].expr.String() + " " + yyDollar[2].token.literal} + } + case 4: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:44 + { + yyVAL.expr = SimpleExpr{license: yyDollar[1].expr.String(), hasPlus: true} + } + case 5: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.go.y:49 + { + yyVAL.expr = yyDollar[1].expr + } + case 6: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.go.y:52 + { + yyVAL.expr = yyDollar[1].expr + } + case 7: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:56 + { + yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr} + } + case 8: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:60 + { + yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr} + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:64 + { + yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr} + } + case 10: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:68 + { + yyVAL.expr = yyDollar[2].expr + } + } + goto yystack /* 
stack new state and value */ +} diff --git a/pkg/licensing/expression/parser_test.go b/pkg/licensing/expression/parser_test.go new file mode 100644 index 0000000000..a828df9020 --- /dev/null +++ b/pkg/licensing/expression/parser_test.go @@ -0,0 +1,156 @@ +package expression + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + tests := []struct { + name string + input string + want Expression + wantStr string + wantErr string + }{ + { + name: "single license", + input: "Public Domain", + want: SimpleExpr{ + license: "Public Domain", + }, + wantStr: "Public Domain", + }, + { + name: "tag:value license", + input: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", + want: SimpleExpr{ + license: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", + }, + wantStr: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", + }, + { + name: "symbols", + input: "Public ._-+", + want: SimpleExpr{ + license: "Public ._-", + hasPlus: true, + }, + wantStr: "Public ._-+", + }, + { + name: "multi licenses", + input: "Public Domain AND ( GPLv2+ or AFL ) AND LGPLv2+ with distribution exceptions", + want: CompoundExpr{ + left: CompoundExpr{ + left: SimpleExpr{ + license: "Public Domain", + }, + conjunction: Token{ + token: AND, + literal: "AND", + }, + right: CompoundExpr{ + left: SimpleExpr{ + license: "GPLv2", + hasPlus: true, + }, + conjunction: Token{ + token: OR, + literal: "or", + }, + right: SimpleExpr{ + license: "AFL", + }, + }, + }, + conjunction: Token{ + token: AND, + literal: "AND", + }, + right: CompoundExpr{ + left: SimpleExpr{ + license: "LGPLv2", + hasPlus: true, + }, + conjunction: Token{ + token: WITH, + literal: "with", + }, + right: SimpleExpr{ + license: "distribution exceptions", + }, + }, + }, + wantStr: "Public Domain AND (GPLv2+ or AFL) AND LGPLv2+ with distribution exceptions", + }, + { + name: "nested licenses", + input: "Public Domain AND ( GPLv2+ or AFL AND ( 
CC0 or LGPL1.0) )", + want: CompoundExpr{ + left: SimpleExpr{ + license: "Public Domain", + }, + conjunction: Token{ + token: AND, + literal: "AND", + }, + right: CompoundExpr{ + left: SimpleExpr{ + license: "GPLv2", + hasPlus: true, + }, + conjunction: Token{ + token: OR, + literal: "or", + }, + right: CompoundExpr{ + left: SimpleExpr{ + license: "AFL", + }, + conjunction: Token{ + token: AND, + literal: "AND", + }, + right: CompoundExpr{ + left: SimpleExpr{ + license: "CC0", + }, + conjunction: Token{ + token: OR, + literal: "or", + }, + right: SimpleExpr{ + license: "LGPL1.0", + }, + }, + }, + }, + }, + wantStr: "Public Domain AND (GPLv2+ or AFL AND (CC0 or LGPL1.0))", + }, + { + name: "bad path close bracket not found", + input: "Public Domain AND ( GPLv2+ ", + wantErr: "syntax error", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + l := NewLexer(strings.NewReader(tt.input)) + ret := yyParse(l) + err := l.Err() + if tt.wantErr != "" { + assert.Equal(t, ret, 1) + assert.ErrorContains(t, err, tt.wantErr) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, l.result) + assert.Equal(t, tt.wantStr, l.result.String()) + }) + } +} diff --git a/pkg/licensing/expression/token/token.go b/pkg/licensing/expression/token/token.go deleted file mode 100644 index 23e7e64d8f..0000000000 --- a/pkg/licensing/expression/token/token.go +++ /dev/null @@ -1,37 +0,0 @@ -package token - -import ( - "strings" -) - -const ( - ILLEGAL = "ILLEGAL" - EOF = "EOF" - - IDENT = "IDENT" - - LPAREN = "(" - RPAREN = ")" - - AND = "AND" - OR = "OR" -) - -var keywords = map[string]TokenType{ - "AND": AND, - "OR": OR, -} - -type TokenType string - -type Token struct { - Type TokenType - Literal string -} - -func LookupIdent(ident string) TokenType { - if tok, ok := keywords[strings.ToUpper(ident)]; ok { - return tok - } - return IDENT -} diff --git a/pkg/licensing/expression/types.go b/pkg/licensing/expression/types.go new file mode 100644 index 
0000000000..f5315f4ffb --- /dev/null +++ b/pkg/licensing/expression/types.go @@ -0,0 +1,83 @@ +package expression + +import ( + "fmt" + + "golang.org/x/exp/slices" + + "github.com/aquasecurity/trivy/pkg/licensing" +) + +var versioned = []string{ + licensing.AGPL10, + licensing.AGPL30, + licensing.GFDL11WithInvariants, + licensing.GFDL11NoInvariants, + licensing.GFDL11, + licensing.GFDL12WithInvariants, + licensing.GFDL12NoInvariants, + licensing.GFDL12, + licensing.GFDL13WithInvariants, + licensing.GFDL13NoInvariants, + licensing.GFDL13, + licensing.GPL10, + licensing.GPL20, + licensing.GPL30, + licensing.LGPL20, + licensing.LGPL21, + licensing.LGPL30, +} + +type Expression interface { + String() string +} + +type Token struct { + token int + literal string +} + +type SimpleExpr struct { + license string + hasPlus bool +} + +func (s SimpleExpr) String() string { + if slices.Contains(versioned, s.license) { + if s.hasPlus { + // e.g. AGPL-1.0-or-later + return s.license + "-or-later" + } + // e.g. GPL-1.0-only + return s.license + "-only" + } + + if s.hasPlus { + return s.license + "+" + } + return s.license +} + +type CompoundExpr struct { + left Expression + conjunction Token + right Expression +} + +func (c CompoundExpr) String() string { + left := c.left.String() + if l, ok := c.left.(CompoundExpr); ok { + // e.g. (A OR B) AND C + if c.conjunction.token > l.conjunction.token { + left = fmt.Sprintf("(%s)", left) + } + } + right := c.right.String() + if r, ok := c.right.(CompoundExpr); ok { + // e.g. A AND (B OR C) + if c.conjunction.token > r.conjunction.token { + right = fmt.Sprintf("(%s)", right) + } + } + return fmt.Sprintf("%s %s %s", left, c.conjunction.literal, right) +} diff --git a/pkg/licensing/expression/y.output b/pkg/licensing/expression/y.output new file mode 100644 index 0000000000..6e39c0c8f4 --- /dev/null +++ b/pkg/licensing/expression/y.output @@ -0,0 +1,172 @@ + +state 0 + $accept: .license $end + + IDENT shift 6 + '(' shift 5 + . 
error + + license goto 1 + simple goto 3 + plus goto 4 + compound goto 2 + +state 1 + $accept: license.$end + + $end accept + . error + + +state 2 + license: compound. (1) + compound: compound.AND compound + compound: compound.OR compound + compound: compound.WITH compound + + OR shift 8 + AND shift 7 + WITH shift 9 + . reduce 1 (src line 23) + + +state 3 + simple: simple.IDENT + plus: simple.'+' + compound: simple. (5) + + IDENT shift 10 + '+' shift 11 + . reduce 5 (src line 48) + + +state 4 + compound: plus. (6) + + . reduce 6 (src line 52) + + +state 5 + compound: '('.compound ')' + + IDENT shift 6 + '(' shift 5 + . error + + simple goto 3 + plus goto 4 + compound goto 12 + +state 6 + simple: IDENT. (2) + + . reduce 2 (src line 32) + + +state 7 + compound: compound AND.compound + + IDENT shift 6 + '(' shift 5 + . error + + simple goto 3 + plus goto 4 + compound goto 13 + +state 8 + compound: compound OR.compound + + IDENT shift 6 + '(' shift 5 + . error + + simple goto 3 + plus goto 4 + compound goto 14 + +state 9 + compound: compound WITH.compound + + IDENT shift 6 + '(' shift 5 + . error + + simple goto 3 + plus goto 4 + compound goto 15 + +state 10 + simple: simple IDENT. (3) + + . reduce 3 (src line 37) + + +state 11 + plus: simple '+'. (4) + + . reduce 4 (src line 42) + + +state 12 + compound: compound.AND compound + compound: compound.OR compound + compound: compound.WITH compound + compound: '(' compound.')' + + OR shift 8 + AND shift 7 + WITH shift 9 + ')' shift 16 + . error + + +state 13 + compound: compound.AND compound + compound: compound AND compound. (7) + compound: compound.OR compound + compound: compound.WITH compound + + WITH shift 9 + . reduce 7 (src line 55) + + +state 14 + compound: compound.AND compound + compound: compound.OR compound + compound: compound OR compound. (8) + compound: compound.WITH compound + + AND shift 7 + WITH shift 9 + . 
reduce 8 (src line 59) + + +state 15 + compound: compound.AND compound + compound: compound.OR compound + compound: compound.WITH compound + compound: compound WITH compound. (9) + + WITH shift 9 + . reduce 9 (src line 63) + + +state 16 + compound: '(' compound ')'. (10) + + . reduce 10 (src line 67) + + +10 terminals, 5 nonterminals +11 grammar rules, 17/16000 states +0 shift/reduce, 0 reduce/reduce conflicts reported +54 working sets used +memory: parser 15/240000 +13 extra closures +23 shift entries, 1 exceptions +8 goto entries +8 entries saved by goto default +Optimizer space used: output 22/240000 +22 table entries, 0 zero +maximum spread: 10, maximum offset: 9 diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index b85cbc9162..5e25fc89ca 100644 --- a/pkg/licensing/normalize.go +++ b/pkg/licensing/normalize.go @@ -1,6 +1,8 @@ package licensing -import "strings" +import ( + "strings" +) var mapping = map[string]string{ // GPL @@ -14,6 +16,7 @@ var mapping = map[string]string{ "GPL-2": GPL20, "GPL-2.0-ONLY": GPL20, "GPL2+": GPL20, + "GPLV2": GPL20, "GPLV2+": GPL20, "GPL-2+": GPL20, "GPL-2.0+": GPL20, @@ -23,6 +26,7 @@ var mapping = map[string]string{ "GPL3": GPL30, "GPL 3.0": GPL30, "GPL 3": GPL30, + "GPLV3": GPL30, "GPLV3+": GPL30, "GPL-3": GPL30, "GPL-3.0-ONLY": GPL30, diff --git a/pkg/sbom/spdx/marshal.go b/pkg/sbom/spdx/marshal.go index 187aec21be..fc0b3eec98 100644 --- a/pkg/sbom/spdx/marshal.go +++ b/pkg/sbom/spdx/marshal.go @@ -8,6 +8,7 @@ import ( "github.com/google/uuid" "github.com/mitchellh/hashstructure/v2" + "github.com/samber/lo" "github.com/spdx/tools-golang/spdx" "golang.org/x/xerrors" "k8s.io/utils/clock" @@ -15,6 +16,7 @@ import ( ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/aquasecurity/trivy/pkg/licensing" "github.com/aquasecurity/trivy/pkg/licensing/expression" + "github.com/aquasecurity/trivy/pkg/log" "github.com/aquasecurity/trivy/pkg/purl" "github.com/aquasecurity/trivy/pkg/scanner/utils" 
"github.com/aquasecurity/trivy/pkg/types" @@ -362,11 +364,20 @@ func GetLicense(p ftypes.Package) string { return "NONE" } - return expression.Normalize( - expression.Join(p.Licenses, expression.AND), - licensing.Normalize, - expression.NormalizeForSPDX, - ) + license := strings.Join(lo.Map(p.Licenses, func(license string, index int) string { + // e.g. GPL-3.0-with-autoconf-exception + license = strings.ReplaceAll(license, "-with-", " WITH ") + license = strings.ReplaceAll(license, "-WITH-", " WITH ") + + return fmt.Sprintf("(%s)", license) + }), " AND ") + s, err := expression.Normalize(license, licensing.Normalize, expression.NormalizeForSPDX) + if err != nil { + // Not fail on the invalid license + log.Logger.Warnf("Unable to marshal SPDX licenses %q", license) + return "" + } + return s } func getDocumentNamespace(r types.Report, m *Marshaler) string { diff --git a/pkg/sbom/spdx/marshal_test.go b/pkg/sbom/spdx/marshal_test.go index 6b402a5db6..459df03894 100644 --- a/pkg/sbom/spdx/marshal_test.go +++ b/pkg/sbom/spdx/marshal_test.go @@ -176,8 +176,8 @@ func TestMarshaler_Marshal(t *testing.T) { PackageSPDXIdentifier: spdx.ElementID("Package-fd0dc3cf913d5bc3"), PackageName: "binutils", PackageVersion: "2.30", - PackageLicenseConcluded: "GPL-3.0", - PackageLicenseDeclared: "GPL-3.0", + PackageLicenseConcluded: "GPL-3.0-or-later", + PackageLicenseDeclared: "GPL-3.0-or-later", PackageExternalReferences: []*spdx.PackageExternalReference2_2{ { Category: tspdx.CategoryPackageManager, @@ -338,8 +338,8 @@ func TestMarshaler_Marshal(t *testing.T) { PackageSPDXIdentifier: spdx.ElementID("Package-d8dccb186bafaf37"), PackageName: "acl", PackageVersion: "2.2.53", - PackageLicenseConcluded: "GPL-2.0", - PackageLicenseDeclared: "GPL-2.0", + PackageLicenseConcluded: "GPL-2.0-or-later", + PackageLicenseDeclared: "GPL-2.0-or-later", PackageExternalReferences: []*spdx.PackageExternalReference2_2{ { Category: tspdx.CategoryPackageManager, @@ -700,7 +700,7 @@ func Test_GetLicense(t 
*testing.T) { "GPLv2+", }, }, - want: "GPL-2.0", + want: "GPL-2.0-or-later", }, { name: "happy path with multi license", @@ -710,7 +710,7 @@ func Test_GetLicense(t *testing.T) { "GPLv3+", }, }, - want: "GPL-2.0 AND GPL-3.0", + want: "GPL-2.0-or-later AND GPL-3.0-or-later", }, { name: "happy path with OR operator", @@ -720,7 +720,7 @@ func Test_GetLicense(t *testing.T) { "LGPL 2.0 or GNU LESSER", }, }, - want: "GPL-2.0 AND ( LGPL-2.0 OR LGPL-3.0 )", + want: "GPL-2.0-or-later AND (LGPL-2.0-only OR LGPL-3.0-only)", }, { name: "happy path with AND operator", @@ -730,7 +730,7 @@ func Test_GetLicense(t *testing.T) { "LGPL 2.0 and GNU LESSER", }, }, - want: "GPL-2.0 AND LGPL-2.0 AND LGPL-3.0", + want: "GPL-2.0-or-later AND LGPL-2.0-only AND LGPL-3.0-only", }, { name: "happy path with WITH operator",