refactor(license): use goyacc for license parser (#3824)

This commit is contained in:
Teppei Fukuda
2023-03-14 09:27:17 +02:00
committed by GitHub
parent 00c763bc10
commit 2bb25e766b
22 changed files with 1493 additions and 705 deletions

View File

@@ -36,6 +36,9 @@ $(GOBIN)/labeler:
$(GOBIN)/easyjson: $(GOBIN)/easyjson:
go install github.com/mailru/easyjson/...@v0.7.7 go install github.com/mailru/easyjson/...@v0.7.7
$(GOBIN)/goyacc:
go install golang.org/x/tools/cmd/goyacc@latest
.PHONY: wire .PHONY: wire
wire: $(GOBIN)/wire wire: $(GOBIN)/wire
wire gen ./pkg/commands/... ./pkg/rpc/... wire gen ./pkg/commands/... ./pkg/rpc/...
@@ -133,3 +136,8 @@ mkdocs-serve:
.PHONY: easyjson .PHONY: easyjson
easyjson: $(GOBIN)/easyjson easyjson: $(GOBIN)/easyjson
easyjson pkg/module/serialize/types.go easyjson pkg/module/serialize/types.go
# Generate license parser with goyacc
.PHONY: yacc
yacc: $(GOBIN)/goyacc
go generate ./pkg/licensing/expression/...

View File

@@ -88,9 +88,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens
// cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual. // cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual.
l := strings.TrimSpace(line[8:]) l := strings.TrimSpace(line[8:])
// Very rarely has below phrases l = normalizeLicense(l)
l = strings.TrimPrefix(l, "The main library is licensed under ")
l = strings.TrimSuffix(l, " license")
if len(l) > 0 { if len(l) > 0 {
// Split licenses without considering "and"/"or" // Split licenses without considering "and"/"or"
// examples: // examples:
@@ -140,3 +138,15 @@ func (a *dpkgLicenseAnalyzer) Type() analyzer.Type {
func (a *dpkgLicenseAnalyzer) Version() int { func (a *dpkgLicenseAnalyzer) Version() int {
return dpkgLicenseAnalyzerVersion return dpkgLicenseAnalyzerVersion
} }
// normalizeLicense returns a normalized license identifier in a heuristic way
func normalizeLicense(s string) string {
// "The MIT License (MIT)" => "The MIT License"
s, _, _ = strings.Cut(s, "(")
// Very rarely has below phrases
s = strings.TrimPrefix(s, "The main library is licensed under ")
s = strings.TrimSuffix(s, " license")
return strings.TrimSpace(s)
}

View File

@@ -82,6 +82,15 @@ const (
FacebookExamples = "Facebook-Examples" FacebookExamples = "Facebook-Examples"
FreeImage = "FreeImage" FreeImage = "FreeImage"
FTL = "FTL" FTL = "FTL"
GFDL11WithInvariants = "GFDL-1.1-invariants"
GFDL11NoInvariants = "GFDL-1.1-no-invariants"
GFDL11 = "GFDL-1.1"
GFDL12WithInvariants = "GFDL-1.2-invariants"
GFDL12NoInvariants = "GFDL-1.2-no-invariants"
GFDL12 = "GFDL-1.2"
GFDL13WithInvariants = "GFDL-1.3-invariants"
GFDL13NoInvariants = "GFDL-1.3-no-invariants"
GFDL13 = "GFDL-1.3"
GPL10 = "GPL-1.0" GPL10 = "GPL-1.0"
GPL20 = "GPL-2.0" GPL20 = "GPL-2.0"
GPL20withautoconfexception = "GPL-2.0-with-autoconf-exception" GPL20withautoconfexception = "GPL-2.0-with-autoconf-exception"

View File

@@ -1,74 +1,80 @@
package expression package expression
import ( import (
"fmt"
"strings" "strings"
"unicode"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer" "golang.org/x/xerrors"
"github.com/aquasecurity/trivy/pkg/licensing/expression/parser"
) )
type Operator string var (
ErrInvalidExpression = xerrors.New("invalid expression error")
const (
AND Operator = "AND"
OR Operator = "OR"
WITH Operator = "WITH"
) )
func (o Operator) String() string { type NormalizeFunc func(license string) string
return fmt.Sprintf(" %s ", string(o))
func parse(license string) (Expression, error) {
l := NewLexer(strings.NewReader(license))
if yyParse(l) != 0 {
return nil, xerrors.Errorf("license parse error: %w", l.Err())
} else if err := l.Err(); err != nil {
return nil, err
}
return l.result, nil
} }
func Normalize(license string, fn ...parser.NormalizeFunc) string { func Normalize(license string, fn ...NormalizeFunc) (string, error) {
lex := lexer.New(license) expr, err := parse(license)
licenseParser := parser.New(lex).RegisterNormalizeFunc(
fn...,
)
expression, err := licenseParser.Parse()
if err != nil { if err != nil {
return license return "", xerrors.Errorf("license (%s) parse error: %w", license, err)
} }
return licenseParser.Normalize(expression) expr = normalize(expr, fn...)
return expr.String(), nil
} }
func Join(elems []string, sep Operator) string { func normalize(expr Expression, fn ...NormalizeFunc) Expression {
var licenses []string switch e := expr.(type) {
for i, license := range elems { case SimpleExpr:
var mid Operator for _, f := range fn {
if sep == AND { e.license = f(e.license)
mid = OR
} else if sep == OR {
mid = AND
} }
return e
if i != 0 && strings.Contains(strings.ToUpper(license), mid.String()) { case CompoundExpr:
license = fmt.Sprintf("(%s)", license) e.left = normalize(e.left, fn...)
} e.right = normalize(e.right, fn...)
licenses = append(licenses, license) e.conjunction.literal = strings.ToUpper(e.conjunction.literal) // e.g. "and" => "AND"
return e
} }
return strings.Join(licenses, sep.String()) return expr
} }
// NormalizeForSPDX is normalized license-id replace ' ' to '-'. // NormalizeForSPDX replaces ' ' to '-' in license-id.
// SPDX license MUST NOT be white space between a license-id. // SPDX license MUST NOT be white space between a license-id.
// There MUST be white space on either side of the operator "WITH". // There MUST be white space on either side of the operator "WITH".
// ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions // ref: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions
func NormalizeForSPDX(name string) string { func NormalizeForSPDX(s string) string {
i := strings.Index(strings.ToUpper(name), WITH.String()) var b strings.Builder
if i < 0 { for _, c := range s {
return strings.Replace(name, " ", "-", -1) // idstring = 1*(ALPHA / DIGIT / "-" / "." )
if isAlphabet(c) || unicode.IsNumber(c) || c == '-' || c == '.' {
_, _ = b.WriteRune(c)
} else if c == ':' {
// TODO: Support DocumentRef
_, _ = b.WriteRune(c)
} else {
// Replace invalid characters with '-'
_, _ = b.WriteRune('-')
}
} }
return b.String()
// Convert "WITH" expression split by " " to "-". }
// examples:
// GPL-2+ with distribution exception => GPL-2+ with distribution-exception func isAlphabet(r rune) bool {
// GPL-2 with Linux-syscall-note exception => GPL-2 with Linux-syscall-note-exception if (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') {
// AFL 2.0 with Linux-syscall-note exception => AFL-2.0 with Linux-syscall-note-exception return false
withSection := strings.Replace(name[i+len(WITH.String()):], " ", "-", -1) }
if i > 0 { return true
return strings.Replace(name[:i], " ", "-", -1) + WITH.String() + withSection
}
return name
} }

View File

@@ -1,83 +1,56 @@
package expression package expression
import ( import (
"strings"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestNormalizeForSPDX(t *testing.T) { func TestNormalize(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
license string license string
fn NormalizeFunc
want string want string
wantErr string
}{ }{
{ {
name: "happy path", name: "SPDX, space",
license: "AFL 2.0", license: "AFL 2.0",
fn: NormalizeForSPDX,
want: "AFL-2.0", want: "AFL-2.0",
}, },
{ {
name: "happy path with WITH section", name: "SPDX, exception",
license: "AFL 2.0 with Linux-syscall-note exception", license: "AFL 2.0 with Linux-syscall-note exception",
fn: NormalizeForSPDX,
want: "AFL-2.0 WITH Linux-syscall-note-exception", want: "AFL-2.0 WITH Linux-syscall-note-exception",
}, },
{
name: "SPDX, invalid chars",
license: "LGPL_2.1_only or MIT OR BSD-3>Clause",
fn: NormalizeForSPDX,
want: "LGPL-2.1-only OR MIT OR BSD-3-Clause",
},
{
name: "upper",
license: "LGPL-2.1-only OR MIT",
fn: strings.ToUpper,
want: "LGPL-2.1-ONLY OR MIT",
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, NormalizeForSPDX(tt.license), "NormalizeWithExpression(%v)", tt.license) got, err := Normalize(tt.license, tt.fn)
}) if tt.wantErr != "" {
} assert.ErrorContains(t, err, tt.wantErr)
} return
}
func TestJoin(t *testing.T) { require.NoError(t, err)
tests := []struct { assert.Equalf(t, tt.want, got, "NormalizeWithExpression(%v)", tt.license)
name string
inputElements []string
inputOperator Operator
expect string
}{
{
name: "happy path single license",
inputElements: []string{"MIT"},
inputOperator: AND,
expect: "MIT",
},
{
name: "happy path multi license",
inputElements: []string{"MIT", "GPL1.0"},
inputOperator: AND,
expect: "MIT AND GPL1.0",
},
{
name: "happy path multi license with AND operator",
inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
inputOperator: AND,
expect: "MIT AND GPL1.0 AND GPL2.0",
},
{
name: "happy path multi license with OR operator",
inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
inputOperator: OR,
expect: "MIT OR GPL1.0 OR GPL2.0",
},
{
name: "happy path multi license with OR operator, separator AND",
inputElements: []string{"MIT", "GPL1.0 OR GPL2.0"},
inputOperator: AND,
expect: "MIT AND (GPL1.0 OR GPL2.0)",
},
{
name: "happy path multi license with AND operator, separator OR",
inputElements: []string{"MIT", "GPL1.0 AND GPL2.0"},
inputOperator: OR,
expect: "MIT OR (GPL1.0 AND GPL2.0)",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := Join(tt.inputElements, tt.inputOperator)
assert.Equal(t, tt.expect, got)
}) })
} }
} }

View File

@@ -0,0 +1,119 @@
package expression
import (
"bufio"
"errors"
"io"
"strings"
"unicode"
"unicode/utf8"
multierror "github.com/hashicorp/go-multierror"
)
// Lexer tokenizes a license expression for the goyacc-generated parser.
// It implements the yyLexer interface (Lex/Error).
type Lexer struct {
	s      *bufio.Scanner // word-level scanner over the input expression
	result Expression     // final parse tree, set by the grammar's top rule
	errs   error          // accumulated lex/parse errors (multierror)
}
// NewLexer returns a Lexer that splits the input into license-expression
// tokens: '(' , ')' and '+' are single-character terminal symbols, and
// everything else is a whitespace-delimited word.
func NewLexer(reader io.Reader) *Lexer {
	scanner := bufio.NewScanner(reader)
	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		// The implementation references bufio.ScanWords()
		// Skip leading spaces.
		start := 0
		for width := 0; start < len(data); start += width {
			var r rune
			r, width = utf8.DecodeRune(data[start:])
			if !unicode.IsSpace(r) {
				break
			}
		}
		// Process terminal symbols: emit '(' , ')' or '+' as a one-byte token.
		if len(data) > start && (data[start] == '(' || data[start] == ')' || data[start] == '+') {
			return start + 1, data[start : start+1], nil
		}
		// Scan until space or token, marking end of word.
		for width, i := 0, start; i < len(data); i += width {
			var r rune
			r, width = utf8.DecodeRune(data[i:])
			switch r {
			case '(', ')':
				// A parenthesis ends the word; advance only to i so the
				// parenthesis itself is emitted on the next call.
				return i, data[start:i], nil
			case '+':
				// '+' ends a word only at a word boundary (e.g. "GPL1.0+").
				// Inside a word (e.g. "ISC+IBM") it is part of the identifier.
				// Peek the next rune
				if len(data) > i+width {
					adv := i
					i += width
					r, width = utf8.DecodeRune(data[i:])
					if unicode.IsSpace(r) || r == '(' || r == ')' {
						// Emit the word; the '+' is returned as its own
						// token on the next call.
						return adv, data[start:adv], nil
					}
				} else if atEOF {
					// Trailing '+' at end of input terminates the word.
					return i, data[start:i], nil
				}
			default:
				if unicode.IsSpace(r) {
					return i + width, data[start:i], nil
				}
			}
		}
		// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
		if atEOF && len(data) > start {
			return len(data), data[start:], nil
		}
		// Request more data.
		return start, nil, nil
	})
	return &Lexer{
		s: scanner,
	}
}
// Lex implements yyLexer. It stores the next token (id + literal) into lval
// and returns the token id; 0 signals end of input to the parser.
func (l *Lexer) Lex(lval *yySymType) int {
	if !l.s.Scan() {
		// Scan returns false at EOF or on a scanner error. The original
		// code checked l.s.Err() only after successful scans, where it is
		// always nil, so real scan failures were silently dropped —
		// capture the error here so Err() reports it after parsing.
		if err := l.s.Err(); err != nil {
			l.errs = multierror.Append(l.errs, err)
		}
		return 0
	}

	literal := l.s.Text()
	var token int
	switch literal {
	case "(", ")", "+":
		// Terminal symbols are passed through as their character code.
		token = int(literal[0])
	default:
		// Keyword (AND/OR/WITH, case-insensitive) or IDENT.
		token = lookup(literal)
	}
	lval.token = Token{
		token:   token,
		literal: literal,
	}
	return token
}
// Error implements yyLexer; goyacc calls it on syntax errors. Errors are
// accumulated rather than aborting, and surfaced later via Err().
func (l *Lexer) Error(e string) {
	l.errs = multierror.Append(l.errs, errors.New(e))
}
// Err returns all lexing/parsing errors collected so far, or nil.
func (l *Lexer) Err() error {
	return l.errs
}
// lookup resolves a literal to its yacc token id. Keyword matching is
// case-insensitive (e.g. "and" and "AND" are both the AND operator);
// any literal that is not a known keyword is an IDENT.
func lookup(t string) int {
	upper := strings.ToUpper(t)
	for idx := range yyToknames {
		if yyToknames[idx] == upper {
			// Token ids start at yyPrivate; yyToknames[0] is "$end".
			return yyPrivate + idx - 1
		}
	}
	return IDENT
}

View File

@@ -1,85 +0,0 @@
package lexer
import (
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
// Lexer is a byte-oriented scanner over a license expression string.
type Lexer struct {
	input        string // full input expression
	position     int    // index of the current byte (ch)
	readPosition int    // index of the next byte to read
	ch           byte   // current byte; 0 (NUL) marks end of input
}
// New builds a Lexer for input and primes it on the first character.
func New(input string) *Lexer {
	lex := Lexer{input: input}
	lex.readChar()
	return &lex
}
// NextToken scans and returns the next token. End of input is reported as a
// token.EOF token; a byte that cannot start an identifier is token.ILLEGAL.
func (l *Lexer) NextToken() token.Token {
	var tok token.Token

	l.skipWhitespace()

	switch l.ch {
	case 0:
		// NUL sentinel set by readChar at end of input.
		tok = newToken(token.EOF, l.ch)
	case '(':
		tok = newToken(token.LPAREN, l.ch)
	case ')':
		tok = newToken(token.RPAREN, l.ch)
	default:
		if isLetter(l.ch) {
			// readIdentifier already advances past the identifier, so
			// return without the trailing readChar below.
			tok.Literal = l.readIdentifier()
			tok.Type = token.LookupIdent(tok.Literal)
			return tok
		} else {
			tok = newToken(token.ILLEGAL, l.ch)
		}
	}
	l.readChar()
	return tok
}
// isLetter reports whether ch may appear in a license identifier:
// ASCII letters, digits, and the punctuation _ + . - / : =.
func isLetter(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	case '0' <= ch && ch <= '9':
		return true
	}
	switch ch {
	case '_', '+', '.', '-', '/', ':', '=':
		return true
	}
	return false
}
// readIdentifier consumes consecutive identifier bytes and returns the
// substring that was read. On return, l.ch is the first non-identifier byte.
func (l *Lexer) readIdentifier() string {
	position := l.position
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}
// newToken wraps a single byte into a token of the given type.
func newToken(tokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: tokenType, Literal: string(ch)}
}
// skipWhitespace advances the lexer past spaces, tabs, and line breaks.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readChar advances the lexer by one byte, setting l.ch to 0 (ASCII NUL)
// once the end of input is reached.
func (l *Lexer) readChar() {
	if l.readPosition >= len(l.input) {
		// 0 is ASCII NUL
		l.ch = 0
	} else {
		l.ch = l.input[l.readPosition]
	}
	l.position = l.readPosition
	l.readPosition++
}

View File

@@ -1,143 +0,0 @@
package lexer
import (
"testing"
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
"github.com/stretchr/testify/assert"
)
func TestNextToken(t *testing.T) {
tests := []struct {
name string
licenseExpression string
expectTokens []token.Token
}{
{
name: "empty input",
licenseExpression: "",
expectTokens: []token.Token{
{
Type: token.EOF,
Literal: string(byte(0)),
},
},
},
{
name: "single ident",
licenseExpression: "GPL1.0+",
expectTokens: []token.Token{
{
Type: token.IDENT,
Literal: "GPL1.0+",
},
},
},
{
name: "multi ident",
licenseExpression: "Public Domain",
expectTokens: []token.Token{
{
Type: token.IDENT,
Literal: "Public",
},
{
Type: token.IDENT,
Literal: "Domain",
},
},
},
{
name: "AND OR operator",
licenseExpression: "Public Domain AND GPL1.0+ OR GPL2.0_or_later",
expectTokens: []token.Token{
{
Type: token.IDENT,
Literal: "Public",
},
{
Type: token.IDENT,
Literal: "Domain",
},
{
Type: token.AND,
Literal: "AND",
},
{
Type: token.IDENT,
Literal: "GPL1.0+",
},
{
Type: token.OR,
Literal: "OR",
},
{
Type: token.IDENT,
Literal: "GPL2.0_or_later",
},
},
},
{
name: "PAREN operator",
licenseExpression: "(GPL1.0+ OR GPL2.0)",
expectTokens: []token.Token{
{
Type: token.LPAREN,
Literal: "(",
},
{
Type: token.IDENT,
Literal: "GPL1.0+",
},
{
Type: token.OR,
Literal: "OR",
},
{
Type: token.IDENT,
Literal: "GPL2.0",
},
{
Type: token.RPAREN,
Literal: ")",
},
},
},
{
name: "illegal string",
licenseExpression: "GPL1.0+" + string(byte(0x20)) + "あ" + "🇯🇵" + "AND LGPL1.0",
expectTokens: []token.Token{
{
Type: token.IDENT,
Literal: "GPL1.0+",
},
{
Type: token.AND,
Literal: "AND",
},
{
Type: token.IDENT,
Literal: "LGPL1.0+",
},
},
},
}
for _, tt := range tests {
l := New(tt.licenseExpression)
for _, expect := range tt.expectTokens {
tok := l.NextToken()
// Skip literal
if tok.Type == token.ILLEGAL {
continue
}
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, expect.Type, tok.Type)
assert.Equal(t, expect.Literal, tok.Literal)
})
}
}
}

View File

@@ -0,0 +1,239 @@
package expression
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestLexer_Lex(t *testing.T) {
tests := []struct {
name string
input string
want []Token
}{
{
name: "simple",
input: "GPL-2.0-only",
want: []Token{
{
token: IDENT,
literal: "GPL-2.0-only",
},
},
},
{
name: "with space",
input: "Public Domain",
want: []Token{
{
token: IDENT,
literal: "Public",
},
{
token: IDENT,
literal: "Domain",
},
},
},
{
name: "and",
input: "Public Domain AND MIT",
want: []Token{
{
token: IDENT,
literal: "Public",
},
{
token: IDENT,
literal: "Domain",
},
{
token: AND,
literal: "AND",
},
{
token: IDENT,
literal: "MIT",
},
},
},
{
name: "or",
input: "LGPL-2.1-only OR MIT OR BSD-3-Clause",
want: []Token{
{
token: IDENT,
literal: "LGPL-2.1-only",
},
{
token: OR,
literal: "OR",
},
{
token: IDENT,
literal: "MIT",
},
{
token: OR,
literal: "OR",
},
{
token: IDENT,
literal: "BSD-3-Clause",
},
},
},
{
name: "parenthesis",
input: "LGPL-2.1-only AND (MIT OR BSD-3-Clause)",
want: []Token{
{
token: IDENT,
literal: "LGPL-2.1-only",
},
{
token: AND,
literal: "AND",
},
{
token: int('('),
literal: "(",
},
{
token: IDENT,
literal: "MIT",
},
{
token: OR,
literal: "OR",
},
{
token: IDENT,
literal: "BSD-3-Clause",
},
{
token: int(')'),
literal: ")",
},
},
},
{
name: "exception",
input: "LGPL-2.1-only AND GPL-2.0-or-later WITH Bison-exception-2.2",
want: []Token{
{
token: IDENT,
literal: "LGPL-2.1-only",
},
{
token: AND,
literal: "AND",
},
{
token: IDENT,
literal: "GPL-2.0-or-later",
},
{
token: WITH,
literal: "WITH",
},
{
token: IDENT,
literal: "Bison-exception-2.2",
},
},
},
{
name: "plus",
input: "Public Domain+",
want: []Token{
{
token: IDENT,
literal: "Public",
},
{
token: IDENT,
literal: "Domain",
},
{
token: int('+'),
literal: "+",
},
},
},
{
name: "plus in the middle",
input: "ISC+IBM",
want: []Token{
{
token: IDENT,
literal: "ISC+IBM",
},
},
},
{
name: "plus with the parenthesis",
input: "(GPL1.0+)",
want: []Token{
{
token: int('('),
literal: "(",
},
{
token: IDENT,
literal: "GPL1.0",
},
{
token: int('+'),
literal: "+",
},
{
token: int(')'),
literal: ")",
},
},
},
{
name: "utf-8",
input: "GPL1.0+ " + string(byte(0x20)) + "あ🇯🇵" + " and LGPL1.0",
want: []Token{
{
token: IDENT,
literal: "GPL1.0",
},
{
token: int('+'),
literal: "+",
},
{
token: IDENT,
literal: "あ🇯🇵",
},
{
token: AND,
literal: "and",
},
{
token: IDENT,
literal: "LGPL1.0",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
l := NewLexer(strings.NewReader(tt.input))
var got []Token
var lval yySymType
for l.Lex(&lval) != 0 {
got = append(got, lval.token)
lval = yySymType{}
}
require.NoError(t, l.Err())
assert.Equal(t, tt.want, got)
})
}
}

View File

@@ -0,0 +1,2 @@
//go:generate goyacc -o parser_gen.go parser.go.y
package expression

View File

@@ -0,0 +1,73 @@
%{
package expression
%}

%union{
	token Token
	expr Expression
}

/* Value types of the nonterminals. */
%type<expr> license
%type<expr> simple
%type<expr> plus
%type<expr> compound

/* Terminals produced by Lexer.Lex. */
%token<token> IDENT OR AND WITH

/* Precedence, lowest first: OR binds loosest, '+' tightest. */
%left OR
%left AND
%right WITH
%right '+'

%%

/* Entry rule: store the finished tree on the lexer for the caller. */
license
	: compound
	{
		$$ = $1
		if l, ok := yylex.(*Lexer); ok{
			l.result = $$
		}
	}

/* A simple expression: one or more space-separated identifier words. */
simple
	: IDENT
	{
		$$ = SimpleExpr{license: $1.literal}
	}
	| simple IDENT /* e.g. Public Domain */
	{
		$$ = SimpleExpr{license: $1.String() + " " + $2.literal}
	}

/* A simple expression followed by '+' (e.g. "GPL-2.0+"). */
plus
	: simple '+'
	{
		$$ = SimpleExpr{license: $1.String(), hasPlus: true}
	}

/* Compound expressions joined by AND/OR/WITH, or a parenthesized group. */
compound
	: simple {
		$$ = $1
	}
	| plus {
		$$ = $1
	}
	| compound AND compound /* compound-expression "AND" compound-expression */
	{
		$$ = CompoundExpr{left: $1, conjunction: $2, right: $3}
	}
	| compound OR compound /* compound-expression "OR" compound-expression */
	{
		$$ = CompoundExpr{left: $1, conjunction: $2, right: $3}
	}
	| compound WITH compound /* simple-expression "WITH" license-exception-id */
	{
		$$ = CompoundExpr{left: $1, conjunction: $2, right: $3}
	}
	| '(' compound ')'
	{
		$$ = $2
	}
%%

View File

@@ -1,102 +0,0 @@
package parser
import (
"fmt"
"strings"
"golang.org/x/xerrors"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
var (
	// ErrInvalidExpression is returned when the expression's brackets are
	// unbalanced.
	ErrInvalidExpression = xerrors.New("invalid expression error")
)
// Parser builds a LicenseExpression list from lexer tokens and can render
// it back as a normalized string.
type Parser struct {
	lex         *lexer.Lexer
	normalizeFn []NormalizeFunc // applied to each license name in Normalize
}

// LicenseExpression is a linked list of nodes joined by operators.
type LicenseExpression struct {
	Node     Node
	Operator string // operator joining this node to Next; "" on the last node
	Next     *LicenseExpression
}

// Node holds either a plain license name or a parenthesized sub-expression.
type Node struct {
	License           string
	LicenseExpression *LicenseExpression
}

// NormalizeFunc rewrites a single license name.
type NormalizeFunc func(n string) string
// New returns a Parser reading tokens from lex.
func New(lex *lexer.Lexer) *Parser {
	p := Parser{lex: lex}
	return &p
}
// RegisterNormalizeFunc appends normalization functions applied by
// Normalize, returning the parser for chaining.
func (p *Parser) RegisterNormalizeFunc(fn ...NormalizeFunc) *Parser {
	p.normalizeFn = append(p.normalizeFn, fn...)
	return p
}
// Parse consumes all tokens from the lexer and builds a LicenseExpression
// list. Parenthesized groups become nested expressions; an unmatched "("
// yields ErrInvalidExpression.
func (p *Parser) Parse() (*LicenseExpression, error) {
	root := &LicenseExpression{}
	cursor := root
	stack := Stack{}
	for tok := p.lex.NextToken(); tok.Type != token.EOF; tok = p.lex.NextToken() {
		switch tok.Type {
		case token.IDENT:
			// Consecutive idents accumulate into one multi-word license
			// name (e.g. "Public Domain").
			if cursor.Node.License == "" {
				cursor.Node = Node{License: tok.Literal}
			} else {
				cursor.Node.License = fmt.Sprintf("%s %s", cursor.Node.License, tok.Literal)
			}
		case token.AND, token.OR:
			// Close the current node and link a fresh one via the operator.
			cursor.Operator = string(tok.Type)
			cursor.Next = &LicenseExpression{}
			cursor = cursor.Next
		case token.LPAREN:
			// Save the current list and start a new one for the group.
			// (Note: this local p shadows the method receiver.)
			p := Pair{root: root, cursor: cursor, bracket: tok.Type}
			stack.Push(p)
			root = &LicenseExpression{}
			cursor = root
		case token.RPAREN:
			// Attach the finished group back onto the saved cursor.
			// NOTE(review): Pop panics on an empty stack, so input like
			// "A )" appears to panic rather than return an error — confirm
			// callers tolerate/guard this.
			e := stack.Pop()
			if e.bracket == token.LPAREN && tok.Type != token.RPAREN {
				return nil, ErrInvalidExpression
			}
			e.cursor.Node.LicenseExpression = root
			cursor = e.cursor
			root = e.root
		}
	}
	// A leftover stack entry means an unmatched "(".
	if !stack.IsEmpty() {
		return nil, ErrInvalidExpression
	}
	return root, nil
}
// Normalize renders the expression list as a single string, applying the
// registered normalization functions to each license name.
func (p *Parser) Normalize(l *LicenseExpression) string {
	cursor := l
	var str string
	for ; cursor != nil; cursor = cursor.Next {
		// Join accumulates "<so far> <node> <operator>"; the trailing
		// space from the last (empty) operator is trimmed below.
		str = strings.Join([]string{str, p.normalize(cursor.Node), cursor.Operator}, " ")
	}
	return strings.TrimSpace(str)
}
// normalize renders one node: a nested expression recurses into Normalize
// and is wrapped in parentheses; a plain name is passed through the
// registered normalize functions in registration order.
func (p *Parser) normalize(n Node) string {
	if n.LicenseExpression != nil {
		return fmt.Sprintf("( %s )", p.Normalize(n.LicenseExpression))
	}
	for _, fn := range p.normalizeFn {
		n.License = fn(n.License)
	}
	return n.License
}

View File

@@ -1,189 +0,0 @@
package parser
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/aquasecurity/trivy/pkg/licensing/expression/lexer"
)
func TestParse(t *testing.T) {
tests := []struct {
name string
input string
normFunc []NormalizeFunc
expect *LicenseExpression
expectStr string
expectErr string
}{
{
name: "happy path single license",
input: "Public Domain",
expect: &LicenseExpression{
Node: Node{
License: "Public Domain",
},
},
expectStr: "Public Domain",
},
{
name: "happy path tag:value license",
input: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
expect: &LicenseExpression{
Node: Node{
License: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
},
},
expectStr: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
},
{
name: "happy path single license with norm func",
input: "Public Domain with exception",
expect: &LicenseExpression{
Node: Node{
License: "Public Domain with exception",
},
},
normFunc: []NormalizeFunc{
func(n string) string {
return strings.Replace(n, " ", "_", -1)
},
func(n string) string {
if n == "Public_Domain_with_exception" {
return "Unlicense"
}
return n
},
},
expectStr: "Unlicense",
},
{
name: "happy path 2",
input: "Public ._+-",
expect: &LicenseExpression{
Node: Node{
License: "Public ._+-",
},
},
expectStr: "Public ._+-",
},
{
name: "happy path multi license",
input: "Public Domain AND ( GPLv2+ or AFL ) AND LGPLv2+ with distribution exceptions",
expect: &LicenseExpression{
Node: Node{
License: "Public Domain",
},
Operator: "AND",
Next: &LicenseExpression{
Node: Node{
LicenseExpression: &LicenseExpression{
Node: Node{
License: "GPLv2+",
},
Operator: "OR",
Next: &LicenseExpression{
Node: Node{
License: "AFL",
},
},
},
},
Operator: "AND",
Next: &LicenseExpression{
Node: Node{
License: "LGPLv2+ with distribution exceptions",
},
},
},
},
expectStr: "Public Domain AND ( GPLv2+ OR AFL ) AND LGPLv2+ with distribution exceptions",
},
{
name: "happy path nested license",
input: "Public Domain AND ( GPLv2+ or AFL AND ( CC0 or LGPL1.0) )",
expect: &LicenseExpression{
Node: Node{
License: "Public Domain",
},
Operator: "AND",
Next: &LicenseExpression{
Node: Node{
LicenseExpression: &LicenseExpression{
Node: Node{
License: "GPLv2+",
},
Operator: "OR",
Next: &LicenseExpression{
Node: Node{
License: "AFL",
},
Operator: "AND",
Next: &LicenseExpression{
Node: Node{
LicenseExpression: &LicenseExpression{
Node: Node{
License: "CC0",
},
Operator: "OR",
Next: &LicenseExpression{
Node: Node{
License: "LGPL1.0",
},
},
},
},
},
},
},
},
},
},
expectStr: "Public Domain AND ( GPLv2+ OR AFL AND ( CC0 OR LGPL1.0 ) )",
},
{
name: "happy path 2",
input: "( GPLv2+ or CC0 )",
expect: &LicenseExpression{
Node: Node{
LicenseExpression: &LicenseExpression{
Node: Node{
License: "GPLv2+",
},
Operator: "OR",
Next: &LicenseExpression{
Node: Node{
License: "CC0",
},
},
},
},
},
expectStr: "( GPLv2+ OR CC0 )",
},
{
name: "bad path close bracket not found",
input: "Public Domain AND ( GPLv2+ ",
expectErr: "invalid expression error",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
l := lexer.New(tt.input)
p := New(l).RegisterNormalizeFunc(tt.normFunc...)
got, err := p.Parse()
if tt.expectErr != "" {
assert.Equal(t, err.Error(), tt.expectErr)
return
}
require.NoError(t, err)
assert.Equal(t, tt.expect, got)
assert.Equal(t, tt.expectStr, p.Normalize(got))
})
}
}

View File

@@ -1,28 +0,0 @@
package parser
import (
"github.com/aquasecurity/trivy/pkg/licensing/expression/token"
)
// Pair snapshots the parser's position when a "(" is encountered, so the
// enclosing list can be restored at the matching ")".
type Pair struct {
	root    *LicenseExpression
	cursor  *LicenseExpression
	bracket token.TokenType // token type of the opening bracket
}
// Stack is a LIFO of Pair snapshots used for nested parentheses.
type Stack []Pair

// Push places x on top of the stack.
func (s *Stack) Push(x Pair) {
	*s = append(*s, x)
}

// Pop removes and returns the top element; it panics when the stack is
// empty, so callers must check IsEmpty first.
func (s *Stack) Pop() Pair {
	last := len(*s) - 1
	top := (*s)[last]
	*s = (*s)[:last]
	return top
}

// IsEmpty reports whether the stack holds no elements.
func (s *Stack) IsEmpty() bool {
	return len(*s) == 0
}

View File

@@ -0,0 +1,507 @@
// Code generated by goyacc -o parser_gen.go parser.go.y. DO NOT EDIT.
//line parser.go.y:2
package expression
import __yyfmt__ "fmt"
//line parser.go.y:2
//line parser.go.y:5
type yySymType struct {
yys int
token Token
expr Expression
}
const IDENT = 57346
const OR = 57347
const AND = 57348
const WITH = 57349
var yyToknames = [...]string{
"$end",
"error",
"$unk",
"IDENT",
"OR",
"AND",
"WITH",
"'+'",
"'('",
"')'",
}
var yyStatenames = [...]string{}
const yyEofCode = 1
const yyErrCode = 2
const yyInitialStackSize = 16
//line parser.go.y:73
//line yacctab:1
var yyExca = [...]int8{
-1, 1,
1, -1,
-2, 0,
}
const yyPrivate = 57344
const yyLast = 22
var yyAct = [...]int8{
8, 7, 9, 2, 10, 16, 9, 4, 11, 12,
6, 13, 14, 15, 3, 5, 8, 7, 9, 7,
9, 1,
}
var yyPact = [...]int16{
6, -1000, 11, 0, -1000, 6, -1000, 6, 6, 6,
-1000, -1000, -5, -1, 13, -1, -1000,
}
var yyPgo = [...]int8{
0, 21, 14, 7, 3,
}
var yyR1 = [...]int8{
0, 1, 2, 2, 3, 4, 4, 4, 4, 4,
4,
}
var yyR2 = [...]int8{
0, 1, 1, 2, 2, 1, 1, 3, 3, 3,
3,
}
var yyChk = [...]int16{
-1000, -1, -4, -2, -3, 9, 4, 6, 5, 7,
4, 8, -4, -4, -4, -4, 10,
}
var yyDef = [...]int8{
0, -2, 1, 5, 6, 0, 2, 0, 0, 0,
3, 4, 0, 7, 8, 9, 10,
}
var yyTok1 = [...]int8{
1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
9, 10, 3, 8,
}
var yyTok2 = [...]int8{
2, 3, 4, 5, 6, 7,
}
var yyTok3 = [...]int8{
0,
}
var yyErrorMessages = [...]struct {
state int
token int
msg string
}{}
//line yaccpar:1
/* parser for yacc output */
var (
yyDebug = 0
yyErrorVerbose = false
)
type yyLexer interface {
Lex(lval *yySymType) int
Error(s string)
}
type yyParser interface {
Parse(yyLexer) int
Lookahead() int
}
type yyParserImpl struct {
lval yySymType
stack [yyInitialStackSize]yySymType
char int
}
func (p *yyParserImpl) Lookahead() int {
return p.char
}
func yyNewParser() yyParser {
return &yyParserImpl{}
}
const yyFlag = -1000
func yyTokname(c int) string {
if c >= 1 && c-1 < len(yyToknames) {
if yyToknames[c-1] != "" {
return yyToknames[c-1]
}
}
return __yyfmt__.Sprintf("tok-%v", c)
}
func yyStatname(s int) string {
if s >= 0 && s < len(yyStatenames) {
if yyStatenames[s] != "" {
return yyStatenames[s]
}
}
return __yyfmt__.Sprintf("state-%v", s)
}
func yyErrorMessage(state, lookAhead int) string {
const TOKSTART = 4
if !yyErrorVerbose {
return "syntax error"
}
for _, e := range yyErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + yyTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := int(yyPact[state])
for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
if n := base + tok; n >= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok {
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if yyDef[state] == -2 {
i := 0
for yyExca[i] != -1 || int(yyExca[i+1]) != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; yyExca[i] >= 0; i += 2 {
tok := int(yyExca[i])
if tok < TOKSTART || yyExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if yyExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += yyTokname(tok)
}
return res
}
func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = int(yyTok1[0])
goto out
}
if char < len(yyTok1) {
token = int(yyTok1[char])
goto out
}
if char >= yyPrivate {
if char < yyPrivate+len(yyTok2) {
token = int(yyTok2[char-yyPrivate])
goto out
}
}
for i := 0; i < len(yyTok3); i += 2 {
token = int(yyTok3[i+0])
if token == char {
token = int(yyTok3[i+1])
goto out
}
}
out:
if token == 0 {
token = int(yyTok2[1]) /* unknown char */
}
if yyDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
}
return char, token
}
func yyParse(yylex yyLexer) int {
return yyNewParser().Parse(yylex)
}
func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
var yyn int
var yyVAL yySymType
var yyDollar []yySymType
_ = yyDollar // silence set and not used
yyS := yyrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
yystate := 0
yyrcvr.char = -1
yytoken := -1 // yyrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
yystate = -1
yyrcvr.char = -1
yytoken = -1
}()
yyp := -1
goto yystack
ret0:
return 0
ret1:
return 1
yystack:
/* put a state and value onto the stack */
if yyDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
}
yyp++
if yyp >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyS[yyp] = yyVAL
yyS[yyp].yys = yystate
yynewstate:
yyn = int(yyPact[yystate])
if yyn <= yyFlag {
goto yydefault /* simple state */
}
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
yyn += yytoken
if yyn < 0 || yyn >= yyLast {
goto yydefault
}
yyn = int(yyAct[yyn])
if int(yyChk[yyn]) == yytoken { /* valid shift */
yyrcvr.char = -1
yytoken = -1
yyVAL = yyrcvr.lval
yystate = yyn
if Errflag > 0 {
Errflag--
}
goto yystack
}
yydefault:
/* default state action */
yyn = int(yyDef[yystate])
if yyn == -2 {
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
/* look through exception table */
xi := 0
for {
if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
yyn = int(yyExca[xi+0])
if yyn < 0 || yyn == yytoken {
break
}
}
yyn = int(yyExca[xi+1])
if yyn < 0 {
goto ret0
}
}
if yyn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
yylex.Error(yyErrorMessage(yystate, yytoken))
Nerrs++
if yyDebug >= 1 {
__yyfmt__.Printf("%s", yyStatname(yystate))
__yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for yyp >= 0 {
yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode
if yyn >= 0 && yyn < yyLast {
yystate = int(yyAct[yyn]) /* simulate a shift of "error" */
if int(yyChk[yystate]) == yyErrCode {
goto yystack
}
}
/* the current p has no shift on "error", pop stack */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
}
yyp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
}
if yytoken == yyEofCode {
goto ret1
}
yyrcvr.char = -1
yytoken = -1
goto yynewstate /* try again in the same state */
}
}
/* reduction by production yyn */
if yyDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
}
yynt := yyn
yypt := yyp
_ = yypt // guard against "declared and not used"
yyp -= int(yyR2[yyn])
// yyp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if yyp+1 >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyVAL = yyS[yyp+1]
/* consult goto table to find next state */
yyn = int(yyR1[yyn])
yyg := int(yyPgo[yyn])
yyj := yyg + yyS[yyp].yys + 1
if yyj >= yyLast {
yystate = int(yyAct[yyg])
} else {
yystate = int(yyAct[yyj])
if int(yyChk[yystate]) != -yyn {
yystate = int(yyAct[yyg])
}
}
// dummy call; replaced with literal code
switch yynt {
case 1:
yyDollar = yyS[yypt-1 : yypt+1]
//line parser.go.y:25
{
yyVAL.expr = yyDollar[1].expr
if l, ok := yylex.(*Lexer); ok {
l.result = yyVAL.expr
}
}
case 2:
yyDollar = yyS[yypt-1 : yypt+1]
//line parser.go.y:34
{
yyVAL.expr = SimpleExpr{license: yyDollar[1].token.literal}
}
case 3:
yyDollar = yyS[yypt-2 : yypt+1]
//line parser.go.y:38
{
yyVAL.expr = SimpleExpr{license: yyDollar[1].expr.String() + " " + yyDollar[2].token.literal}
}
case 4:
yyDollar = yyS[yypt-2 : yypt+1]
//line parser.go.y:44
{
yyVAL.expr = SimpleExpr{license: yyDollar[1].expr.String(), hasPlus: true}
}
case 5:
yyDollar = yyS[yypt-1 : yypt+1]
//line parser.go.y:49
{
yyVAL.expr = yyDollar[1].expr
}
case 6:
yyDollar = yyS[yypt-1 : yypt+1]
//line parser.go.y:52
{
yyVAL.expr = yyDollar[1].expr
}
case 7:
yyDollar = yyS[yypt-3 : yypt+1]
//line parser.go.y:56
{
yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr}
}
case 8:
yyDollar = yyS[yypt-3 : yypt+1]
//line parser.go.y:60
{
yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr}
}
case 9:
yyDollar = yyS[yypt-3 : yypt+1]
//line parser.go.y:64
{
yyVAL.expr = CompoundExpr{left: yyDollar[1].expr, conjunction: yyDollar[2].token, right: yyDollar[3].expr}
}
case 10:
yyDollar = yyS[yypt-3 : yypt+1]
//line parser.go.y:68
{
yyVAL.expr = yyDollar[2].expr
}
}
goto yystack /* stack new state and value */
}

View File

@@ -0,0 +1,156 @@
package expression
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParse drives the goyacc-generated license-expression parser
// through the lexer and checks both the resulting AST (l.result) and
// its normalized string rendering.
func TestParse(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		want    Expression
		wantStr string
		wantErr string
	}{
		{
			name:  "single license",
			input: "Public Domain",
			want: SimpleExpr{
				license: "Public Domain",
			},
			wantStr: "Public Domain",
		},
		{
			name:  "tag:value license",
			input: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
			want: SimpleExpr{
				license: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
			},
			wantStr: "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2",
		},
		{
			// A trailing '+' is lexed separately and sets hasPlus
			// rather than staying part of the license name.
			name:  "symbols",
			input: "Public ._-+",
			want: SimpleExpr{
				license: "Public ._-",
				hasPlus: true,
			},
			wantStr: "Public ._-+",
		},
		{
			name:  "multi licenses",
			input: "Public Domain AND ( GPLv2+ or AFL ) AND LGPLv2+ with distribution exceptions",
			want: CompoundExpr{
				left: CompoundExpr{
					left: SimpleExpr{
						license: "Public Domain",
					},
					conjunction: Token{
						token:   AND,
						literal: "AND",
					},
					right: CompoundExpr{
						left: SimpleExpr{
							license: "GPLv2",
							hasPlus: true,
						},
						conjunction: Token{
							token:   OR,
							literal: "or",
						},
						right: SimpleExpr{
							license: "AFL",
						},
					},
				},
				conjunction: Token{
					token:   AND,
					literal: "AND",
				},
				right: CompoundExpr{
					left: SimpleExpr{
						license: "LGPLv2",
						hasPlus: true,
					},
					conjunction: Token{
						token:   WITH,
						literal: "with",
					},
					right: SimpleExpr{
						license: "distribution exceptions",
					},
				},
			},
			wantStr: "Public Domain AND (GPLv2+ or AFL) AND LGPLv2+ with distribution exceptions",
		},
		{
			name:  "nested licenses",
			input: "Public Domain AND ( GPLv2+ or AFL AND ( CC0 or LGPL1.0) )",
			want: CompoundExpr{
				left: SimpleExpr{
					license: "Public Domain",
				},
				conjunction: Token{
					token:   AND,
					literal: "AND",
				},
				right: CompoundExpr{
					left: SimpleExpr{
						license: "GPLv2",
						hasPlus: true,
					},
					conjunction: Token{
						token:   OR,
						literal: "or",
					},
					right: CompoundExpr{
						left: SimpleExpr{
							license: "AFL",
						},
						conjunction: Token{
							token:   AND,
							literal: "AND",
						},
						right: CompoundExpr{
							left: SimpleExpr{
								license: "CC0",
							},
							conjunction: Token{
								token:   OR,
								literal: "or",
							},
							right: SimpleExpr{
								license: "LGPL1.0",
							},
						},
					},
				},
			},
			wantStr: "Public Domain AND (GPLv2+ or AFL AND (CC0 or LGPL1.0))",
		},
		{
			name:    "bad path close bracket not found",
			input:   "Public Domain AND ( GPLv2+ ",
			wantErr: "syntax error",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			l := NewLexer(strings.NewReader(tt.input))
			ret := yyParse(l)
			err := l.Err()
			if tt.wantErr != "" {
				// yyParse reports failure via a non-zero return code.
				// testify's assert.Equal takes (t, expected, actual).
				assert.Equal(t, 1, ret)
				assert.ErrorContains(t, err, tt.wantErr)
				return
			}
			require.NoError(t, err)
			assert.Equal(t, tt.want, l.result)
			assert.Equal(t, tt.wantStr, l.result.String())
		})
	}
}

View File

@@ -1,37 +0,0 @@
package token
import (
"strings"
)
// Token types emitted by the license lexer.
const (
	ILLEGAL = "ILLEGAL"
	EOF     = "EOF"

	IDENT = "IDENT"

	LPAREN = "("
	RPAREN = ")"

	AND = "AND"
	OR  = "OR"
)

// keywords maps upper-cased reserved words to their token types.
var keywords = map[string]TokenType{
	"AND": AND,
	"OR":  OR,
}

// TokenType classifies a lexed token.
type TokenType string

// Token is a single lexed token: its type plus the literal text as it
// appeared in the input.
type Token struct {
	Type    TokenType
	Literal string
}

// LookupIdent returns the keyword token type for ident when it matches
// a reserved word case-insensitively, and IDENT otherwise.
func LookupIdent(ident string) TokenType {
	tok, ok := keywords[strings.ToUpper(ident)]
	if !ok {
		return IDENT
	}
	return tok
}

View File

@@ -0,0 +1,83 @@
package expression
import (
"fmt"
"golang.org/x/exp/slices"
"github.com/aquasecurity/trivy/pkg/licensing"
)
// versioned lists the license identifiers whose SPDX form distinguishes
// the "-only" and "-or-later" variants. SimpleExpr.String appends one of
// those suffixes (instead of a bare "+") for licenses in this list.
var versioned = []string{
	licensing.AGPL10,
	licensing.AGPL30,
	licensing.GFDL11WithInvariants,
	licensing.GFDL11NoInvariants,
	licensing.GFDL11,
	licensing.GFDL12WithInvariants,
	licensing.GFDL12NoInvariants,
	licensing.GFDL12,
	licensing.GFDL13WithInvariants,
	licensing.GFDL13NoInvariants,
	licensing.GFDL13,
	licensing.GPL10,
	licensing.GPL20,
	licensing.GPL30,
	licensing.LGPL20,
	licensing.LGPL21,
	licensing.LGPL30,
}
// Expression is a parsed license expression that can render itself back
// to its normalized string form.
type Expression interface {
	String() string
}

// Token is a lexical token: the goyacc token id and the literal text as
// it appeared in the input.
type Token struct {
	token   int
	literal string
}
// SimpleExpr is a single license, optionally marked with a trailing "+"
// ("or any later version").
type SimpleExpr struct {
	license string
	hasPlus bool
}

// String renders the license. Identifiers in the versioned list get an
// explicit SPDX suffix ("-or-later" or "-only"); any other license keeps
// a trailing "+" when hasPlus is set.
func (s SimpleExpr) String() string {
	if slices.Contains(versioned, s.license) {
		suffix := "-only" // e.g. GPL-1.0-only
		if s.hasPlus {
			suffix = "-or-later" // e.g. AGPL-1.0-or-later
		}
		return s.license + suffix
	}
	if s.hasPlus {
		return s.license + "+"
	}
	return s.license
}
// CompoundExpr joins two sub-expressions with a conjunction
// (e.g. AND, OR, WITH).
type CompoundExpr struct {
	left        Expression
	conjunction Token
	right       Expression
}

// String renders the expression, parenthesizing a compound sub-expression
// whose conjunction has a smaller token id than this one — presumably the
// token ids are ordered by binding strength; confirm against the grammar.
func (c CompoundExpr) String() string {
	wrap := func(e Expression) string {
		s := e.String()
		// e.g. (A OR B) AND C, or A AND (B OR C)
		if sub, ok := e.(CompoundExpr); ok && c.conjunction.token > sub.conjunction.token {
			s = fmt.Sprintf("(%s)", s)
		}
		return s
	}
	return fmt.Sprintf("%s %s %s", wrap(c.left), c.conjunction.literal, wrap(c.right))
}

View File

@@ -0,0 +1,172 @@
state 0
$accept: .license $end
IDENT shift 6
'(' shift 5
. error
license goto 1
simple goto 3
plus goto 4
compound goto 2
state 1
$accept: license.$end
$end accept
. error
state 2
license: compound. (1)
compound: compound.AND compound
compound: compound.OR compound
compound: compound.WITH compound
OR shift 8
AND shift 7
WITH shift 9
. reduce 1 (src line 23)
state 3
simple: simple.IDENT
plus: simple.'+'
compound: simple. (5)
IDENT shift 10
'+' shift 11
. reduce 5 (src line 48)
state 4
compound: plus. (6)
. reduce 6 (src line 52)
state 5
compound: '('.compound ')'
IDENT shift 6
'(' shift 5
. error
simple goto 3
plus goto 4
compound goto 12
state 6
simple: IDENT. (2)
. reduce 2 (src line 32)
state 7
compound: compound AND.compound
IDENT shift 6
'(' shift 5
. error
simple goto 3
plus goto 4
compound goto 13
state 8
compound: compound OR.compound
IDENT shift 6
'(' shift 5
. error
simple goto 3
plus goto 4
compound goto 14
state 9
compound: compound WITH.compound
IDENT shift 6
'(' shift 5
. error
simple goto 3
plus goto 4
compound goto 15
state 10
simple: simple IDENT. (3)
. reduce 3 (src line 37)
state 11
plus: simple '+'. (4)
. reduce 4 (src line 42)
state 12
compound: compound.AND compound
compound: compound.OR compound
compound: compound.WITH compound
compound: '(' compound.')'
OR shift 8
AND shift 7
WITH shift 9
')' shift 16
. error
state 13
compound: compound.AND compound
compound: compound AND compound. (7)
compound: compound.OR compound
compound: compound.WITH compound
WITH shift 9
. reduce 7 (src line 55)
state 14
compound: compound.AND compound
compound: compound.OR compound
compound: compound OR compound. (8)
compound: compound.WITH compound
AND shift 7
WITH shift 9
. reduce 8 (src line 59)
state 15
compound: compound.AND compound
compound: compound.OR compound
compound: compound.WITH compound
compound: compound WITH compound. (9)
WITH shift 9
. reduce 9 (src line 63)
state 16
compound: '(' compound ')'. (10)
. reduce 10 (src line 67)
10 terminals, 5 nonterminals
11 grammar rules, 17/16000 states
0 shift/reduce, 0 reduce/reduce conflicts reported
54 working sets used
memory: parser 15/240000
13 extra closures
23 shift entries, 1 exceptions
8 goto entries
8 entries saved by goto default
Optimizer space used: output 22/240000
22 table entries, 0 zero
maximum spread: 10, maximum offset: 9

View File

@@ -1,6 +1,8 @@
package licensing package licensing
import "strings" import (
"strings"
)
var mapping = map[string]string{ var mapping = map[string]string{
// GPL // GPL
@@ -14,6 +16,7 @@ var mapping = map[string]string{
"GPL-2": GPL20, "GPL-2": GPL20,
"GPL-2.0-ONLY": GPL20, "GPL-2.0-ONLY": GPL20,
"GPL2+": GPL20, "GPL2+": GPL20,
"GPLV2": GPL20,
"GPLV2+": GPL20, "GPLV2+": GPL20,
"GPL-2+": GPL20, "GPL-2+": GPL20,
"GPL-2.0+": GPL20, "GPL-2.0+": GPL20,
@@ -23,6 +26,7 @@ var mapping = map[string]string{
"GPL3": GPL30, "GPL3": GPL30,
"GPL 3.0": GPL30, "GPL 3.0": GPL30,
"GPL 3": GPL30, "GPL 3": GPL30,
"GPLV3": GPL30,
"GPLV3+": GPL30, "GPLV3+": GPL30,
"GPL-3": GPL30, "GPL-3": GPL30,
"GPL-3.0-ONLY": GPL30, "GPL-3.0-ONLY": GPL30,

View File

@@ -8,6 +8,7 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"github.com/mitchellh/hashstructure/v2" "github.com/mitchellh/hashstructure/v2"
"github.com/samber/lo"
"github.com/spdx/tools-golang/spdx" "github.com/spdx/tools-golang/spdx"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"k8s.io/utils/clock" "k8s.io/utils/clock"
@@ -15,6 +16,7 @@ import (
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing" "github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/licensing/expression" "github.com/aquasecurity/trivy/pkg/licensing/expression"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/purl" "github.com/aquasecurity/trivy/pkg/purl"
"github.com/aquasecurity/trivy/pkg/scanner/utils" "github.com/aquasecurity/trivy/pkg/scanner/utils"
"github.com/aquasecurity/trivy/pkg/types" "github.com/aquasecurity/trivy/pkg/types"
@@ -362,11 +364,20 @@ func GetLicense(p ftypes.Package) string {
return "NONE" return "NONE"
} }
return expression.Normalize( license := strings.Join(lo.Map(p.Licenses, func(license string, index int) string {
expression.Join(p.Licenses, expression.AND), // e.g. GPL-3.0-with-autoconf-exception
licensing.Normalize, license = strings.ReplaceAll(license, "-with-", " WITH ")
expression.NormalizeForSPDX, license = strings.ReplaceAll(license, "-WITH-", " WITH ")
)
return fmt.Sprintf("(%s)", license)
}), " AND ")
s, err := expression.Normalize(license, licensing.Normalize, expression.NormalizeForSPDX)
if err != nil {
// Not fail on the invalid license
log.Logger.Warnf("Unable to marshal SPDX licenses %q", license)
return ""
}
return s
} }
func getDocumentNamespace(r types.Report, m *Marshaler) string { func getDocumentNamespace(r types.Report, m *Marshaler) string {

View File

@@ -176,8 +176,8 @@ func TestMarshaler_Marshal(t *testing.T) {
PackageSPDXIdentifier: spdx.ElementID("Package-fd0dc3cf913d5bc3"), PackageSPDXIdentifier: spdx.ElementID("Package-fd0dc3cf913d5bc3"),
PackageName: "binutils", PackageName: "binutils",
PackageVersion: "2.30", PackageVersion: "2.30",
PackageLicenseConcluded: "GPL-3.0", PackageLicenseConcluded: "GPL-3.0-or-later",
PackageLicenseDeclared: "GPL-3.0", PackageLicenseDeclared: "GPL-3.0-or-later",
PackageExternalReferences: []*spdx.PackageExternalReference2_2{ PackageExternalReferences: []*spdx.PackageExternalReference2_2{
{ {
Category: tspdx.CategoryPackageManager, Category: tspdx.CategoryPackageManager,
@@ -338,8 +338,8 @@ func TestMarshaler_Marshal(t *testing.T) {
PackageSPDXIdentifier: spdx.ElementID("Package-d8dccb186bafaf37"), PackageSPDXIdentifier: spdx.ElementID("Package-d8dccb186bafaf37"),
PackageName: "acl", PackageName: "acl",
PackageVersion: "2.2.53", PackageVersion: "2.2.53",
PackageLicenseConcluded: "GPL-2.0", PackageLicenseConcluded: "GPL-2.0-or-later",
PackageLicenseDeclared: "GPL-2.0", PackageLicenseDeclared: "GPL-2.0-or-later",
PackageExternalReferences: []*spdx.PackageExternalReference2_2{ PackageExternalReferences: []*spdx.PackageExternalReference2_2{
{ {
Category: tspdx.CategoryPackageManager, Category: tspdx.CategoryPackageManager,
@@ -700,7 +700,7 @@ func Test_GetLicense(t *testing.T) {
"GPLv2+", "GPLv2+",
}, },
}, },
want: "GPL-2.0", want: "GPL-2.0-or-later",
}, },
{ {
name: "happy path with multi license", name: "happy path with multi license",
@@ -710,7 +710,7 @@ func Test_GetLicense(t *testing.T) {
"GPLv3+", "GPLv3+",
}, },
}, },
want: "GPL-2.0 AND GPL-3.0", want: "GPL-2.0-or-later AND GPL-3.0-or-later",
}, },
{ {
name: "happy path with OR operator", name: "happy path with OR operator",
@@ -720,7 +720,7 @@ func Test_GetLicense(t *testing.T) {
"LGPL 2.0 or GNU LESSER", "LGPL 2.0 or GNU LESSER",
}, },
}, },
want: "GPL-2.0 AND ( LGPL-2.0 OR LGPL-3.0 )", want: "GPL-2.0-or-later AND (LGPL-2.0-only OR LGPL-3.0-only)",
}, },
{ {
name: "happy path with AND operator", name: "happy path with AND operator",
@@ -730,7 +730,7 @@ func Test_GetLicense(t *testing.T) {
"LGPL 2.0 and GNU LESSER", "LGPL 2.0 and GNU LESSER",
}, },
}, },
want: "GPL-2.0 AND LGPL-2.0 AND LGPL-3.0", want: "GPL-2.0-or-later AND LGPL-2.0-only AND LGPL-3.0-only",
}, },
{ {
name: "happy path with WITH operator", name: "happy path with WITH operator",