refactor: add case-insensitive string set implementation (#9720)

This commit is contained in:
Teppei Fukuda
2025-10-29 13:25:35 +04:00
committed by GitHub
parent 758f271040
commit 89fc7b65a5
7 changed files with 563 additions and 40 deletions

View File

@@ -5,7 +5,6 @@ import (
"debug/buildinfo"
"fmt"
"runtime/debug"
"slices"
"sort"
"strings"
@@ -18,12 +17,16 @@ import (
"github.com/aquasecurity/trivy/pkg/dependency"
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/set"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)
var (
// ErrUnrecognizedExe is a sentinel error carrying the message "unrecognized executable format".
ErrUnrecognizedExe = xerrors.New("unrecognized executable format")
// ErrNonGoBinary is a sentinel error carrying the message "non go binary".
ErrNonGoBinary = xerrors.New("non go binary")
// defaultVersionPrefixes contains common prefixes used in -ldflags version keys.
// The set ignores letter case on lookup, so callers can pass a prefix without lowercasing it first.
defaultVersionPrefixes = set.NewCaseInsensitive("main", "common", "version", "cmd")
)
// convertError detects buildinfo.errUnrecognizedFormat and convert to
@@ -187,15 +190,9 @@ func (p *Parser) ParseLDFlags(name string, flags []string) string {
// foundVersions doesn't contain duplicates. Versions are filled into first corresponding category.
// Possible elements(categories):
// [0]: Versions using format `github.com/<module_owner>/<module_name>/cmd/**/*.<version>=x.x.x`
// [1]: Versions that use prefixes from `defaultPrefixes`
// [1]: Versions that use prefixes from `defaultVersionPrefixes`
// [2]: Other versions
var foundVersions = make([][]string, 3)
defaultPrefixes := []string{
"main",
"common",
"version",
"cmd",
}
for key, val := range x {
// It's valid to set the -X flags with quotes so we trim any that might
// have been provided: Ex:
@@ -212,7 +209,7 @@ func (p *Parser) ParseLDFlags(name string, flags []string) string {
switch {
case strings.HasPrefix(key, name+"/cmd/"):
foundVersions[0] = append(foundVersions[0], val)
case slices.Contains(defaultPrefixes, strings.ToLower(versionPrefix(key))):
case defaultVersionPrefixes.Contains(versionPrefix(key)):
foundVersions[1] = append(foundVersions[1], val)
default:
foundVersions[2] = append(foundVersions[2], val)

View File

@@ -6,7 +6,6 @@ import (
"math"
"os"
"path/filepath"
"slices"
"strings"
"golang.org/x/xerrors"
@@ -15,6 +14,7 @@ import (
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/set"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)
@@ -36,7 +36,7 @@ var (
"usr/src/wordpress",
}
acceptedExtensions = []string{
acceptedExtensions = set.NewCaseInsensitive(
".asp",
".aspx",
".bas",
@@ -68,13 +68,13 @@ var (
".txt",
".vue",
".zsh",
}
)
acceptedFileNames = []string{
acceptedFileNames = set.NewCaseInsensitive(
"license",
"licence",
"copyright",
}
)
)
func init() {
@@ -122,13 +122,11 @@ func (a *licenseFileAnalyzer) Required(filePath string, _ os.FileInfo) bool {
return false
}
}
ext := strings.ToLower(filepath.Ext(filePath))
if slices.Contains(acceptedExtensions, ext) {
if acceptedExtensions.Contains(filepath.Ext(filePath)) {
return true
}
baseName := strings.ToLower(filepath.Base(filePath))
return slices.Contains(acceptedFileNames, baseName)
return acceptedFileNames.Contains(filepath.Base(filePath))
}
func isHumanReadable(content xio.ReadSeekerAt, fileSize int64) (bool, error) {

View File

@@ -4,8 +4,6 @@ import (
"fmt"
"io/fs"
"path"
"slices"
"strings"
"github.com/open-policy-agent/opa/v1/loader"
"github.com/open-policy-agent/opa/v1/storage"
@@ -14,6 +12,8 @@ import (
"github.com/aquasecurity/trivy/pkg/set"
)
// dataFileExtensions holds the file extensions that isDataFile accepts.
// Lookups ignore letter case.
var dataFileExtensions = set.NewCaseInsensitive(".yaml", ".yml", ".json")
// initialize a store populated with OPA data files found in dataPaths
func initStore(dataFS fs.FS, dataPaths, namespaces []string) (storage.Store, error) {
dataFiles := set.New[string]()
@@ -55,9 +55,5 @@ func initStore(dataFS fs.FS, dataPaths, namespaces []string) (storage.Store, err
}
func isDataFile(filePath string) bool {
return slices.Contains([]string{
".yaml",
".yml",
".json",
}, strings.ToLower(path.Ext(filePath)))
return dataFileExtensions.Contains(path.Ext(filePath))
}

View File

@@ -5,13 +5,13 @@ import (
"encoding/json/v2"
"fmt"
"maps"
"slices"
"strings"
"time"
"github.com/samber/lo"
"github.com/aquasecurity/trivy/pkg/iac/types"
"github.com/aquasecurity/trivy/pkg/set"
)
type EvalContext struct{}
@@ -42,6 +42,8 @@ var NullValue = Value{
Kind: KindNull,
}
// boolTrueValues holds the string literals that AsBoolValue treats as true.
// Lookups ignore letter case.
var boolTrueValues = set.NewCaseInsensitive("true", "1", "yes", "on", "enabled")
func NewValue(value any, metadata types.Metadata) Value {
v := Value{
@@ -207,14 +209,7 @@ func (v Value) AsIntValue(defaultValue int, metadata types.Metadata) types.IntVa
func (v Value) AsBoolValue(defaultValue bool, metadata types.Metadata) types.BoolValue {
v.Resolve()
if v.Kind == KindString {
possibleValue := strings.ToLower(v.rLit.(string))
if slices.Contains([]string{
"true",
"1",
"yes",
"on",
"enabled",
}, possibleValue) {
if boolTrueValues.Contains(v.rLit.(string)) {
return types.Bool(true, metadata)
}
}

View File

@@ -373,7 +373,7 @@ var (
}
)
var spdxLicenses = set.New[string]()
var spdxLicenses = set.NewCaseInsensitive()
//go:embed licenses.json
var licenses []byte
@@ -389,10 +389,8 @@ var initSpdxLicenses = sync.OnceFunc(func() {
return
}
// SPDX license list is case-insensitive. Store in upper case for simplicity.
spdxLicenses.Append(lo.Map(lics, func(l string, _ int) string {
return strings.ToUpper(l)
})...)
// SPDX license list is case-insensitive.
spdxLicenses.Append(lics...)
})
//go:embed exceptions.json
@@ -419,7 +417,7 @@ var initSpdxExceptions = sync.OnceFunc(func() {
func ValidateSPDXLicense(license string) bool {
initSpdxLicenses()
return spdxLicenses.Contains(strings.ToUpper(license))
return spdxLicenses.Contains(license)
}
// ValidateSPDXException returns true if SPDX exception list contain exceptionID

106
pkg/set/case_insensitive.go Normal file
View File

@@ -0,0 +1,106 @@
package set
import (
"iter"
"maps"
"slices"
"strings"
)
// caseInsensitiveStringSet is a string set whose membership checks ignore letter case.
// Each entry is keyed by the strings.ToLower form of the item, and the map value keeps
// the item exactly as it was first added. As a result, Items and Iter yield the original
// casing of the first occurrence of each unique (case-insensitive) string.
// WARNING: This implementation is not thread-safe.
type caseInsensitiveStringSet map[string]string //nolint: gocritic
// NewCaseInsensitive builds a case-insensitive string set, optionally seeded with values.
// When several values differ only by letter case, the first one is kept and the rest are dropped.
// For example: NewCaseInsensitive("Hello", "HELLO", "world") will contain "Hello" and "world".
func NewCaseInsensitive(values ...string) Set[string] {
	// Size the map for the worst case in which no two values collide.
	result := make(caseInsensitiveStringSet, len(values))
	result.Append(values...)
	return result
}
// Append inserts every value that is not yet present (ignoring letter case) and
// returns the size of the set afterwards.
// A value whose lowercase form is already stored is skipped, so the casing of the
// first occurrence is the one that remains in the set.
func (s caseInsensitiveStringSet) Append(values ...string) int {
	for _, original := range values {
		folded := strings.ToLower(original)
		if _, seen := s[folded]; seen {
			// Keep the casing that was stored first.
			continue
		}
		s[folded] = original
	}
	return len(s)
}
// Remove deletes the item from the set, matching it without regard to letter case.
// Removing an item that is not present is a no-op.
func (s caseInsensitiveStringSet) Remove(item string) {
	key := strings.ToLower(item)
	delete(s, key)
}
// Contains reports whether the set holds the item, matching it without regard to letter case.
func (s caseInsensitiveStringSet) Contains(item string) bool {
	key := strings.ToLower(item)
	_, found := s[key]
	return found
}
// Size returns the number of distinct items in the set; strings that differ
// only by letter case count as a single item.
func (s caseInsensitiveStringSet) Size() int {
return len(s)
}
// Clear removes all items from the set in place, leaving it empty.
func (s caseInsensitiveStringSet) Clear() {
clear(s)
}
// Clone returns an independent copy of the set: the copy holds the same items with
// the same casing, and later changes to either set do not affect the other.
func (s caseInsensitiveStringSet) Clone() Set[string] {
	var copied caseInsensitiveStringSet = maps.Clone(s)
	return copied
}
// Items collects every item of the set into a slice, each with its original casing.
// The set is map-backed, so the order of the returned slice is unspecified.
func (s caseInsensitiveStringSet) Items() []string {
	// The map values hold the originally-cased strings; the keys are only lookup forms.
	originals := maps.Values(s)
	return slices.Collect(originals)
}
// Iter returns an iterator over the stored items with their original casing.
// The set is map-backed, so the iteration order is unspecified.
func (s caseInsensitiveStringSet) Iter() iter.Seq[string] {
return maps.Values(s)
}
// Union returns a new case-insensitive set holding every item found in this set or in other.
// Entries of this set are copied first, so when both sets hold the same item
// (ignoring letter case) the casing from this set is the one kept.
func (s caseInsensitiveStringSet) Union(other Set[string]) Set[string] {
	capacity := s.Size() + other.Size()
	merged := make(caseInsensitiveStringSet, capacity)
	for key, original := range s {
		merged[key] = original
	}
	// Append skips anything already present, which preserves this set's casing.
	merged.Append(other.Items()...)
	return merged
}
// Intersection returns a new case-insensitive set containing the items of this set
// that are also present in other, ignoring letter case.
// The casing from this set is preserved for matching items.
//
// Matching is case-insensitive regardless of how other is implemented. When other is
// not itself a case-insensitive set, its items are folded into a temporary lookup
// instead of relying on other.Contains, which may compare case-sensitively.
// A nil other is treated as an empty set.
func (s caseInsensitiveStringSet) Intersection(other Set[string]) Set[string] {
	result := make(caseInsensitiveStringSet)
	if other == nil {
		return result
	}

	lookup, ok := other.(caseInsensitiveStringSet)
	if !ok {
		lookup = make(caseInsensitiveStringSet, other.Size())
		lookup.Append(other.Items()...)
	}

	// Keys of both maps are already lowercase, so they can be compared directly.
	for key, original := range s {
		if _, shared := lookup[key]; shared {
			result[key] = original
		}
	}
	return result
}
// Difference returns a new case-insensitive set containing the items of this set
// that are not present in other, ignoring letter case.
// The casing from this set is preserved.
//
// Matching is case-insensitive regardless of how other is implemented. When other is
// not itself a case-insensitive set, its items are folded into a temporary lookup
// instead of relying on other.Contains, which may compare case-sensitively.
// A nil other is treated as an empty set, so the result is a copy of this set.
func (s caseInsensitiveStringSet) Difference(other Set[string]) Set[string] {
	result := make(caseInsensitiveStringSet)
	if other == nil {
		maps.Copy(result, s)
		return result
	}

	lookup, ok := other.(caseInsensitiveStringSet)
	if !ok {
		lookup = make(caseInsensitiveStringSet, other.Size())
		lookup.Append(other.Items()...)
	}

	// Keys of both maps are already lowercase, so they can be compared directly.
	for key, original := range s {
		if _, excluded := lookup[key]; !excluded {
			result[key] = original
		}
	}
	return result
}

View File

@@ -0,0 +1,433 @@
package set_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/aquasecurity/trivy/pkg/set"
)
// TestNewCaseInsensitive verifies that the constructor deduplicates values that differ
// only by letter case and keeps the casing of the first occurrence.
func TestNewCaseInsensitive(t *testing.T) {
	tests := []struct {
		name   string
		values []string
		want   []string
		desc   string // expectation in words; reported as the failure message
	}{
		{
			name:   "empty set",
			values: []string{},
			want:   []string{},
			desc:   "should create empty set when no values provided",
		},
		{
			name:   "single value",
			values: []string{"Hello"},
			want:   []string{"Hello"},
			desc:   "should create set with single value",
		},
		{
			name:   "multiple values",
			values: []string{"Hello", "World", "Test"},
			want:   []string{"Hello", "World", "Test"},
			desc:   "should create set with multiple values",
		},
		{
			name:   "case insensitive duplicates",
			values: []string{"Hello", "HELLO", "hello", "HeLLo"},
			want:   []string{"Hello"},
			desc:   "should treat case variations as duplicates and preserve first occurrence",
		},
		{
			name:   "mixed case duplicates",
			values: []string{"Test", "TEST", "test", "World", "WORLD"},
			want:   []string{"Test", "World"},
			desc:   "should treat case variations as duplicates across multiple strings and preserve first occurrences",
		},
		{
			name:   "empty strings",
			values: []string{"", "test", ""},
			want:   []string{"", "test"},
			desc:   "should handle empty strings and treat duplicates correctly",
		},
		{
			name:   "unicode strings",
			values: []string{"こんにちは", "世界", "こんにちは"},
			want:   []string{"こんにちは", "世界"},
			desc:   "should handle unicode strings correctly",
		},
		{
			name:   "strings with spaces",
			values: []string{"Hello World", "hello world", "HELLO WORLD"},
			want:   []string{"Hello World"},
			desc:   "should handle strings with spaces case-insensitively and preserve original spacing",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			s := set.NewCaseInsensitive(tt.values...)
			// Compare without regard to order; desc was previously populated but
			// never read, so surface it as the failure message.
			assert.ElementsMatch(t, tt.want, s.Items(), tt.desc)
		})
	}
}
// TestCaseInsensitiveSet_Append checks both the size reported by Append and the
// resulting contents when new, duplicate and case-variant items are added.
func TestCaseInsensitiveSet_Append(t *testing.T) {
	cases := []struct {
		name    string
		initial []string
		extra   []string
		want    []string
	}{
		{name: "append to empty set", initial: []string{}, extra: []string{"Hello", "World"}, want: []string{"Hello", "World"}},
		{name: "append case variations", initial: []string{"Hello"}, extra: []string{"HELLO", "hello"}, want: []string{"Hello"}},
		{name: "append new and existing", initial: []string{"Hello"}, extra: []string{"HELLO", "World"}, want: []string{"Hello", "World"}},
		{name: "append empty slice", initial: []string{"Hello"}, extra: []string{}, want: []string{"Hello"}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			target := set.NewCaseInsensitive(tc.initial...)
			size := target.Append(tc.extra...)

			assert.Equal(t, len(tc.want), size, "unexpected returned size")
			assert.ElementsMatch(t, tc.want, target.Items(), "unexpected set contents")
		})
	}
}
// TestCaseInsensitiveSet_Contains confirms that lookups succeed for any casing of a
// stored item and fail for an item that was never added.
func TestCaseInsensitiveSet_Contains(t *testing.T) {
	cases := []struct {
		name    string
		members []string
		probe   string
		want    bool
	}{
		{name: "exact match", members: []string{"Hello"}, probe: "Hello", want: true},
		{name: "lowercase match", members: []string{"Hello"}, probe: "hello", want: true},
		{name: "uppercase match", members: []string{"Hello"}, probe: "HELLO", want: true},
		{name: "mixed case match", members: []string{"Hello"}, probe: "HeLLo", want: true},
		{name: "not found", members: []string{"Hello"}, probe: "World", want: false},
		{name: "empty string exists", members: []string{""}, probe: "", want: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			target := set.NewCaseInsensitive(tc.members...)
			found := target.Contains(tc.probe)

			assert.Equal(t, tc.want, found, "unexpected contains result")
		})
	}
}
// TestCaseInsensitiveSet_Remove checks that removal matches items regardless of casing
// and that removing a missing item leaves the set unchanged in size.
func TestCaseInsensitiveSet_Remove(t *testing.T) {
	cases := []struct {
		name     string
		members  []string
		victim   string
		wantSize int
	}{
		{name: "remove exact match", members: []string{"Hello", "World"}, victim: "Hello", wantSize: 1},
		{name: "remove with different case", members: []string{"Hello", "World"}, victim: "hello", wantSize: 1},
		{name: "remove uppercase", members: []string{"Hello", "World"}, victim: "WORLD", wantSize: 1},
		{name: "remove non-existing", members: []string{"Hello"}, victim: "World", wantSize: 1},
		{name: "remove from empty set", members: []string{}, victim: "Hello", wantSize: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			target := set.NewCaseInsensitive(tc.members...)
			target.Remove(tc.victim)

			remaining := target.Size()
			assert.Equal(t, tc.wantSize, remaining, "unexpected set size after remove")
			assert.False(t, target.Contains(tc.victim), "set should not contain removed item")
		})
	}
}
// TestCaseInsensitiveSet_Clear checks that Clear empties both populated and
// already-empty sets.
func TestCaseInsensitiveSet_Clear(t *testing.T) {
	cases := []struct {
		name    string
		members []string
	}{
		{name: "clear non-empty set", members: []string{"Hello", "World", "Test"}},
		{name: "clear empty set", members: []string{}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			target := set.NewCaseInsensitive(tc.members...)
			target.Clear()

			assert.Zero(t, target.Size(), "set should be empty after clear")
			assert.Empty(t, target.Items(), "items should be empty after clear")
		})
	}
}
// TestCaseInsensitiveSet_Clone checks that a clone starts with the same items and
// casing as its source and that the two sets evolve independently afterwards.
func TestCaseInsensitiveSet_Clone(t *testing.T) {
	t.Run("empty set", func(t *testing.T) {
		src := set.NewCaseInsensitive()
		dup := src.Clone()
		assert.Equal(t, 0, dup.Size(), "cloned set should be empty")

		// A later write to the source must not show up in the clone.
		src.Append("test")
		assert.False(t, dup.Contains("test"), "cloned set should not be affected by original")
	})

	t.Run("non-empty set", func(t *testing.T) {
		src := set.NewCaseInsensitive("Hello", "World")
		dup := src.Clone()

		assert.Equal(t, src.Size(), dup.Size(), "sizes should match")
		assert.True(t, dup.Contains("hello"), "should contain hello (case insensitive)")
		assert.True(t, dup.Contains("WORLD"), "should contain world (case insensitive)")

		// Writes in either direction must stay local to the set they were made on.
		src.Append("new")
		assert.False(t, dup.Contains("new"), "cloned set should not be affected by original")
		dup.Append("another")
		assert.False(t, src.Contains("another"), "original set should not be affected by clone")
	})

	t.Run("preserves casing", func(t *testing.T) {
		src := set.NewCaseInsensitive("Hello", "WORLD")
		dup := src.Clone()
		assert.ElementsMatch(t, src.Items(), dup.Items(), "cloned set should preserve original casing")
	})
}
// TestCaseInsensitiveSet_Union checks that the union holds every item from both
// operands once, keeping the left operand's casing when both hold the same item.
func TestCaseInsensitiveSet_Union(t *testing.T) {
	cases := []struct {
		name  string
		left  []string
		right []string
		want  []string
	}{
		{name: "non-overlapping sets", left: []string{"Hello", "World"}, right: []string{"Test", "Data"}, want: []string{"Hello", "World", "Test", "Data"}},
		{name: "overlapping sets with same case", left: []string{"Hello", "World"}, right: []string{"World", "Test"}, want: []string{"Hello", "World", "Test"}},
		{name: "overlapping sets with different case", left: []string{"Hello", "World"}, right: []string{"HELLO", "test"}, want: []string{"Hello", "World", "test"}},
		{name: "union with empty set", left: []string{"Hello"}, right: []string{}, want: []string{"Hello"}},
		{name: "empty sets union", left: []string{}, right: []string{}, want: []string{}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lhs := set.NewCaseInsensitive(tc.left...)
			rhs := set.NewCaseInsensitive(tc.right...)

			merged := lhs.Union(rhs).Items()
			assert.ElementsMatch(t, tc.want, merged, "unexpected union result")
		})
	}
}
// TestCaseInsensitiveSet_Intersection checks that only items present in both operands
// (ignoring letter case) survive, with the left operand's casing.
func TestCaseInsensitiveSet_Intersection(t *testing.T) {
	cases := []struct {
		name  string
		left  []string
		right []string
		want  []string
	}{
		{name: "overlapping sets with same case", left: []string{"Hello", "World", "Test"}, right: []string{"World", "Test", "Data"}, want: []string{"World", "Test"}},
		{name: "overlapping sets with different case", left: []string{"Hello", "World"}, right: []string{"hello", "WORLD"}, want: []string{"Hello", "World"}},
		{name: "non-overlapping sets", left: []string{"Hello"}, right: []string{"World"}, want: []string{}},
		{name: "intersection with empty set", left: []string{"Hello"}, right: []string{}, want: []string{}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lhs := set.NewCaseInsensitive(tc.left...)
			rhs := set.NewCaseInsensitive(tc.right...)

			common := lhs.Intersection(rhs).Items()
			assert.ElementsMatch(t, tc.want, common, "unexpected intersection result")
		})
	}
}
// TestCaseInsensitiveSet_Difference checks that items of the left operand are dropped
// when the right operand holds them in any casing, and kept otherwise.
func TestCaseInsensitiveSet_Difference(t *testing.T) {
	cases := []struct {
		name  string
		left  []string
		right []string
		want  []string
	}{
		{name: "difference with same case", left: []string{"Hello", "World", "Test"}, right: []string{"World", "Data"}, want: []string{"Hello", "Test"}},
		{name: "difference with different case", left: []string{"Hello", "World", "Test"}, right: []string{"hello", "WORLD"}, want: []string{"Test"}},
		{name: "difference with non-overlapping set", left: []string{"Hello", "World"}, right: []string{"Test", "Data"}, want: []string{"Hello", "World"}},
		{name: "difference with empty set", left: []string{"Hello", "World"}, right: []string{}, want: []string{"Hello", "World"}},
		{name: "difference of empty set", left: []string{}, right: []string{"Hello"}, want: []string{}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lhs := set.NewCaseInsensitive(tc.left...)
			rhs := set.NewCaseInsensitive(tc.right...)

			leftover := lhs.Difference(rhs).Items()
			assert.ElementsMatch(t, tc.want, leftover, "unexpected difference result")
		})
	}
}