Files
trivy/pkg/dependency/parser/java/jar/parse.go
Teppei Fukuda 3eecfc6b6e refactor: unify Library and Package structs (#6633)
Signed-off-by: knqyf263 <knqyf263@gmail.com>
Co-authored-by: DmitriyLewen <91113035+DmitriyLewen@users.noreply.github.com>
Co-authored-by: DmitriyLewen <dmitriy.lewen@smartforce.io>
2024-05-07 12:25:52 +00:00

446 lines
13 KiB
Go

package jar
import (
"archive/zip"
"bufio"
"crypto/sha1" // nolint:gosec
"encoding/hex"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/samber/lo"
"golang.org/x/xerrors"
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)
var (
	// jarFileRegEx extracts the artifactId (group 1) and the version (group 2)
	// from a JAR file name, e.g. "spring-core-5.3.4-SNAPSHOT.jar".
	// The dot in front of the extension is escaped so that only a literal
	// ".jar" suffix is accepted.
	jarFileRegEx = regexp.MustCompile(`^([a-zA-Z0-9\._-]*[^-*])-(\d\S*(?:-SNAPSHOT)?)\.jar$`)
)
// Client looks up Maven artifact coordinates on behalf of the Parser.
// NOTE(review): implementations are defined outside this file; the method
// descriptions below reflect how parseArtifact and searchBySHA1 use them.
type Client interface {
// Exists is called with the groupID and artifactID taken from MANIFEST.MF
// to confirm that such an artifact actually exists.
Exists(groupID, artifactID string) (bool, error)
// SearchBySHA1 is called with the hex-encoded SHA-1 digest of an archive.
// Callers treat ArtifactNotFoundErr as "no match" rather than a failure.
SearchBySHA1(sha1 string) (Properties, error)
// SearchByArtifactID is called with the artifactID and version parsed from
// the file name; the returned string is used as the groupID.
// Callers treat ArtifactNotFoundErr as "no match" rather than a failure.
SearchByArtifactID(artifactID, version string) (string, error)
}
// Parser extracts package information from Java archives (JAR/WAR/EAR).
type Parser struct {
// logger receives the debug messages emitted while parsing.
logger *log.Logger
// rootFilePath is the path of the top-level archive that Parse hands to parseArtifact.
rootFilePath string
// offline, when true, makes parseArtifact return before any client lookup.
offline bool
// size is the size of the top-level archive; it ends up in zip.NewReader.
size int64
// client is queried for artifact coordinates when offline is false.
client Client
}
// Option mutates a Parser; NewParser applies the given options in order.
type Option func(*Parser)
// WithFilePath returns an Option that records filePath as the path of the
// root archive.
func WithFilePath(filePath string) Option {
	return func(parser *Parser) {
		parser.rootFilePath = filePath
	}
}
// WithOffline returns an Option that sets the parser's offline flag.
func WithOffline(offline bool) Option {
	return func(parser *Parser) {
		parser.offline = offline
	}
}
// WithSize returns an Option that records the size of the root archive.
func WithSize(size int64) Option {
	return func(parser *Parser) {
		parser.size = size
	}
}
// NewParser builds a Parser that uses c for artifact lookups and logs with
// the "jar" prefix, then applies opts in the order given.
func NewParser(c Client, opts ...Option) *Parser {
	parser := &Parser{
		client: c,
		logger: log.WithPrefix("jar"),
	}
	for i := range opts {
		opts[i](parser)
	}
	return parser
}
// Parse parses the root archive read from r, using the file path and size
// configured on the parser, and returns the detected packages with
// duplicates removed. Any failure is wrapped with the root file path.
func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependency, error) {
	found, dependencies, parseErr := p.parseArtifact(p.rootFilePath, p.size, r)
	if parseErr != nil {
		return nil, nil, xerrors.Errorf("unable to parse %s: %w", p.rootFilePath, parseErr)
	}

	unique := removePackageDuplicates(found)
	return unique, dependencies, nil
}
// parseArtifact identifies the packages contained in the archive at filePath.
//
// Packages found while traversing the archive (pom.properties entries and
// nested archives) are always included. The archive itself is identified by
// the first strategy that succeeds:
//  1. a pom.properties whose artifactId and version match the file name,
//  2. MANIFEST.MF (accepted as is in offline mode, otherwise confirmed via the client),
//  3. a client search by the SHA-1 digest of the archive,
//  4. a client search by the artifactId and version taken from the file name.
//
// The returned dependency slice is always nil. An error is returned when the
// archive cannot be read or when a client search fails with anything other
// than ArtifactNotFoundErr.
func (p *Parser) parseArtifact(filePath string, size int64, r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependency, error) {
	p.logger.Debug("Parsing Java artifacts...", log.String("file", filePath))

	// Try to extract artifactId and version from the file name
	// e.g. spring-core-5.3.4-SNAPSHOT.jar => spring-core, 5.3.4-SNAPSHOT
	fileName := filepath.Base(filePath)
	fileProps := parseFileName(filePath)

	pkgs, m, foundPomProps, err := p.traverseZip(filePath, size, r, fileProps)
	if err != nil {
		return nil, nil, xerrors.Errorf("zip error: %w", err)
	}

	// If pom.properties is found, it is preferred over MANIFEST.MF.
	if foundPomProps {
		return pkgs, nil, nil
	}

	manifestProps := m.properties(filePath)
	if p.offline {
		// In offline mode, we will not check if the artifact information is correct.
		if !manifestProps.Valid() {
			p.logger.Debug("Unable to identify POM in offline mode", log.String("file", fileName))
			return pkgs, nil, nil
		}
		return append(pkgs, manifestProps.Package()), nil, nil
	}

	if manifestProps.Valid() {
		// Even if MANIFEST.MF is found, the groupId and artifactId might not be valid.
		// We have to make sure that the artifact exists actually.
		ok, existsErr := p.client.Exists(manifestProps.GroupID, manifestProps.ArtifactID)
		if existsErr != nil {
			// Best effort: a failed lookup falls through to the SHA-1 search
			// below instead of aborting, but it is no longer dropped silently.
			p.logger.Debug("Unable to check if the artifact exists",
				log.String("file", fileName), log.Err(existsErr))
		}
		if ok {
			// If groupId and artifactId are valid, they will be returned.
			return append(pkgs, manifestProps.Package()), nil, nil
		}
	}

	// If groupId and artifactId are not found, call Maven Central's search API with SHA-1 digest.
	props, err := p.searchBySHA1(r, filePath)
	if err == nil {
		return append(pkgs, props.Package()), nil, nil
	}
	if !errors.Is(err, ArtifactNotFoundErr) {
		return nil, nil, xerrors.Errorf("failed to search by SHA1: %w", err)
	}

	p.logger.Debug("No such POM in the central repositories", log.String("file", fileName))

	// Return when artifactId or version from the file name are empty
	if fileProps.ArtifactID == "" || fileProps.Version == "" {
		return pkgs, nil, nil
	}

	// Try to search groupId by artifactId via sonatype API
	// When some artifacts have the same groupIds, it might result in false detection.
	fileProps.GroupID, err = p.client.SearchByArtifactID(fileProps.ArtifactID, fileProps.Version)
	if err != nil {
		if !errors.Is(err, ArtifactNotFoundErr) {
			return nil, nil, xerrors.Errorf("failed to search by artifact id: %w", err)
		}
		return pkgs, nil, nil
	}

	p.logger.Debug("POM was determined in a heuristic way", log.String("file", fileName),
		log.String("artifact", fileProps.String()))
	return append(pkgs, fileProps.Package()), nil, nil
}
// traverseZip walks every entry of the zip archive read from r and returns:
//   - the packages described by valid pom.properties entries plus those found
//     in nested JAR/WAR/EAR entries,
//   - the parsed MANIFEST.MF (the last one encountered, if there are several),
//   - whether a pom.properties matched the artifactId and version in fileProps.
//
// Failing to parse a pom.properties or MANIFEST.MF aborts the traversal;
// failing to parse a nested archive is only logged and that entry is skipped.
func (p *Parser) traverseZip(filePath string, size int64, r xio.ReadSeekerAt, fileProps Properties) (
	[]ftypes.Package, manifest, bool, error) {
	zr, err := zip.NewReader(r, size)
	if err != nil {
		return nil, manifest{}, false, xerrors.Errorf("zip error: %w", err)
	}

	var (
		found       []ftypes.Package
		mf          manifest
		matchedRoot bool
	)
	for _, entry := range zr.File {
		baseName := filepath.Base(entry.Name)

		if baseName == "pom.properties" {
			props, err := parsePomProperties(entry, filePath)
			if err != nil {
				return nil, manifest{}, false, xerrors.Errorf("failed to parse %s: %w", entry.Name, err)
			}
			// Validation of props to avoid getting packages with empty Name/Version
			if !props.Valid() {
				continue
			}
			found = append(found, props.Package())
			// Check if the pom.properties is for the original JAR/WAR/EAR
			if fileProps.ArtifactID == props.ArtifactID && fileProps.Version == props.Version {
				matchedRoot = true
			}
			continue
		}

		if baseName == "MANIFEST.MF" {
			mf, err = parseManifest(entry)
			if err != nil {
				return nil, manifest{}, false, xerrors.Errorf("failed to parse MANIFEST.MF: %w", err)
			}
			continue
		}

		if !isArtifact(entry.Name) {
			continue
		}
		nested, _, err := p.parseInnerJar(entry, filePath) // TODO process inner deps
		if err != nil {
			p.logger.Debug("Failed to parse", log.String("file", entry.Name), log.Err(err))
			continue
		}
		found = append(found, nested...)
	}
	return found, mf, matchedRoot, nil
}
// parseInnerJar parses an archive nested inside another archive.
//
// The entry zf is copied to a temporary file, which is removed again before
// returning, and is then parsed recursively with parseArtifact under the path
// rootPath joined with the entry name.
//
// An entry whose joined path escapes rootPath (zip slip) is skipped without
// being extracted: all return values are nil in that case.
func (p *Parser) parseInnerJar(zf *zip.File, rootPath string) ([]ftypes.Package, []ftypes.Dependency, error) {
	// build full path to inner jar
	// The check runs before anything is extracted so that a rejected entry
	// never touches the disk.
	fullPath := path.Join(rootPath, zf.Name) // nolint:gosec
	if !strings.HasPrefix(fullPath, path.Clean(rootPath)) {
		return nil, nil, nil // zip slip
	}

	fr, err := zf.Open()
	if err != nil {
		return nil, nil, xerrors.Errorf("unable to open %s: %w", zf.Name, err)
	}
	defer func() {
		_ = fr.Close()
	}()

	f, err := os.CreateTemp("", "inner")
	if err != nil {
		return nil, nil, xerrors.Errorf("unable to create a temp file: %w", err)
	}
	defer func() {
		_ = f.Close()
		_ = os.Remove(f.Name())
	}()

	// Copy the file content to the temp file
	size := int64(zf.UncompressedSize64)
	n, err := io.CopyN(f, fr, size)
	if err != nil {
		return nil, nil, xerrors.Errorf("file copy error: %w", err)
	}
	if n != size {
		// err is nil here, so report the sizes instead of wrapping it.
		return nil, nil, xerrors.Errorf("file copy size error: copied %d of %d bytes", n, size)
	}

	// Parse jar/war/ear recursively
	innerPkgs, innerDeps, err := p.parseArtifact(fullPath, size, f)
	if err != nil {
		return nil, nil, xerrors.Errorf("failed to parse %s: %w", zf.Name, err)
	}

	return innerPkgs, innerDeps, nil
}
// searchBySHA1 rewinds r, hashes its entire content with SHA-1 and asks the
// client for the artifact with that hex-encoded digest. On success the
// returned properties carry filePath as their FilePath. Errors from the
// client are returned unwrapped so callers can match them with errors.Is.
func (p *Parser) searchBySHA1(r io.ReadSeeker, filePath string) (Properties, error) {
	_, err := r.Seek(0, io.SeekStart)
	if err != nil {
		return Properties{}, xerrors.Errorf("file seek error: %w", err)
	}

	hasher := sha1.New() // nolint:gosec
	if _, err = io.Copy(hasher, r); err != nil {
		return Properties{}, xerrors.Errorf("unable to calculate SHA-1: %w", err)
	}
	digest := hex.EncodeToString(hasher.Sum(nil))

	found, err := p.client.SearchBySHA1(digest)
	if err != nil {
		return Properties{}, err
	}
	found.FilePath = filePath
	return found, nil
}
// isArtifact reports whether name has a .jar, .ear or .war extension.
// The comparison is case-sensitive.
func isArtifact(name string) bool {
	switch filepath.Ext(name) {
	case ".jar", ".ear", ".war":
		return true
	default:
		return false
	}
}
// parseFileName derives the artifactId and version from the base name of
// filePath using jarFileRegEx. It returns empty Properties when the name
// does not match; otherwise FilePath is set to filePath.
func parseFileName(filePath string) Properties {
	matches := jarFileRegEx.FindStringSubmatch(filepath.Base(filePath))
	if len(matches) == 3 {
		return Properties{
			ArtifactID: matches[1],
			Version:    matches[2],
			FilePath:   filePath,
		}
	}
	return Properties{}
}
// parsePomProperties reads the pom.properties entry f line by line and
// collects the values of the groupId, artifactId and version keys.
// Each line is trimmed of surrounding whitespace before the key is matched,
// and everything after the first "=" is taken verbatim as the value.
// The returned properties carry filePath as their FilePath.
func parsePomProperties(f *zip.File, filePath string) (Properties, error) {
	rc, err := f.Open()
	if err != nil {
		return Properties{}, xerrors.Errorf("unable to open pom.properties: %w", err)
	}
	defer rc.Close()

	props := Properties{FilePath: filePath}

	sc := bufio.NewScanner(rc)
	for sc.Scan() {
		key, value, found := strings.Cut(strings.TrimSpace(sc.Text()), "=")
		if !found {
			continue
		}
		switch key {
		case "groupId":
			props.GroupID = value
		case "artifactId":
			props.ArtifactID = value
		case "version":
			props.Version = value
		}
	}
	if err = sc.Err(); err != nil {
		return Properties{}, xerrors.Errorf("scan error: %w", err)
	}
	return props, nil
}
// manifest holds the raw values of the MANIFEST.MF headers that are used to
// guess Maven coordinates. Values are stored as read (whitespace included)
// and are only trimmed when a coordinate is derived from them.
type manifest struct {
	// Values of the Implementation-* headers.
	implementationVersion, implementationTitle, implementationVendor, implementationVendorId string

	// Values of the Specification-* headers.
	specificationTitle, specificationVersion, specificationVendor string

	// Values of the Bundle-* headers.
	bundleName, bundleVersion, bundleSymbolicName string
}
// parseManifest reads the MANIFEST.MF entry f line by line and stores the
// values of the Implementation-*, Specification-* and Bundle-* headers that
// are relevant for identifying the artifact.
//
// Lines with fewer than two whitespace-separated fields are ignored, as are
// lines whose second field starts with "%" (unresolved variables).
// Values are stored exactly as they follow the colon, without trimming.
func parseManifest(f *zip.File) (manifest, error) {
	rc, err := f.Open()
	if err != nil {
		return manifest{}, xerrors.Errorf("unable to open MANIFEST.MF: %w", err)
	}
	defer rc.Close()

	var parsed manifest

	// It is not determined which fields are present in each application.
	// In some cases, none of them are included, in which case they cannot be detected.
	headers := []struct {
		prefix string
		dst    *string
	}{
		{"Implementation-Version:", &parsed.implementationVersion},
		{"Implementation-Title:", &parsed.implementationTitle},
		{"Implementation-Vendor:", &parsed.implementationVendor},
		{"Implementation-Vendor-Id:", &parsed.implementationVendorId},
		{"Specification-Version:", &parsed.specificationVersion},
		{"Specification-Title:", &parsed.specificationTitle},
		{"Specification-Vendor:", &parsed.specificationVendor},
		{"Bundle-Version:", &parsed.bundleVersion},
		{"Bundle-Name:", &parsed.bundleName},
		{"Bundle-SymbolicName:", &parsed.bundleSymbolicName},
	}

	sc := bufio.NewScanner(rc)
	for sc.Scan() {
		line := sc.Text()

		// Skip variables. e.g. Bundle-Name: %bundleName
		fields := strings.Fields(line)
		if len(fields) < 2 || strings.HasPrefix(fields[1], "%") {
			continue
		}

		for _, h := range headers {
			if strings.HasPrefix(line, h.prefix) {
				*h.dst = strings.TrimPrefix(line, h.prefix)
				break
			}
		}
	}
	if err = sc.Err(); err != nil {
		return manifest{}, xerrors.Errorf("scan error: %w", err)
	}
	return parsed, nil
}
// properties converts the manifest into Properties for the archive at
// filePath. If the groupID, artifactID or version cannot be determined,
// empty Properties are returned.
func (m manifest) properties(filePath string) Properties {
	var (
		props = Properties{FilePath: filePath}
		err   error
	)

	if props.GroupID, err = m.determineGroupID(); err != nil {
		return Properties{}
	}
	if props.ArtifactID, err = m.determineArtifactID(); err != nil {
		return Properties{}
	}
	if props.Version, err = m.determineVersion(); err != nil {
		return Properties{}
	}
	return props
}
// determineGroupID picks the groupID from the first non-empty manifest value,
// in this order: Implementation-Vendor-Id, Bundle-SymbolicName (without its
// last dot-separated segment), Implementation-Vendor, Specification-Vendor.
// The result is trimmed of surrounding whitespace. An error is returned when
// all of them are empty.
func (m manifest) determineGroupID() (string, error) {
	if vendorID := m.implementationVendorId; vendorID != "" {
		return strings.TrimSpace(vendorID), nil
	}

	if symbolicName := m.bundleSymbolicName; symbolicName != "" {
		// e.g. "com.fasterxml.jackson.core.jackson-databind" => "com.fasterxml.jackson.core"
		if dot := strings.LastIndex(symbolicName, "."); dot > 0 {
			symbolicName = symbolicName[:dot]
		}
		return strings.TrimSpace(symbolicName), nil
	}

	if vendor := m.implementationVendor; vendor != "" {
		return strings.TrimSpace(vendor), nil
	}
	if vendor := m.specificationVendor; vendor != "" {
		return strings.TrimSpace(vendor), nil
	}

	return "", xerrors.New("no groupID found")
}
// determineArtifactID returns the first non-empty value among
// Implementation-Title, Specification-Title and Bundle-Name, trimmed of
// surrounding whitespace. An error is returned when all of them are empty.
func (m manifest) determineArtifactID() (string, error) {
	candidates := [...]string{
		m.implementationTitle,
		m.specificationTitle,
		m.bundleName,
	}
	for _, candidate := range candidates {
		if candidate != "" {
			return strings.TrimSpace(candidate), nil
		}
	}
	return "", xerrors.New("no artifactID found")
}
// determineVersion returns the first non-empty value among
// Implementation-Version, Specification-Version and Bundle-Version, trimmed
// of surrounding whitespace. An error is returned when all of them are empty.
func (m manifest) determineVersion() (string, error) {
	candidates := [...]string{
		m.implementationVersion,
		m.specificationVersion,
		m.bundleVersion,
	}
	for _, candidate := range candidates {
		if candidate != "" {
			return strings.TrimSpace(candidate), nil
		}
	}
	return "", xerrors.New("no version found")
}
// removePackageDuplicates passes pkgs through lo.UniqBy, treating packages
// with the same Name, Version and FilePath as duplicates.
func removePackageDuplicates(pkgs []ftypes.Package) []ftypes.Package {
	identity := func(pkg ftypes.Package) string {
		return fmt.Sprintf("%s::%s::%s", pkg.Name, pkg.Version, pkg.FilePath)
	}
	return lo.UniqBy(pkgs, identity)
}