From d3b75dfc0158477fbb8e9a54f7f060b3ec23386a Mon Sep 17 00:00:00 2001 From: Xueqin Cui <72771658+cuixq@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:23:38 +1100 Subject: [PATCH] fix: set CharsetReader and Entity when reading pom.xml (#1325) https://github.com/google/osv-scanner/issues/1321 If a non-UTF-8 encoding is declared in pom.xml, we need to set `CharsetReader` on the XML decoder to avoid the decoding error. --- cmd/osv-scanner/__snapshots__/main_test.snap | 14 +++++++++++ .../fixtures/maven-transitive/encoding.xml | 23 +++++++++++++++++++ cmd/osv-scanner/main_test.go | 5 ++++ internal/manifest/maven.go | 3 +-- .../resolution/datasource/maven_registry.go | 14 +++++++---- internal/resolution/manifest/maven.go | 5 ++-- internal/utility/maven/maven.go | 3 +-- 7 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 cmd/osv-scanner/fixtures/maven-transitive/encoding.xml diff --git a/cmd/osv-scanner/__snapshots__/main_test.snap b/cmd/osv-scanner/__snapshots__/main_test.snap index ea42a22235..b1dc8eee20 100755 --- a/cmd/osv-scanner/__snapshots__/main_test.snap +++ b/cmd/osv-scanner/__snapshots__/main_test.snap @@ -2358,6 +2358,20 @@ No issues found --- +[TestRun_MavenTransitive/scans_pom.xml_with_non_UTF-8_encoding - 1] +Scanned /fixtures/maven-transitive/encoding.xml file as a pom.xml and found 2 packages ++-------------------------------------+------+-----------+-------------+---------+----------------------------------------+ +| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | ++-------------------------------------+------+-----------+-------------+---------+----------------------------------------+ +| https://osv.dev/GHSA-269g-pwp5-87pp | 4.4 | Maven | junit:junit | 4.12 | fixtures/maven-transitive/encoding.xml | ++-------------------------------------+------+-----------+-------------+---------+----------------------------------------+ + +--- + +[TestRun_MavenTransitive/scans_pom.xml_with_non_UTF-8_encoding - 2] + +--- + 
[TestRun_MavenTransitive/scans_transitive_dependencies_by_specifying_pom.xml - 1] Scanned /fixtures/maven-transitive/abc.xml file as a pom.xml and found 3 packages +-------------------------------------+------+-----------+-------------------------------------+---------+-----------------------------------+ diff --git a/cmd/osv-scanner/fixtures/maven-transitive/encoding.xml b/cmd/osv-scanner/fixtures/maven-transitive/encoding.xml new file mode 100644 index 0000000000..2f3711a4f6 --- /dev/null +++ b/cmd/osv-scanner/fixtures/maven-transitive/encoding.xml @@ -0,0 +1,23 @@ + + + + 4.0.0 + + com.mycompany.app + my-app + 1.0.0 + + my-app + + http://www.example.com + + + + junit + junit + 4.12 + + + + diff --git a/cmd/osv-scanner/main_test.go b/cmd/osv-scanner/main_test.go index b02b69070c..6f6cc8b4a5 100644 --- a/cmd/osv-scanner/main_test.go +++ b/cmd/osv-scanner/main_test.go @@ -893,6 +893,11 @@ func TestRun_MavenTransitive(t *testing.T) { args: []string{"", "--config=./fixtures/osv-scanner-empty-config.toml", "-L", "pom.xml:./fixtures/maven-transitive/abc.xml"}, exit: 1, }, + { + name: "scans pom.xml with non UTF-8 encoding", + args: []string{"", "--config=./fixtures/osv-scanner-empty-config.toml", "-L", "pom.xml:./fixtures/maven-transitive/encoding.xml"}, + exit: 1, + }, { // Direct dependencies do not have any vulnerability. 
name: "does not scan transitive dependencies for pom.xml with offline mode", diff --git a/internal/manifest/maven.go b/internal/manifest/maven.go index faef0e4e4a..4b32b459ad 100644 --- a/internal/manifest/maven.go +++ b/internal/manifest/maven.go @@ -2,7 +2,6 @@ package manifest import ( "context" - "encoding/xml" "fmt" "path/filepath" @@ -31,7 +30,7 @@ func (e MavenResolverExtractor) Extract(f lockfile.DepFile) ([]lockfile.PackageD ctx := context.Background() var project maven.Project - if err := xml.NewDecoder(f).Decode(&project); err != nil { + if err := datasource.NewMavenDecoder(f).Decode(&project); err != nil { return []lockfile.PackageDetails{}, fmt.Errorf("could not extract from %s: %w", f.Path(), err) } // Merging parents data by parsing local parent pom.xml or fetching from upstream. diff --git a/internal/resolution/datasource/maven_registry.go b/internal/resolution/datasource/maven_registry.go index 5dd6630e9b..421be79403 100644 --- a/internal/resolution/datasource/maven_registry.go +++ b/internal/resolution/datasource/maven_registry.go @@ -5,6 +5,7 @@ import ( "encoding/xml" "errors" "fmt" + "io" "net/http" "net/url" "strings" @@ -135,12 +136,17 @@ func get(ctx context.Context, url string, dst interface{}) error { return fmt.Errorf("%w: Maven registry query status: %s", errAPIFailed, resp.Status) } - d := xml.NewDecoder(resp.Body) + return NewMavenDecoder(resp.Body).Decode(dst) +} + +// NewMavenDecoder returns an xml decoder with CharsetReader and Entity set. +func NewMavenDecoder(reader io.Reader) *xml.Decoder { + decoder := xml.NewDecoder(reader) // Set charset reader for conversion from non-UTF-8 charset into UTF-8. - d.CharsetReader = charset.NewReaderLabel + decoder.CharsetReader = charset.NewReaderLabel // Set HTML entity map for translation between non-standard entity names // and string replacements. 
- d.Entity = xml.HTMLEntity + decoder.Entity = xml.HTMLEntity - return d.Decode(dst) + return decoder } diff --git a/internal/resolution/manifest/maven.go b/internal/resolution/manifest/maven.go index 3d30186092..d3a87c1d10 100644 --- a/internal/resolution/manifest/maven.go +++ b/internal/resolution/manifest/maven.go @@ -4,7 +4,6 @@ import ( "bytes" "cmp" "context" - "encoding/xml" "errors" "fmt" "io" @@ -68,7 +67,7 @@ func (m MavenReadWriter) Read(df lockfile.DepFile) (Manifest, error) { ctx := context.Background() var project maven.Project - if err := xml.NewDecoder(df).Decode(&project); err != nil { + if err := datasource.NewMavenDecoder(df).Decode(&project); err != nil { return Manifest{}, fmt.Errorf("failed to unmarshal project: %w", err) } properties := buildPropertiesWithOrigins(project, "") @@ -316,7 +315,7 @@ func (MavenReadWriter) Write(df lockfile.DepFile, w io.Writer, patch Patch) erro } var proj maven.Project - err = xml.NewDecoder(f).Decode(&proj) + err = datasource.NewMavenDecoder(f).Decode(&proj) f.Close() if err != nil { return fmt.Errorf("failed to unmarshal project: %w", err) diff --git a/internal/utility/maven/maven.go b/internal/utility/maven/maven.go index 16f85bd95d..ce80e12847 100644 --- a/internal/utility/maven/maven.go +++ b/internal/utility/maven/maven.go @@ -2,7 +2,6 @@ package maven import ( "context" - "encoding/xml" "errors" "fmt" "os" @@ -52,7 +51,7 @@ func MergeParents(ctx context.Context, mavenClient *datasource.MavenRegistryAPIC if err != nil { return fmt.Errorf("failed to open parent file %s: %w", parentPath, err) } - err = xml.NewDecoder(f).Decode(&proj) + err = datasource.NewMavenDecoder(f).Decode(&proj) f.Close() if err != nil { return fmt.Errorf("failed to unmarshal project: %w", err)