Skip to content

Commit

Permalink
fix: set CharsetReader and Entity when reading pom.xml (#1325)
Browse files Browse the repository at this point in the history
#1321

If non UTF-8 encoding is declared in pom.xml, we need to set
`CharsetReader` to avoid the error.
  • Loading branch information
cuixq authored Oct 14, 2024
1 parent f492e39 commit d3b75df
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 11 deletions.
14 changes: 14 additions & 0 deletions cmd/osv-scanner/__snapshots__/main_test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2358,6 +2358,20 @@ No issues found

---

[TestRun_MavenTransitive/scans_pom.xml_with_non_UTF-8_encoding - 1]
Scanned <rootdir>/fixtures/maven-transitive/encoding.xml file as a pom.xml and found 2 packages
+-------------------------------------+------+-----------+-------------+---------+----------------------------------------+
| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE |
+-------------------------------------+------+-----------+-------------+---------+----------------------------------------+
| https://osv.dev/GHSA-269g-pwp5-87pp | 4.4 | Maven | junit:junit | 4.12 | fixtures/maven-transitive/encoding.xml |
+-------------------------------------+------+-----------+-------------+---------+----------------------------------------+

---

[TestRun_MavenTransitive/scans_pom.xml_with_non_UTF-8_encoding - 2]

---

[TestRun_MavenTransitive/scans_transitive_dependencies_by_specifying_pom.xml - 1]
Scanned <rootdir>/fixtures/maven-transitive/abc.xml file as a pom.xml and found 3 packages
+-------------------------------------+------+-----------+-------------------------------------+---------+-----------------------------------+
Expand Down
23 changes: 23 additions & 0 deletions cmd/osv-scanner/fixtures/maven-transitive/encoding.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="iso-8859-1"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.mycompany.app</groupId>
<artifactId>my-app</artifactId>
<version>1.0.0</version>

<name>my-app</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
</dependencies>

</project>
5 changes: 5 additions & 0 deletions cmd/osv-scanner/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,11 @@ func TestRun_MavenTransitive(t *testing.T) {
args: []string{"", "--config=./fixtures/osv-scanner-empty-config.toml", "-L", "pom.xml:./fixtures/maven-transitive/abc.xml"},
exit: 1,
},
{
name: "scans pom.xml with non UTF-8 encoding",
args: []string{"", "--config=./fixtures/osv-scanner-empty-config.toml", "-L", "pom.xml:./fixtures/maven-transitive/encoding.xml"},
exit: 1,
},
{
// Direct dependencies do not have any vulnerability.
name: "does not scan transitive dependencies for pom.xml with offline mode",
Expand Down
3 changes: 1 addition & 2 deletions internal/manifest/maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package manifest

import (
"context"
"encoding/xml"
"fmt"
"path/filepath"

Expand Down Expand Up @@ -31,7 +30,7 @@ func (e MavenResolverExtractor) Extract(f lockfile.DepFile) ([]lockfile.PackageD
ctx := context.Background()

var project maven.Project
if err := xml.NewDecoder(f).Decode(&project); err != nil {
if err := datasource.NewMavenDecoder(f).Decode(&project); err != nil {
return []lockfile.PackageDetails{}, fmt.Errorf("could not extract from %s: %w", f.Path(), err)
}
// Merging parents data by parsing local parent pom.xml or fetching from upstream.
Expand Down
14 changes: 10 additions & 4 deletions internal/resolution/datasource/maven_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
Expand Down Expand Up @@ -135,12 +136,17 @@ func get(ctx context.Context, url string, dst interface{}) error {
return fmt.Errorf("%w: Maven registry query status: %s", errAPIFailed, resp.Status)
}

d := xml.NewDecoder(resp.Body)
return NewMavenDecoder(resp.Body).Decode(dst)
}

// NewMavenDecoder returns an xml decoder with CharsetReader and Entity set.
func NewMavenDecoder(reader io.Reader) *xml.Decoder {
decoder := xml.NewDecoder(reader)
// Set charset reader for conversion from non-UTF-8 charset into UTF-8.
d.CharsetReader = charset.NewReaderLabel
decoder.CharsetReader = charset.NewReaderLabel
// Set HTML entity map for translation between non-standard entity names
// and string replacements.
d.Entity = xml.HTMLEntity
decoder.Entity = xml.HTMLEntity

return d.Decode(dst)
return decoder
}
5 changes: 2 additions & 3 deletions internal/resolution/manifest/maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"cmp"
"context"
"encoding/xml"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -68,7 +67,7 @@ func (m MavenReadWriter) Read(df lockfile.DepFile) (Manifest, error) {
ctx := context.Background()

var project maven.Project
if err := xml.NewDecoder(df).Decode(&project); err != nil {
if err := datasource.NewMavenDecoder(df).Decode(&project); err != nil {
return Manifest{}, fmt.Errorf("failed to unmarshal project: %w", err)
}
properties := buildPropertiesWithOrigins(project, "")
Expand Down Expand Up @@ -316,7 +315,7 @@ func (MavenReadWriter) Write(df lockfile.DepFile, w io.Writer, patch Patch) erro
}

var proj maven.Project
err = xml.NewDecoder(f).Decode(&proj)
err = datasource.NewMavenDecoder(f).Decode(&proj)
f.Close()
if err != nil {
return fmt.Errorf("failed to unmarshal project: %w", err)
Expand Down
3 changes: 1 addition & 2 deletions internal/utility/maven/maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package maven

import (
"context"
"encoding/xml"
"errors"
"fmt"
"os"
Expand Down Expand Up @@ -52,7 +51,7 @@ func MergeParents(ctx context.Context, mavenClient *datasource.MavenRegistryAPIC
if err != nil {
return fmt.Errorf("failed to open parent file %s: %w", parentPath, err)
}
err = xml.NewDecoder(f).Decode(&proj)
err = datasource.NewMavenDecoder(f).Decode(&proj)
f.Close()
if err != nil {
return fmt.Errorf("failed to unmarshal project: %w", err)
Expand Down

0 comments on commit d3b75df

Please sign in to comment.