diff --git a/assets/kubernetes/multi-docs-file-level/expected-dyff.human b/assets/kubernetes/multi-docs-file-level/expected-dyff.human new file mode 100644 index 0000000..b029bd8 --- /dev/null +++ b/assets/kubernetes/multi-docs-file-level/expected-dyff.human @@ -0,0 +1,39 @@ + +metadata (v1/Service/foo) + + one map entry added: + annotations: + foo: bar + +(root level) (v1/Service/foo-2) +- one document removed: + --- + apiVersion: v1 + kind: Service + metadata: + name: foo-2 + spec: + selector: + kubernetes.io/app: foo-2 + +(root level) (v1/Service/bar) ++ one document added: + --- + apiVersion: v1 + kind: Service + metadata: + name: bar + spec: + selector: + kubernetes.io/app: bar + +(root level) (v1/Service/baz) ++ one document added: + --- + apiVersion: v1 + kind: Service + metadata: + name: baz + spec: + selector: + kubernetes.io/app: baz + diff --git a/assets/kubernetes/multi-docs-file-level/from.yaml b/assets/kubernetes/multi-docs-file-level/from.yaml new file mode 100644 index 0000000..be43b0f --- /dev/null +++ b/assets/kubernetes/multi-docs-file-level/from.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: foo +spec: + selector: + kubernetes.io/app: foo + +--- +apiVersion: v1 +kind: Service +metadata: + name: foo-2 +spec: + selector: + kubernetes.io/app: foo-2 diff --git a/assets/kubernetes/multi-docs-file-level/to.yaml b/assets/kubernetes/multi-docs-file-level/to.yaml new file mode 100644 index 0000000..a12fd72 --- /dev/null +++ b/assets/kubernetes/multi-docs-file-level/to.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + name: foo + annotations: + foo: bar +spec: + selector: + kubernetes.io/app: foo + +--- +apiVersion: v1 +kind: Service +metadata: + name: bar +spec: + selector: + kubernetes.io/app: bar + +--- +apiVersion: v1 +kind: Service +metadata: + name: baz +spec: + selector: + kubernetes.io/app: baz diff --git a/assets/kubernetes/rename/expected-dyff.human b/assets/kubernetes/rename/expected-dyff.human new file mode 100644 index 0000000..c751fa5 --- /dev/null +++ b/assets/kubernetes/rename/expected-dyff.human @@ -0,0 +1,34 @@ + +data.pinniped.yaml + ± value change in multiline text (one insert, no deletions) + discovery: + url: null + api: + servingCertificate: + + [two lines unchanged)] + + apiGroupSuffix: pinniped.dev + # aggregatedAPIServerPort may be set here, although other YAML references to the default port (10250) may also need to be updated + # impersonationProxyServerPort may be set here, although other YAML references to the default port (8444) may also need to be updated + names: + + # Example comment + servingCertificateSecret: pinniped-concierge-api-tls-serving-certificate + credentialIssuer: pinniped-concierge-config + apiService: pinniped-concierge-api + impersonationLoadBalancerService: pinniped-concierge-impersonation-proxy-load-balancer + + [five lines unchanged)] + + labels: {"app": "pinniped-concierge"} + kubeCertAgent: + namePrefix: pinniped-concierge-kube-cert-agent- + image: projects.registry.vmware.com/pinniped/pinniped-server:latest + + + +metadata.name + ± value change + - pinniped-concierge-config-9bfbmfgt2f + + pinniped-concierge-config-296567ccmt + diff --git a/assets/kubernetes/rename/from.yaml b/assets/kubernetes/rename/from.yaml new file mode 100644 index 0000000..dba1d91 --- /dev/null +++ b/assets/kubernetes/rename/from.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +data: + pinniped.yaml: | + discovery: + url: null + api: + servingCertificate: + durationSeconds: 2592000 + renewBeforeSeconds: 2160000 + 
apiGroupSuffix: pinniped.dev + # aggregatedAPIServerPort may be set here, although other YAML references to the default port (10250) may also need to be updated + # impersonationProxyServerPort may be set here, although other YAML references to the default port (8444) may also need to be updated + names: + servingCertificateSecret: pinniped-concierge-api-tls-serving-certificate + credentialIssuer: pinniped-concierge-config + apiService: pinniped-concierge-api + impersonationLoadBalancerService: pinniped-concierge-impersonation-proxy-load-balancer + impersonationClusterIPService: pinniped-concierge-impersonation-proxy-cluster-ip + impersonationTLSCertificateSecret: pinniped-concierge-impersonation-proxy-tls-serving-certificate + impersonationCACertificateSecret: pinniped-concierge-impersonation-proxy-ca-certificate + impersonationSignerSecret: pinniped-concierge-impersonation-proxy-signer-ca-certificate + agentServiceAccount: pinniped-concierge-kube-cert-agent + labels: {"app": "pinniped-concierge"} + kubeCertAgent: + namePrefix: pinniped-concierge-kube-cert-agent- + image: projects.registry.vmware.com/pinniped/pinniped-server:latest +kind: ConfigMap +metadata: + name: pinniped-concierge-config-9bfbmfgt2f diff --git a/assets/kubernetes/rename/from/kustomization.yaml b/assets/kubernetes/rename/from/kustomization.yaml new file mode 100644 index 0000000..f3ade59 --- /dev/null +++ b/assets/kubernetes/rename/from/kustomization.yaml @@ -0,0 +1,5 @@ +# Source: pinniped-concierge/templates/configmap-pinniped-concierge-config.yaml +configMapGenerator: + - name: pinniped-concierge-config + files: + - pinniped.yaml diff --git a/assets/kubernetes/rename/from/pinniped.yaml b/assets/kubernetes/rename/from/pinniped.yaml new file mode 100644 index 0000000..f69f322 --- /dev/null +++ b/assets/kubernetes/rename/from/pinniped.yaml @@ -0,0 +1,23 @@ +discovery: + url: null +api: + servingCertificate: + durationSeconds: 2592000 + renewBeforeSeconds: 2160000 +apiGroupSuffix: pinniped.dev +# aggregatedAPIServerPort may be set here, although other YAML references to the default port (10250) may also need to be updated +# impersonationProxyServerPort may be set here, although other YAML references to the default port (8444) may also need to be updated +names: + servingCertificateSecret: pinniped-concierge-api-tls-serving-certificate + credentialIssuer: pinniped-concierge-config + apiService: pinniped-concierge-api + impersonationLoadBalancerService: pinniped-concierge-impersonation-proxy-load-balancer + impersonationClusterIPService: pinniped-concierge-impersonation-proxy-cluster-ip + impersonationTLSCertificateSecret: pinniped-concierge-impersonation-proxy-tls-serving-certificate + impersonationCACertificateSecret: pinniped-concierge-impersonation-proxy-ca-certificate + impersonationSignerSecret: pinniped-concierge-impersonation-proxy-signer-ca-certificate + agentServiceAccount: pinniped-concierge-kube-cert-agent +labels: {"app": "pinniped-concierge"} +kubeCertAgent: + namePrefix: pinniped-concierge-kube-cert-agent- + image: projects.registry.vmware.com/pinniped/pinniped-server:latest diff --git a/assets/kubernetes/rename/to.yaml b/assets/kubernetes/rename/to.yaml new file mode 100644 index 0000000..1cb80e0 --- /dev/null +++ b/assets/kubernetes/rename/to.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +data: + pinniped.yaml: | + discovery: + url: null + api: + servingCertificate: + durationSeconds: 2592000 + renewBeforeSeconds: 2160000 + apiGroupSuffix: pinniped.dev + # aggregatedAPIServerPort may be set here, although other 
YAML references to the default port (10250) may also need to be updated + # impersonationProxyServerPort may be set here, although other YAML references to the default port (8444) may also need to be updated + names: + # Example comment + servingCertificateSecret: pinniped-concierge-api-tls-serving-certificate + credentialIssuer: pinniped-concierge-config + apiService: pinniped-concierge-api + impersonationLoadBalancerService: pinniped-concierge-impersonation-proxy-load-balancer + impersonationClusterIPService: pinniped-concierge-impersonation-proxy-cluster-ip + impersonationTLSCertificateSecret: pinniped-concierge-impersonation-proxy-tls-serving-certificate + impersonationCACertificateSecret: pinniped-concierge-impersonation-proxy-ca-certificate + impersonationSignerSecret: pinniped-concierge-impersonation-proxy-signer-ca-certificate + agentServiceAccount: pinniped-concierge-kube-cert-agent + labels: {"app": "pinniped-concierge"} + kubeCertAgent: + namePrefix: pinniped-concierge-kube-cert-agent- + image: projects.registry.vmware.com/pinniped/pinniped-server:latest +kind: ConfigMap +metadata: + name: pinniped-concierge-config-296567ccmt diff --git a/assets/kubernetes/rename/to/kustomization.yaml b/assets/kubernetes/rename/to/kustomization.yaml new file mode 100644 index 0000000..f3ade59 --- /dev/null +++ b/assets/kubernetes/rename/to/kustomization.yaml @@ -0,0 +1,5 @@ +# Source: pinniped-concierge/templates/configmap-pinniped-concierge-config.yaml +configMapGenerator: + - name: pinniped-concierge-config + files: + - pinniped.yaml diff --git a/assets/kubernetes/rename/to/pinniped.yaml b/assets/kubernetes/rename/to/pinniped.yaml new file mode 100644 index 0000000..fdaabdb --- /dev/null +++ b/assets/kubernetes/rename/to/pinniped.yaml @@ -0,0 +1,24 @@ +discovery: + url: null +api: + servingCertificate: + durationSeconds: 2592000 + renewBeforeSeconds: 2160000 +apiGroupSuffix: pinniped.dev +# aggregatedAPIServerPort may be set here, although other YAML references to the default port (10250) may also need to be updated +# impersonationProxyServerPort may be set here, although other YAML references to the default port (8444) may also need to be updated +names: + # Example comment + servingCertificateSecret: pinniped-concierge-api-tls-serving-certificate + credentialIssuer: pinniped-concierge-config + apiService: pinniped-concierge-api + impersonationLoadBalancerService: pinniped-concierge-impersonation-proxy-load-balancer + impersonationClusterIPService: pinniped-concierge-impersonation-proxy-cluster-ip + impersonationTLSCertificateSecret: pinniped-concierge-impersonation-proxy-tls-serving-certificate + impersonationCACertificateSecret: pinniped-concierge-impersonation-proxy-ca-certificate + impersonationSignerSecret: pinniped-concierge-impersonation-proxy-signer-ca-certificate + agentServiceAccount: pinniped-concierge-kube-cert-agent +labels: {"app": "pinniped-concierge"} +kubeCertAgent: + namePrefix: pinniped-concierge-kube-cert-agent- + image: projects.registry.vmware.com/pinniped/pinniped-server:latest diff --git a/go.mod b/go.mod index 9ae466b..d7cf21f 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/homeport/dyff -go 1.20 +go 1.22.0 + +toolchain go1.23.2 require ( github.com/davecgh/go-spew v1.1.1 diff --git a/go.sum b/go.sum index 565ee60..c90b3f8 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,7 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= @@ -26,6 +27,7 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= @@ -44,12 +46,15 @@ github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0= github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo= github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -62,6 +67,7 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/texttheater/golang-levenshtein v1.0.1 h1:+cRNoVrfiwufQPhoMzB6N0Yf/Mqajr6t1lOv8GyGE2U= github.com/texttheater/golang-levenshtein v1.0.1/go.mod h1:PYAKrbF5sAiq9wd+H82hs7gNaen0CplQ9uvm6+enD/8= github.com/virtuald/go-ordered-json v0.0.0-20170621173500-b18e6e673d74 h1:JwtAtbp7r/7QSyGz8mKUbYJBg2+6Cd7OjM8o/GNOcVo= @@ -82,10 +88,12 @@ golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/tools v0.25.0 
h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE= golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg= google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/internal/cmd/common.go b/internal/cmd/common.go index 9b0b47c..933a602 100644 --- a/internal/cmd/common.go +++ b/internal/cmd/common.go @@ -97,6 +97,8 @@ func applyReportOptionsFlags(cmd *cobra.Command) { cmd.Flags().BoolVarP(&reportOptions.noTableStyle, "no-table-style", "l", defaults.noTableStyle, "do not place blocks next to each other, always use one row per text block") cmd.Flags().BoolVarP(&reportOptions.doNotInspectCerts, "no-cert-inspection", "x", defaults.doNotInspectCerts, "disable x509 certificate inspection, compare as raw text") cmd.Flags().BoolVarP(&reportOptions.useGoPatchPaths, "use-go-patch-style", "g", defaults.useGoPatchPaths, "use Go-Patch style paths in outputs") + cmd.Flags().Float64VarP(&reportOptions.minorChangeThreshold, "minor-change-threshold", "", defaults.minorChangeThreshold, "minor change threshold") + cmd.Flags().IntVarP(&reportOptions.multilineContextLines, "multi-line-context-lines", "", defaults.multilineContextLines, "multi-line context lines") // Deprecated cmd.Flags().BoolVar(&reportOptions.exitWithCode, "set-exit-status", defaults.exitWithCode, "set program exit code, with 0 meaning no difference, 1 for differences detected, and 255 for program error") diff --git a/pkg/dyff/core.go b/pkg/dyff/core.go index 77c300e..a37d387 100644 --- a/pkg/dyff/core.go +++ b/pkg/dyff/core.go @@ -22,6 +22,7 @@ package dyff import ( "fmt" + "github.com/homeport/dyff/pkg/dyff/rename" "sort" "strings" @@ -136,11 +137,12 @@ func CompareInputFiles(from ytbx.InputFile, to ytbx.InputFile, compareOptions .. 
from.Documents, from.Names = fromDocs, fromNames to.Documents, to.Names = toDocs, toNames - // Compare the document nodes, in case of an error it will fall back to the default - // implementation and continue to compare the files without any special semantics - if result, err := cmpr.documentNodes(from, to); err == nil { - return Report{from, to, result}, nil + // Compare the document nodes + result, err := cmpr.documentNodes(from, to) + if err != nil { + return Report{}, fmt.Errorf("comparing Kubernetes resources: %w", err) } + return Report{from, to, result}, nil } } @@ -283,8 +285,8 @@ func (compare *compare) documentNodes(from, to ytbx.InputFile) ([]Diff, error) { return nil, err } - removals := []*yamlv3.Node{} - additions := []*yamlv3.Node{} + var removals []doc + var additions []doc for _, name := range fromNames { var fromItem = fromLookUpMap[name] @@ -295,16 +297,14 @@ func (compare *compare) documentNodes(from, to ytbx.InputFile) ([]Diff, error) { followAlias(fromItem.node), followAlias(toItem.node), ) - if err != nil { return nil, err } result = append(result, diffs...) - } else { // `from` contain the `key`, but `to` does not -> removal - removals = append(removals, fromItem.node) + removals = append(removals, fromItem) } } @@ -312,55 +312,88 @@ func (compare *compare) documentNodes(from, to ytbx.InputFile) ([]Diff, error) { var toItem = toLookUpMap[name] if _, ok := fromLookUpMap[name]; !ok { // `to` contains a `key` that `from` does not have -> addition - additions = append(additions, toItem.node) + additions = append(additions, toItem) } } - diff := Diff{Details: []Detail{}} + // Detect content names by heuristic method + detector := newDocumentChanges( + mapSlice(removals, func(d doc) *renameCandidate { + return &renameCandidate{ + path: &ytbx.Path{Root: &from, DocumentIdx: d.idx}, + doc: d.node, + } + }), + mapSlice(additions, func(d doc) *renameCandidate { + return &renameCandidate{ + path: &ytbx.Path{Root: &to, DocumentIdx: d.idx}, + doc: d.node, + } + }), + ) + err = rename.DetectRenames(detector, nil) + if err != nil { + return nil, err + } - if len(removals) > 0 { - diff.Details = append(diff.Details, - Detail{ + // Push rename detection results + for _, modified := range detector.modifiedPairs { + diffs, err := compare.objects( + *modified.to.path, + followAlias(modified.from.doc), + followAlias(modified.to.doc), + ) + if err != nil { + return nil, err + } + result = append(result, diffs...) 
+ + // Exclude from order change calculation + fromNames, _ = reject(fromNames, modified.from.Name()) + toNames, _ = reject(toNames, modified.to.Name()) + } + for _, removal := range detector.deleted { + result = append(result, Diff{ + Path: removal.path, + Details: []Detail{{ Kind: REMOVAL, From: &yamlv3.Node{ Kind: yamlv3.DocumentNode, - Content: removals, + Content: []*yamlv3.Node{removal.doc}, }, To: nil, - }, - ) + }}, + }) } - - if len(additions) > 0 { - diff.Details = append(diff.Details, - Detail{ + for _, addition := range detector.added { + result = append(result, Diff{ + Path: addition.path, + Details: []Detail{{ Kind: ADDITION, From: nil, To: &yamlv3.Node{ Kind: yamlv3.DocumentNode, - Content: additions, + Content: []*yamlv3.Node{addition.doc}, }, - }, - ) + }}, + }) } if !compare.settings.IgnoreOrderChanges && len(fromNames) == len(toNames) { for i := range fromNames { if fromNames[i] != toNames[i] { - diff.Details = append(diff.Details, Detail{ - Kind: ORDERCHANGE, - From: AsSequenceNode(fromNames...), - To: AsSequenceNode(toNames...), + result = append(result, Diff{ + Details: []Detail{{ + Kind: ORDERCHANGE, + From: AsSequenceNode(fromNames...), + To: AsSequenceNode(toNames...), + }}, }) break } } } - if len(diff.Details) > 0 { - result = append([]Diff{diff}, result...) - } - return result, nil } diff --git a/pkg/dyff/output_human.go b/pkg/dyff/output_human.go index d3d9dd2..ffa71a2 100644 --- a/pkg/dyff/output_human.go +++ b/pkg/dyff/output_human.go @@ -164,6 +164,12 @@ func (report *HumanReport) generateHumanDetailOutputAddition(detail Detail) (str var output bytes.Buffer switch detail.To.Kind { + case yamlv3.DocumentNode: + _, _ = fmt.Fprint(&output, yellow("%c %s added:\n", + ADDITION, + text.Plural(len(detail.To.Content), "document"), + )) + case yamlv3.SequenceNode: _, _ = output.WriteString(yellow("%c %s added:\n", ADDITION, diff --git a/pkg/dyff/output_human_test.go b/pkg/dyff/output_human_test.go index 9bece94..e8def1e 100644 --- a/pkg/dyff/output_human_test.go +++ b/pkg/dyff/output_human_test.go @@ -131,6 +131,24 @@ input: |+ true, ) }) + + It("should report each file level change separately for better readability", func() { + compareAgainstExpectedHuman( + assets("kubernetes/multi-docs-file-level/from.yaml"), + assets("kubernetes/multi-docs-file-level/to.yaml"), + assets("kubernetes/multi-docs-file-level/expected-dyff.human"), + false, + ) + }) + + It("should detect renames for kubernetes documents", func() { + compareAgainstExpectedHuman( + assets("kubernetes/rename/from.yaml"), + assets("kubernetes/rename/to.yaml"), + assets("kubernetes/rename/expected-dyff.human"), + false, + ) + }) }) Context("nicely colored human readable differences", func() { diff --git a/pkg/dyff/rename/index.go b/pkg/dyff/rename/index.go new file mode 100644 index 0000000..8baff9a --- /dev/null +++ b/pkg/dyff/rename/index.go @@ -0,0 +1,290 @@ +package rename + +import ( + "errors" + "io" + "sort" +) + +const ( + keyShift = 32 + maxCountValue = (1 << keyShift) - 1 +) + +var errIndexFull = errors.New("index is full") + +// similarityIndex is an index structure of lines/blocks in one file. +// This structure can be used to compute an approximation of the similarity +// between two files. +// To save space in memory, this index uses a space efficient encoding which +// will not exceed 1MiB per instance. The index starts out at a smaller size +// (closer to 2KiB), but may grow as more distinct blocks within the scanned +// file are discovered. 
+// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +type similarityIndex struct { + hashed uint64 + // number of non-zero entries in hashes + numHashes int + growAt int + hashes []keyCountPair + hashBits int +} + +func fileSimilarityIndex(f File) (*similarityIndex, error) { + idx := newSimilarityIndex() + if err := idx.hash(f); err != nil { + return nil, err + } + + sort.Stable(keyCountPairs(idx.hashes)) + + return idx, nil +} + +func newSimilarityIndex() *similarityIndex { + return &similarityIndex{ + hashBits: 8, + hashes: make([]keyCountPair, 1<<8), + growAt: shouldGrowAt(8), + } +} + +func (i *similarityIndex) hash(f File) error { + r, err := f.Reader() + if err != nil { + return err + } + + defer checkClose(r, &err) + + size, err := f.Size() + if err != nil { + return err + } + return i.hashContent(r, size) +} + +func (i *similarityIndex) hashContent(r io.Reader, size int64) error { + var buf = make([]byte, 4096) + var ptr, cnt int + remaining := size + + for 0 < remaining { + hash := 5381 + var blockHashedCnt uint64 + + // Hash one line or block, whatever happens first + n := int64(0) + for { + if ptr == cnt { + ptr = 0 + var err error + cnt, err = io.ReadFull(r, buf) + if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) { + return err + } + + if cnt == 0 { + return io.EOF + } + } + n++ + c := buf[ptr] & 0xff + ptr++ + + // Ignore CR in CRLF sequence + if c == '\r' && ptr < cnt && buf[ptr] == '\n' { + continue + } + blockHashedCnt++ + + if c == '\n' { + break + } + + hash = (hash << 5) + hash + int(c) + + if n >= 64 || n >= remaining { + break + } + } + i.hashed += blockHashedCnt + if err := i.add(hash, blockHashedCnt); err != nil { + return err + } + remaining -= n + } + + return nil +} + +// score computes the similarity score between this index and another one. +// A region of a file is defined as a line in a text file or a fixed-size +// block in a binary file. To prepare an index, each region in the file is +// hashed; the values and counts of hashes are retained in a sorted table. +// Define the similarity fraction F as the count of matching regions between +// the two files divided between the maximum count of regions in either file. +// The similarity score is F multiplied by the maxScore constant, yielding a +// range [0, maxScore]. It is defined as maxScore for the degenerate case of +// two empty files. +// The similarity score is symmetrical; i.e. a.score(b) == b.score(a). 
+func (i *similarityIndex) score(other *similarityIndex, maxScore int) int { + var maxHashed = i.hashed + if maxHashed < other.hashed { + maxHashed = other.hashed + } + if maxHashed == 0 { + return maxScore + } + + return int(i.common(other) * uint64(maxScore) / maxHashed) +} + +func (i *similarityIndex) common(dst *similarityIndex) uint64 { + srcIdx, dstIdx := 0, 0 + if i.numHashes == 0 || dst.numHashes == 0 { + return 0 + } + + var common uint64 + srcKey, dstKey := i.hashes[srcIdx].key(), dst.hashes[dstIdx].key() + + for { + if srcKey == dstKey { + srcCnt, dstCnt := i.hashes[srcIdx].count(), dst.hashes[dstIdx].count() + if srcCnt < dstCnt { + common += srcCnt + } else { + common += dstCnt + } + + srcIdx++ + if srcIdx == len(i.hashes) { + break + } + srcKey = i.hashes[srcIdx].key() + + dstIdx++ + if dstIdx == len(dst.hashes) { + break + } + dstKey = dst.hashes[dstIdx].key() + } else if srcKey < dstKey { + // Region of src that is not in dst + srcIdx++ + if srcIdx == len(i.hashes) { + break + } + srcKey = i.hashes[srcIdx].key() + } else { + // Region of dst that is not in src + dstIdx++ + if dstIdx == len(dst.hashes) { + break + } + dstKey = dst.hashes[dstIdx].key() + } + } + + return common +} + +func (i *similarityIndex) add(key int, cnt uint64) error { + key = int(uint32(key) * 0x9e370001 >> 1) + + j := i.slot(key) + for { + v := i.hashes[j] + if v == 0 { + // It's an empty slot, so we can store it here. + if i.growAt <= i.numHashes { + if err := i.grow(); err != nil { + return err + } + j = i.slot(key) + continue + } + + var err error + i.hashes[j], err = newKeyCountPair(key, cnt) + if err != nil { + return err + } + i.numHashes++ + return nil + } else if v.key() == key { + // It's the same key, so increment the counter. + var err error + i.hashes[j], err = newKeyCountPair(key, v.count()+cnt) + return err + } else if j+1 >= len(i.hashes) { + j = 0 + } else { + j++ + } + } +} + +type keyCountPair uint64 + +func newKeyCountPair(key int, cnt uint64) (keyCountPair, error) { + if cnt > maxCountValue { + return 0, errIndexFull + } + + return keyCountPair((uint64(key) << keyShift) | cnt), nil +} + +func (p keyCountPair) key() int { + return int(p >> keyShift) +} + +func (p keyCountPair) count() uint64 { + return uint64(p) & maxCountValue +} + +func (i *similarityIndex) slot(key int) int { + // We use 31 - hashBits because the upper bit was already forced + // to be 0 and we want the remaining high bits to be used as the + // table slot. + return int(uint32(key) >> uint(31-i.hashBits)) +} + +func shouldGrowAt(hashBits int) int { + return (1 << uint(hashBits)) * (hashBits - 3) / hashBits +} + +func (i *similarityIndex) grow() error { + if i.hashBits == 30 { + return errIndexFull + } + + old := i.hashes + + i.hashBits++ + i.growAt = shouldGrowAt(i.hashBits) + + // TODO: find a way to check if it will OOM and return errIndexFull instead. + i.hashes = make([]keyCountPair, 1<<uint(i.hashBits)) + + for _, v := range old { + if v != 0 { + j := i.slot(v.key()) + for i.hashes[j] != 0 { + j++ + if j >= len(i.hashes) { + j = 0 + } + } + i.hashes[j] = v + } + } + + return nil +} + +type keyCountPairs []keyCountPair + +func (p keyCountPairs) Len() int { return len(p) } +func (p keyCountPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p keyCountPairs) Less(i, j int) bool { return p[i] < p[j] } diff --git a/pkg/dyff/rename/rename.go b/pkg/dyff/rename/rename.go new file mode 100644 index 0000000..eec4fdf --- /dev/null +++ b/pkg/dyff/rename/rename.go @@ -0,0 +1,300 @@ +// Package rename contains modified code from go-git's rename detection logic.
+// https://github.com/go-git/go-git/blob/master/plumbing/object/rename.go +// +// go-git is licensed under Apache License 2.0, and you may obtain a copy of their original code and license from: +// https://github.com/go-git/go-git +package rename + +import ( + "errors" + "io" + "sort" + "strings" +) + +type DetectOptions struct { + // RenameScore is the threshold to of similarity between files to consider + // that a pair of delete and insert are a rename. The number must be + // exactly between 0 and 100. + RenameScore uint + // RenameLimit is the maximum amount of files that can be compared when + // detecting renames. The number of comparisons that have to be performed + // is equal to the number of deleted files * the number of added files. + // That means, that if 100 files were deleted and 50 files were added, 5000 + // file comparisons may be needed. So, if the rename limit is 50, the number + // of both deleted and added needs to be equal or less than 50. + // A value of 0 means no limit. + RenameLimit uint +} + +// DefaultDetectOptions are the default and recommended options. +var DefaultDetectOptions = &DetectOptions{ + RenameScore: 60, + RenameLimit: 50, +} + +type Changes interface { + Deleted() []File + Added() []File + + MarkAsRename(deleted, added File) error +} + +type File interface { + Name() string + Reader() (io.ReadCloser, error) + Size() (int64, error) +} + +// DetectRenames detects the renames in the given changes on two trees with +// the given options. It will return the given changes grouping additions and +// deletions into modifications when possible. +// If options is nil, the default diff tree options will be used. +func DetectRenames( + changes Changes, + opts *DetectOptions, +) error { + if opts == nil { + opts = DefaultDetectOptions + } + + detector := &renameDetector{ + c: changes, + deleted: changes.Deleted(), + added: changes.Added(), + renameScore: int(opts.RenameScore), + renameLimit: int(opts.RenameLimit), + } + + return detector.detect() +} + +// renameDetector will detect and resolve renames in a set of changes. +// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +type renameDetector struct { + c Changes + deleted []File + added []File + + renameScore int + renameLimit int +} + +func (d *renameDetector) detect() error { + if len(d.added) > 0 && len(d.deleted) > 0 { + return d.detectContentRenames() + } + return nil +} + +// detectContentRenames detects renames based on the similarity of the content +// in the files by building a matrix of pairs between sources and destinations +// and matching by the highest score. +// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java +func (d *renameDetector) detectContentRenames() error { + cnt := max(len(d.added), len(d.deleted)) + if d.renameLimit > 0 && cnt > d.renameLimit { + return nil + } + + srcs, dsts := d.deleted, d.added + matrix, err := buildSimilarityMatrix(srcs, dsts, d.renameScore) + if err != nil { + return err + } + + // Match rename pairs on a first-come-first-serve basis until + // we have looked at everything that is above the minimum score. 
+ for i := len(matrix) - 1; i >= 0; i-- { + pair := matrix[i] + src := srcs[pair.deleted] + dst := dsts[pair.added] + + if dst == nil || src == nil { + // It was already matched before + continue + } + + if err = d.c.MarkAsRename(src, dst); err != nil { + return err + } + + // Mark as matched + srcs[pair.deleted] = nil + dsts[pair.added] = nil + } + return nil +} + +func nameSimilarityScore(a, b string) int { + aDirLen := strings.LastIndexByte(a, '/') + 1 + bDirLen := strings.LastIndexByte(b, '/') + 1 + + dirMin := min(aDirLen, bDirLen) + dirMax := max(aDirLen, bDirLen) + + var dirScoreLtr, dirScoreRtl int + if dirMax == 0 { + dirScoreLtr = 100 + dirScoreRtl = 100 + } else { + var dirSim int + + for ; dirSim < dirMin; dirSim++ { + if a[dirSim] != b[dirSim] { + break + } + } + + dirScoreLtr = dirSim * 100 / dirMax + + if dirScoreLtr == 100 { + dirScoreRtl = 100 + } else { + for dirSim = 0; dirSim < dirMin; dirSim++ { + if a[aDirLen-1-dirSim] != b[bDirLen-1-dirSim] { + break + } + } + dirScoreRtl = dirSim * 100 / dirMax + } + } + + fileMin := min(len(a)-aDirLen, len(b)-bDirLen) + fileMax := max(len(a)-aDirLen, len(b)-bDirLen) + + fileSim := 0 + for ; fileSim < fileMin; fileSim++ { + if a[len(a)-1-fileSim] != b[len(b)-1-fileSim] { + break + } + } + fileScore := fileSim * 100 / fileMax + + return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100 +} + +type similarityMatrix []similarityPair + +func (m similarityMatrix) Len() int { return len(m) } +func (m similarityMatrix) Swap(i, j int) { m[i], m[j] = m[j], m[i] } +func (m similarityMatrix) Less(i, j int) bool { + if m[i].score == m[j].score { + if m[i].added == m[j].added { + return m[i].deleted < m[j].deleted + } + return m[i].added < m[j].added + } + return m[i].score < m[j].score +} + +type similarityPair struct { + // index of the added file + added int + // index of the deleted file + deleted int + // similarity score + score int +} + +const maxMatrixSize = 10000 + +func buildSimilarityMatrix(srcs, dsts []File, renameScore int) (similarityMatrix, error) { + // Allocate for the worst-case scenario where every pair has a score + // that we need to consider. We might not need that many. + matrixSize := len(srcs) * len(dsts) + if matrixSize > maxMatrixSize { + matrixSize = maxMatrixSize + } + matrix := make(similarityMatrix, 0, matrixSize) + srcSizes := make([]int64, len(srcs)) + dstSizes := make([]int64, len(dsts)) + dstIndices := make([]*similarityIndex, len(dsts)) + dstTooLarge := make(map[int]bool) + + // Consider each pair of files, if the score is above the minimum + // threshold we need to record that scoring in the matrix so we can + // later find the best matches. +outerLoop: + for srcIdx, src := range srcs { + // Declare the from file and the similarity index here to be able to + // reuse it inside the inner loop. The reason to not initialize them + // here is so we can skip the initialization in case they happen to + // not be needed later. They will be initialized inside the inner + // loop if and only if they're needed and reused in subsequent passes. 
+ var s *similarityIndex + var err error + for dstIdx, dst := range dsts { + if dstTooLarge[dstIdx] { + continue + } + + srcSize := srcSizes[srcIdx] + if srcSize == 0 { + srcSize, err = src.Size() + if err != nil { + return nil, err + } + srcSize += 1 + srcSizes[srcIdx] = srcSize + } + + dstSize := dstSizes[dstIdx] + if dstSize == 0 { + dstSize, err = dst.Size() + if err != nil { + return nil, err + } + dstSize += 1 + dstSizes[dstIdx] = dstSize + } + + minSize := min(srcSize, dstSize) + maxSize := max(srcSize, dstSize) + + if int(minSize*100/maxSize) < renameScore { + // File sizes are too different to be a match + continue + } + + if s == nil { + s, err = fileSimilarityIndex(src) + if err != nil { + if errors.Is(err, errIndexFull) { + continue outerLoop + } + return nil, err + } + } + + di := dstIndices[dstIdx] + if di == nil { + di, err = fileSimilarityIndex(dst) + if err != nil { + if errors.Is(err, errIndexFull) { + dstTooLarge[dstIdx] = true + continue + } + return nil, err + } + dstIndices[dstIdx] = di + } + + contentScore := s.score(di, 10000) + // The name score returns a value between 0 and 100, so we need to + // convert it to the same range as the content score. + nameScore := nameSimilarityScore(src.Name(), dst.Name()) * 100 + score := (contentScore*99 + nameScore*1) / 10000 + + if score < renameScore { + continue + } + + matrix = append(matrix, similarityPair{added: dstIdx, deleted: srcIdx, score: score}) + } + } + + sort.Stable(matrix) + + return matrix, nil +} diff --git a/pkg/dyff/rename/util.go b/pkg/dyff/rename/util.go new file mode 100644 index 0000000..5389afb --- /dev/null +++ b/pkg/dyff/rename/util.go @@ -0,0 +1,12 @@ +package rename + +import "io" + +// checkClose calls Close on the given io.Closer. If the given *error points to +// nil, it will be assigned the error returned by Close. Otherwise, any error +// returned by Close will be ignored. checkClose is usually called with defer. 
+func checkClose(c io.Closer, err *error) { + if cerr := c.Close(); cerr != nil && *err == nil { + *err = cerr + } +} diff --git a/pkg/dyff/rename_detect.go b/pkg/dyff/rename_detect.go new file mode 100644 index 0000000..8e0a75e --- /dev/null +++ b/pkg/dyff/rename_detect.go @@ -0,0 +1,110 @@ +package dyff + +import ( + "bytes" + "errors" + "github.com/gonvenience/ytbx" + "github.com/homeport/dyff/pkg/dyff/rename" + yamlv3 "gopkg.in/yaml.v3" + "io" +) + +func mapSlice[E any, S ~[]E, T any](slice S, fn func(e E) T) []T { + ret := make([]T, len(slice)) + for i, e := range slice { + ret[i] = fn(e) + } + return ret +} + +func reject[E comparable, S ~[]E](slice S, elt E) (ret S, ok bool) { + ret = make(S, 0, len(slice)) + for _, e := range slice { + if elt == e { + ok = true + } else { + ret = append(ret, e) + } + } + return +} + +type modifiedPair struct { + from *renameCandidate + to *renameCandidate +} + +type documentChanges struct { + deleted []*renameCandidate + added []*renameCandidate + + modifiedPairs []modifiedPair +} + +func newDocumentChanges(deleted []*renameCandidate, added []*renameCandidate) *documentChanges { + return &documentChanges{ + deleted: deleted, + added: added, + } +} + +func (d *documentChanges) Deleted() []rename.File { + return mapSlice(d.deleted, func(r *renameCandidate) rename.File { return r }) +} + +func (d *documentChanges) Added() []rename.File { + return mapSlice(d.added, func(r *renameCandidate) rename.File { return r }) +} + +func (d *documentChanges) MarkAsRename(deleted, added rename.File) error { + var ok bool + d.deleted, ok = reject(d.deleted, deleted.(*renameCandidate)) + if !ok { + return errors.New("deleted element not found") + } + d.added, ok = reject(d.added, added.(*renameCandidate)) + if !ok { + return errors.New("added element not found") + } + d.modifiedPairs = append(d.modifiedPairs, modifiedPair{ + from: deleted.(*renameCandidate), + to: added.(*renameCandidate), + }) + return nil +} + +type renameCandidate struct { + path *ytbx.Path + doc *yamlv3.Node + + content []byte +} + +func (r *renameCandidate) Name() string { + name, _ := k8sItem.Name(r.doc) + return name +} + +func (r *renameCandidate) Reader() (io.ReadCloser, error) { + if r.content == nil { + if err := r.marshal(); err != nil { + return nil, err + } + } + return io.NopCloser(bytes.NewReader(r.content)), nil +} + +func (r *renameCandidate) Size() (int64, error) { + if r.content == nil { + if err := r.marshal(); err != nil { + return 0, err + } + } + return int64(len(r.content)), nil +} + +func (r *renameCandidate) marshal() error { + var err error + r.content, err = yamlv3.Marshal(r.doc) + return err +}
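
Note (illustration, not part of the patch): the new rename detection is driven entirely through the rename.Changes and rename.File interfaces introduced in pkg/dyff/rename/rename.go; inside dyff, documentChanges and renameCandidate in pkg/dyff/rename_detect.go are the implementations. The minimal sketch below wires the same interfaces against hypothetical in-memory types (memFile and memChanges are illustration-only names) and passes nil options so the call falls back to rename.DefaultDetectOptions (RenameScore 60, RenameLimit 50):

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/homeport/dyff/pkg/dyff/rename"
)

// memFile is a hypothetical in-memory stand-in for rename.File
// (dyff itself uses renameCandidate from rename_detect.go).
type memFile struct {
	name    string
	content []byte
}

func (f memFile) Name() string { return f.name }

func (f memFile) Reader() (io.ReadCloser, error) {
	return io.NopCloser(bytes.NewReader(f.content)), nil
}

func (f memFile) Size() (int64, error) { return int64(len(f.content)), nil }

// memChanges is a hypothetical rename.Changes implementation that only
// records which deleted/added pairs the detector matched up.
type memChanges struct {
	deleted, added []rename.File
	pairs          [][2]rename.File
}

func (c *memChanges) Deleted() []rename.File { return c.deleted }
func (c *memChanges) Added() []rename.File   { return c.added }

func (c *memChanges) MarkAsRename(deleted, added rename.File) error {
	c.pairs = append(c.pairs, [2]rename.File{deleted, added})
	return nil
}

func main() {
	oldDoc := []byte("apiVersion: v1\nkind: Service\nmetadata:\n  name: foo-2\nspec:\n  selector:\n    kubernetes.io/app: foo-2\n")
	newDoc := []byte("apiVersion: v1\nkind: Service\nmetadata:\n  name: foo-3\nspec:\n  selector:\n    kubernetes.io/app: foo-2\n")

	changes := &memChanges{
		deleted: []rename.File{memFile{name: "v1/Service/foo-2", content: oldDoc}},
		added:   []rename.File{memFile{name: "v1/Service/foo-3", content: newDoc}},
	}

	// nil options fall back to rename.DefaultDetectOptions (score 60, limit 50)
	if err := rename.DetectRenames(changes, nil); err != nil {
		panic(err)
	}

	for _, p := range changes.pairs {
		fmt.Printf("detected rename: %s -> %s\n", p[0].Name(), p[1].Name())
	}
}

With the two documents differing only in metadata.name, the content score dominates the combined score (weighted 99:1 against the name score in buildSimilarityMatrix), so the pair is reported as a rename rather than a separate removal and addition.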