From bdddf29c2dffc5a4fb35c1eb6e8cd0d1a14d5255 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 21 Nov 2024 11:01:57 +0100 Subject: [PATCH] keep original key/value pair during de-duplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppose the same key/value pair gets logged multiple times: WithValues("sameKey", 1).WithValues("newKey", 2).WithValues("sameKey", 1) Previously, the de-duplication code would have emitted it near the end: "hello world" newKey=2 sameKey=1 Now, it gets emitted at the place where it first appeared: "hello world" sameKey=1 newKey=2 This makes log entries more consistent when some repeat the key/value pair and others don't. Performance is about the same as before. goos: linux goarch: amd64 pkg: k8s.io/klog/examples/benchmarks cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz │ after1 │ after2 │ │ sec/op │ sec/op vs base │ ... geomean 3.442µ 3.445µ +0.09% pkg: k8s.io/klog/examples/output_test geomean 5.646µ 5.631µ -0.26% pkg: k8s.io/klog/v2 geomean 260.6n 260.5n -0.07% pkg: k8s.io/klog/v2/textlogger TextloggerOutput/odd_WithValues-36 6.956µ ± 4% 7.263µ ± 4% +4.42% (p=0.041 n=6) ... TextloggerOutput/log_with_multiple_names_and_values-36 3.353µ ± 4% 3.172µ ± 2% -5.40% (p=0.002 n=6) ... TextloggerOutput/duplicates-36 17.06µ ± 4% 17.16µ ± 9% ~ (p=0.937 n=6) ... TextloggerOutput/mixed_duplicates-36 19.59µ ± 7% 20.35µ ± 5% ~ (p=0.065 n=6) ... TextloggerOutput/print_duplicate_keys_in_arguments-36 2.388µ ± 6% 2.218µ ± 7% -7.12% (p=0.026 n=6) ... 
geomean 2.860µ 2.849µ -0.38% --- go.mod | 2 +- internal/serialize/keyvalues.go | 29 +++++++++++++++++++---------- test/output.go | 4 ++-- test/zapr.go | 8 ++++---- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index c0a06788..ced285f9 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module k8s.io/klog/v2 -go 1.18 +go 1.21 require github.com/go-logr/logr v1.4.1 diff --git a/internal/serialize/keyvalues.go b/internal/serialize/keyvalues.go index 313fc9f1..79287679 100644 --- a/internal/serialize/keyvalues.go +++ b/internal/serialize/keyvalues.go @@ -20,6 +20,7 @@ import ( "bytes" "encoding/json" "fmt" + "slices" "strconv" "github.com/go-logr/logr" @@ -144,16 +145,24 @@ func (f Formatter) formatKVs(b *bytes.Buffer, kvs ...[]interface{}) { e.end = b.Len() i := findObsoleteEntry(existing, e.key) if i >= 0 { - // The old entry gets obsoleted. This ensures - // that if the more recent one has a different value, - // that value remains. If the value is the same, - // then we could also keep the old entry. That - // would lead to a more natural order of key/value - // pairs in the output (shared key/value in multiple - // log entries always at the beginning) but at the - // cost of another memory comparison. - obsolete = append(obsolete, existing[i].interval) - existing[i].interval = e.interval + data := b.Bytes() + if bytes.Compare(data[existing[i].start:existing[i].end], data[e.start:e.end]) == 0 { + // The new entry gets obsoleted because it's identical. + // This has the advantage that key/value pairs from + // a WithValues call always come first, even if the same + // pair gets added again later. This makes different log + // entries more consistent. + // + // The new entry has a higher start index and thus can be appended. + obsolete = append(obsolete, e.interval) + } else { + // The old entry gets obsoleted because its value is different. + // + // Sort order is not guaranteed, we have to insert at the right place. 
+ index, _ := slices.BinarySearchFunc(obsolete, existing[i].interval, func(a, b interval) int { return a.start - b.start }) + obsolete = slices.Insert(obsolete, index, existing[i].interval) + existing[i].interval = e.interval + } } else { // Instead of appending at the end and doing a // linear search in findEntry, we could keep diff --git a/test/output.go b/test/output.go index 40994f42..5c12cea6 100644 --- a/test/output.go +++ b/test/output.go @@ -446,9 +446,9 @@ I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span=" text: "duplicates", expectedOutput: `I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 b=2 trace="101112131415161718191a1b1c1d1e1f" span="1112131415161718" +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 b=2 span="1112131415161718" I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 c=3 trace="101112131415161718191a1b1c1d1e1f" span="2122232425262728" d=4 +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 c=3 span="2122232425262728" d=4 `, }, } diff --git a/test/zapr.go b/test/zapr.go index bc9e03ab..1e669fa5 100644 --- a/test/zapr.go +++ b/test/zapr.go @@ -275,9 +275,9 @@ I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span=" `, `I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 b=2 trace="101112131415161718191a1b1c1d1e1f" span="1112131415161718" +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 b=2 span="1112131415161718" I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 c=3 trace="101112131415161718191a1b1c1d1e1f" span="2122232425262728" d=4 +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 c=3 
span="2122232425262728" d=4 `: `{"caller":"test/output.go:","msg":"duplicates","trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1,"v":0} {"caller":"test/output.go:","msg":"duplicates","trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1,"b":2,"trace":"101112131415161718191a1b1c1d1e1f","span":"1112131415161718","v":0} {"caller":"test/output.go:","msg":"duplicates","trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1,"v":0} @@ -383,9 +383,9 @@ I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span=" `, `I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 b=2 trace="101112131415161718191a1b1c1d1e1f" span="1112131415161718" +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 b=2 span="1112131415161718" I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" span="0102030405060708" a=1 -I output.go:] "duplicates" a=1 c=3 trace="101112131415161718191a1b1c1d1e1f" span="2122232425262728" d=4 +I output.go:] "duplicates" trace="101112131415161718191a1b1c1d1e1f" a=1 c=3 span="2122232425262728" d=4 `: `{"caller":"test/output.go:","msg":"duplicates","v":0,"trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1} {"caller":"test/output.go:","msg":"duplicates","v":0,"trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1,"b":2,"trace":"101112131415161718191a1b1c1d1e1f","span":"1112131415161718"} {"caller":"test/output.go:","msg":"duplicates","v":0,"trace":"101112131415161718191a1b1c1d1e1f","span":"0102030405060708","a":1}