Skip to content

Commit

Permalink
de-duplicate all key/value pairs
Browse files Browse the repository at this point in the history
Previously, de-duplication was limited such that keys passed to a log call as
parameters overwrote keys passed earlier to WithValues. Using the same key in
different WithValues calls or multiple times in a WithValues call or the
parameters led to duplicates in the output.

Now all key/value pairs get checked for duplicates. The last one wins and is
printed at the place where it was added. A potential improvement would be to
keep it at the place where it first appeared if the value is the same because
then different log entries are more likely to have it at the beginning of their
key/value pairs because it was added by a common WithValues call.

Because the new implementation avoids memory allocations, the performance
impact is minimal. Some cases even become faster.

    goos: linux
    goarch: amd64
    pkg: k8s.io/klog/examples/benchmarks
    cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz
                                                                            │    before     │               after1               │
                                                                            │    sec/op     │    sec/op     vs base              │
    Output/klog/fail-verbosity-non-standard-struct-key-check/objects/0-36      385.5n ±  9%   358.9n ±  5%  -6.91% (p=0.041 n=6)
    Output/klog/fail-verbosity-non-standard-struct-key-check/objects/10-36     374.3n ±  4%   357.3n ±  5%  -4.55% (p=0.015 n=6)
    Output/klog/fail-verbosity-non-standard-struct-key-check/objects/20-36     388.4n ±  3%   359.2n ±  4%  -7.53% (p=0.002 n=6)
    ...
    Output/zapr/fail-verbosity-non-standard-int-key-check/objects/90-36        380.9n ±  4%   379.4n ±  2%       ~ (p=0.937 n=6)
    Output/zapr/fail-verbosity-non-standard-int-key-check/objects/100-36       375.1n ±  3%   381.9n ±  4%       ~ (p=0.093 n=6)
    geomean                                                                    3.461µ         3.442µ        -0.55%

    pkg: k8s.io/klog/examples/output_test

    ZaprOutput/regular_error_types_as_value-36                                 3.270µ ± 10%   3.530µ ±  9%       ~ (p=0.132 n=6)
    ZaprOutput/struct_values-36                                                3.932µ ± 11%   4.107µ ± 10%       ~ (p=1.000 n=6)
    ZaprOutput/klog.Format-36                                                  4.276µ ± 12%   4.439µ ±  7%       ~ (p=0.132 n=6)
    ZaprOutput/regular_error_types_when_using_logr.Error-36                    2.790µ ±  7%   2.834µ ± 15%       ~ (p=0.699 n=6)
    ZaprOutput/map_values-36                                                   5.802µ ±  9%   5.829µ ±  3%       ~ (p=1.000 n=6)
    ZaprOutput/log_with_name_and_values-36                                     4.051µ ±  4%   3.864µ ±  6%  -4.63% (p=0.015 n=6)
    ...
    geomean                                                                    5.581µ         5.646µ        +1.16%

    pkg: k8s.io/klog/v2

    Header-36                                                                   1.339µ ±  5%    1.311µ ±  6%        ~ (p=0.513 n=6)
    ...
    KlogOutput/klog.Format-36                                                   4.207µ ± 10%    4.667µ ±  3%  +10.95% (p=0.009 n=6)
    KlogOutput/cyclic_list-36                                                   151.3µ ±  1%    151.1µ ±  1%        ~ (p=0.818 n=6)
    KlogOutput/override_single_value-36                                         2.308µ ±  9%    3.501µ ± 15%  +51.66% (p=0.002 n=6)
    KlogOutput/multiple_WithValues-36                                           10.91µ ±  9%    12.46µ ±  7%  +14.16% (p=0.004 n=6)
    ...
    KlogOutput/duplicates-36                                                    21.34µ ±  2%    22.52µ ±  7%        ~ (p=0.065 n=6)
    ...
    geomean                                                                     252.6n          260.6n         +3.18%

    pkg: k8s.io/klog/v2/textlogger

    TextloggerOutput/html_characters-36                                              1.982µ ± 12%   1.978µ ±  6%        ~ (p=0.937 n=6)
    TextloggerOutput/map_values-36                                                   3.215µ ±  6%   3.429µ ±  4%   +6.67% (p=0.041 n=6)
    ...
    TextloggerOutput/mixed_duplicates-36                                             19.33µ ±  5%   19.59µ ±  7%        ~ (p=0.818 n=6)
    ...
    TextloggerOutput/print_duplicate_keys_in_arguments-36                            2.067µ ±  7%   2.388µ ±  6%  +15.53% (p=0.002 n=6)
    ...
    TextloggerOutput/duplicates-36                                                   16.80µ ± 11%   17.06µ ±  4%        ~ (p=0.310 n=6)
    ...
    geomean                                                                          2.798µ         2.860µ         +2.24%
  • Loading branch information
pohly committed Nov 21, 2024
1 parent 48fdaa7 commit b856257
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 118 deletions.
183 changes: 116 additions & 67 deletions internal/serialize/keyvalues.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,8 @@ func MergeKVs(first, second []interface{}) []interface{} {
return second
}

// Determine which keys are in the second slice so that we can skip
// them when iterating over the first one. The code intentionally
// favors performance over completeness: we assume that keys are string
// constants and thus compare equal when the string values are equal. A
// string constant being overridden by, for example, a fmt.Stringer is
// not handled.
overrides := map[interface{}]bool{}
for i := 0; i < len(second); i += 2 {
overrides[second[i]] = true
}
merged := make([]interface{}, 0, maxLength)
for i := 0; i+1 < len(first); i += 2 {
key := first[i]
if overrides[key] {
continue
}
merged = append(merged, key, first[i+1])
}
merged = append(merged, first...)
merged = append(merged, second...)
if len(merged)%2 != 0 {
merged = append(merged, missingValue)
Expand All @@ -105,46 +89,7 @@ type AnyToStringFunc func(v interface{}) string
// MergeKVsInto is a variant of MergeKVs which directly formats the key/value
// pairs into a buffer.
func (f Formatter) MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) {
if len(first) == 0 && len(second) == 0 {
// Nothing to do at all.
return
}

if len(first) == 0 && len(second)%2 == 0 {
// Nothing to be overridden, second slice is well-formed
// and can be used directly.
for i := 0; i < len(second); i += 2 {
f.KVFormat(b, second[i], second[i+1])
}
return
}

// Determine which keys are in the second slice so that we can skip
// them when iterating over the first one. The code intentionally
// favors performance over completeness: we assume that keys are string
// constants and thus compare equal when the string values are equal. A
// string constant being overridden by, for example, a fmt.Stringer is
// not handled.
overrides := map[interface{}]bool{}
for i := 0; i < len(second); i += 2 {
overrides[second[i]] = true
}
for i := 0; i < len(first); i += 2 {
key := first[i]
if overrides[key] {
continue
}
f.KVFormat(b, key, first[i+1])
}
// Round down.
l := len(second)
l = l / 2 * 2
for i := 1; i < l; i += 2 {
f.KVFormat(b, second[i-1], second[i])
}
if len(second)%2 == 1 {
f.KVFormat(b, second[len(second)-1], missingValue)
}
f.formatKVs(b, first, second)
}

func MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) {
Expand All @@ -156,16 +101,7 @@ const missingValue = "(MISSING)"
// KVListFormat serializes all key/value pairs into the provided buffer.
// A space gets inserted before the first pair and between each pair.
func (f Formatter) KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) {
for i := 0; i < len(keysAndValues); i += 2 {
var v interface{}
k := keysAndValues[i]
if i+1 < len(keysAndValues) {
v = keysAndValues[i+1]
} else {
v = missingValue
}
f.KVFormat(b, k, v)
}
f.formatKVs(b, keysAndValues)
}

func KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) {
Expand All @@ -176,6 +112,119 @@ func KVFormat(b *bytes.Buffer, k, v interface{}) {
Formatter{}.KVFormat(b, k, v)
}

// formatKVs formats all key/value pairs such that the output contains no
// duplicates ("last one wins").
func (f Formatter) formatKVs(b *bytes.Buffer, kvs ...[]interface{}) {
// De-duplication is done by optimistically formatting all key value
// pairs and then cutting out the output of those key/value pairs which
// got overwritten later.
//
// In the common case of no duplicates, the only overhead is tracking
// previous keys. This uses a slice with a simple linear search because
// the number of entries is typically so low that allocating a map or
// keeping a sorted slice with binary search aren't justified.
//
// Using a fixed size here makes the Go compiler use the stack as
// initial backing store for the slice, which is crucial for
// performance.
existing := make([]obsoleteKV, 0, 32)
obsolete := make([]interval, 0, 32) // Sorted by start index.
for _, keysAndValues := range kvs {
for i := 0; i < len(keysAndValues); i += 2 {
var v interface{}
k := keysAndValues[i]
if i+1 < len(keysAndValues) {
v = keysAndValues[i+1]
} else {
v = missingValue
}
var e obsoleteKV
e.start = b.Len()
e.key = f.KVFormat(b, k, v)
e.end = b.Len()
i := findObsoleteEntry(existing, e.key)
if i >= 0 {
// The old entry gets obsoleted. This ensures
// that if the more recent one has a different value,
// that value remains. If the value is the same,
// then we could also keep the old entry. That
// would lead to a more natural order of key/value
// pairs in the output (shared key/value in multiple
// log entries always at the beginning) but at the
// cost of another memory comparison.
obsolete = append(obsolete, existing[i].interval)
existing[i].interval = e.interval
} else {
// Instead of appending at the end and doing a
// linear search in findEntry, we could keep
// the slice sorted by key and do a binary search.
//
// Above:
// i, ok := slices.BinarySearchFunc(existing, e, func(a, b entry) int { return strings.Compare(a.key, b.key) })
// Here:
// existing = slices.Insert(existing, i, e)
//
// But that adds a dependency on the slices package
// and made performance slightly worse, presumably
// because the cost of shifting entries around
// did not pay of with faster lookups.
existing = append(existing, e)
}
}
}

// If we need to remove some obsolete key/value pairs then move the memory.
if len(obsolete) > 0 {
// Potentially the next remaining output (might itself be obsolete).
from := obsolete[0].end
// Next obsolete entry.
nextObsolete := 1
// This is the source buffer, before truncation.
all := b.Bytes()
b.Truncate(obsolete[0].start)

for nextObsolete < len(obsolete) {
if from == obsolete[nextObsolete].start {
// Skip also the next obsolete key/value.
from = obsolete[nextObsolete].end
nextObsolete++
continue
}

// Preserve some output. Write uses copy, which
// explicitly allows source and destination to overlap.
// That could happen here.
valid := all[from:obsolete[nextObsolete].start]
b.Write(valid)
from = obsolete[nextObsolete].end
nextObsolete++
}
// Copy end of buffer.
valid := all[from:]
b.Write(valid)
}
}

type obsoleteKV struct {
key string
interval
}

// interval includes the start and excludes the end.
type interval struct {
start int
end int
}

func findObsoleteEntry(entries []obsoleteKV, key string) int {
for i, entry := range entries {
if entry.key == key {
return i
}
}
return -1
}

// formatAny is the fallback formatter for a value. It supports a hook (for
// example, for YAML encoding) and itself uses JSON encoding.
func (f Formatter) formatAny(b *bytes.Buffer, v interface{}) {
Expand Down
10 changes: 7 additions & 3 deletions internal/serialize/keyvalues_no_slog.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (

// KVFormat serializes one key/value pair into the provided buffer.
// A space gets inserted before the pair.
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) string {
// This is the version without slog support. Must be kept in sync with
// the version in keyvalues_slog.go.

Expand All @@ -37,13 +37,15 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
// for the sake of performance. Keys with spaces,
// special characters, etc. will break parsing.
var key string
if sK, ok := k.(string); ok {
// Avoid one allocation when the key is a string, which
// normally it should be.
b.WriteString(sK)
key = sK
} else {
b.WriteString(fmt.Sprintf("%s", k))
key = fmt.Sprintf("%s", k)
}
b.WriteString(key)

// The type checks are sorted so that more frequently used ones
// come first because that is then faster in the common
Expand Down Expand Up @@ -94,4 +96,6 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
default:
f.formatAny(b, v)
}

return key
}
12 changes: 8 additions & 4 deletions internal/serialize/keyvalues_slog.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ import (
)

// KVFormat serializes one key/value pair into the provided buffer.
// A space gets inserted before the pair.
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
// A space gets inserted before the pair. It returns the key.
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) string {
// This is the version without slog support. Must be kept in sync with
// the version in keyvalues_slog.go.

Expand All @@ -39,13 +39,15 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
// for the sake of performance. Keys with spaces,
// special characters, etc. will break parsing.
var key string
if sK, ok := k.(string); ok {
// Avoid one allocation when the key is a string, which
// normally it should be.
b.WriteString(sK)
key = sK
} else {
b.WriteString(fmt.Sprintf("%s", k))
key = fmt.Sprintf("%s", k)
}
b.WriteString(key)

// The type checks are sorted so that more frequently used ones
// come first because that is then faster in the common
Expand Down Expand Up @@ -112,6 +114,8 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
default:
f.formatAny(b, v)
}

return key
}

// generateJSON has the same preference for plain strings as KVFormat.
Expand Down
8 changes: 4 additions & 4 deletions klogr/klogr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,22 @@ func testOutput(t *testing.T, format string) {
expectedKlogOutput: `"test" logger="hello.world" akey="avalue"
`,
},
"may print duplicate keys with the same value": {
"de-duplicate keys with the same value": {
klogr: createLogger().V(0),
text: "test",
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue"},
expectedOutput: `"msg"="test" "akey"="avalue"
`,
expectedKlogOutput: `"test" akey="avalue" akey="avalue"
expectedKlogOutput: `"test" akey="avalue"
`,
},
"may print duplicate keys when the values are passed to Info": {
"de-duplicate keys when the values are passed to Info": {
klogr: createLogger().V(0),
text: "test",
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue2"},
expectedOutput: `"msg"="test" "akey"="avalue2"
`,
expectedKlogOutput: `"test" akey="avalue" akey="avalue2"
expectedKlogOutput: `"test" akey="avalue2"
`,
},
"should only print the duplicate key that is passed to Info if one was passed to the logger": {
Expand Down
6 changes: 3 additions & 3 deletions ktesting/testinglogger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ func TestInfo(t *testing.T) {
"should not print duplicate keys with the same value": {
text: "test",
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue"},
expectedOutput: `Ixxx test akey="avalue" akey="avalue"
expectedOutput: `Ixxx test akey="avalue"
`,
},
"should only print the last duplicate key when the values are passed to Info": {
"should print no duplicate keys when the values are passed to Info": {
text: "test",
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue2"},
expectedOutput: `Ixxx test akey="avalue" akey="avalue2"
expectedOutput: `Ixxx test akey="avalue2"
`,
},
"should only print the duplicate key that is passed to Info if one was passed to the logger": {
Expand Down
Loading

0 comments on commit b856257

Please sign in to comment.