Skip to content

Commit

Permalink
gzip: Use sync.Pool
Browse files Browse the repository at this point in the history
This commit uses a sync.Pool for both the gzip writer and reader
so that we reduce the number of allocations and the time GC takes. Previously,
every mutation that needed to be gzipped would make a call to gzip.NewWriter
and allocate a new object. This in turn spent a lot of time and created
extra objects on the heap that were unneeded, which drove up GC time.
  • Loading branch information
Jeremyyang920 committed Nov 26, 2024
1 parent ce1dc45 commit 2eed567
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions internal/staging/stage/gzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"compress/gzip"
"io"
"sync"

"github.com/pkg/errors"
)
Expand All @@ -29,14 +30,30 @@ import (
// with some benefit for tables that have very wide rows or values.
const gzipMinSize = 1024

// gzipWriterPool hands out reusable *gzip.Writer values so that a new
// writer does not have to be allocated for every compression call.
// Writers are constructed with a nil destination, so a caller must
// Reset the writer onto its own output before writing to it.
var gzipWriterPool = sync.Pool{
	New: func() interface{} {
		w := gzip.NewWriter(nil)
		return w
	},
}

// gzipReaderPool hands out reusable *gzip.Reader values. A freshly
// created reader is a zero value with no source attached, so a caller
// must Reset it onto its input before reading from it.
var gzipReaderPool = sync.Pool{
	New: func() interface{} {
		return new(gzip.Reader)
	},
}

// maybeGZip compresses the given data if it is larger than gzipMinSize.
func maybeGZip(data []byte) ([]byte, error) {
if len(data) <= gzipMinSize {
return data, nil
}

var buf bytes.Buffer
gzWriter := gzip.NewWriter(&buf)
gzWriter := gzipWriterPool.Get().(*gzip.Writer)
gzWriter.Reset(&buf)
defer gzipWriterPool.Put(gzWriter)

if _, err := gzWriter.Write(data); err != nil {
return nil, errors.WithStack(err)
}
Expand All @@ -55,9 +72,13 @@ func maybeGunzip(data []byte) ([]byte, error) {
if len(data) < 2 || data[0] != 0x1f || data[1] != 0x8b {
return data, nil
}
r, err := gzip.NewReader(bytes.NewReader(data))

gzReader := gzipReaderPool.Get().(*gzip.Reader)
defer gzipReaderPool.Put(gzReader)
err := gzReader.Reset(bytes.NewReader(data))
if err != nil {
return nil, errors.WithStack(err)
}
return io.ReadAll(r)
defer gzReader.Close()
return io.ReadAll(gzReader)
}

0 comments on commit 2eed567

Please sign in to comment.