diff --git a/README.md b/README.md
index e7ac553..0a03141 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# Amazon kinesis producer [![Build status][travis-image]][travis-url] [![License][license-image]][license-url] [![GoDoc][godoc-img]][godoc-url]
-> A KPL-like batch producer for Amazon Kinesis built on top of the official Go AWS SDK
-and using the same aggregation format that [KPL][kpl-url] use.
+# Amazon kinesis producer [![License][license-image]][license-url] [![GoDoc][godoc-img]][godoc-url]
+> A KPL-like batch producer for Amazon Kinesis built on top of the official Go AWS SDK
+and using the same aggregation format that [KPL][kpl-url] uses.

 ### Useful links
 - [Documentation][godoc-url]
@@ -16,7 +16,7 @@ import (
 	"time"

 	"github.com/sirupsen/logrus"
-	"github.com/a8m/kinesis-producer"
+	"github.com/fhaze/kinesis-producer"
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/session"
 	"github.com/aws/aws-sdk-go/service/kinesis"
@@ -32,10 +32,11 @@ func main() {

 	pr.Start()

+	failures := pr.NotifyFailures()
+
 	// Handle failures
 	go func() {
-		for r := range pr.NotifyFailures() {
-			// r contains `Data`, `PartitionKey` and `Error()`
+		for r := range failures {
 			log.Error(r)
 		}
 	}()
@@ -54,10 +55,158 @@ func main() {
 }
 ```
-#### Specifying logger implementation
+### Shard Mapping
+
+The `Producer` supports aggregation based on a shard map. UserRecords are mapped to a shard using the MD5 hash of the partition key or a provided explicit hash key. Records mapped to the same shard are aggregated together.
+
+By default, shard mapping is disabled. To enable it, set `Config.GetShards`; this function is called on producer initialization to populate the shard map. You can optionally provide a refresh interval, `Config.ShardRefreshInterval`, to update the map periodically. Note that Puts to the Producer are blocked while the shard map is being updated, but only during the reaggregation phase, when buffered requests are regrouped against the new map.
+
+This package provides a `GetShardsFunc` implementation, `GetKinesisShardsFunc`, that uses an AWS client to call the `ListShards` API to fetch the shard list.
+
+**Note** At the time of writing, using the shard map feature adds significant overhead. Depending on the configuration and your record set, this can be more than 2x slower than producing without a shard map. Providing an explicit hash key for user records can reduce this overhead considerably. See the benchmarks in `producer_test.go` for examples.
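+
+Because `Config.GetShards` is an ordinary function, you can also wrap `GetKinesisShardsFunc` with your own behavior. The sketch below is illustrative only (the `logShardRefreshes` wrapper is not part of this package) and assumes the standard library `log` package; it logs whenever the shard list is about to be updated:
+
+```go
+import (
+	"log"
+
+	"github.com/fhaze/kinesis-producer"
+	"github.com/aws/aws-sdk-go/service/kinesis"
+)
+
+// logShardRefreshes wraps any GetShardsFunc and logs shard list updates and
+// refresh errors. Illustrative sketch; not part of the kinesis-producer API.
+func logShardRefreshes(next producer.GetShardsFunc) producer.GetShardsFunc {
+	return func(old []*kinesis.Shard) ([]*kinesis.Shard, bool, error) {
+		shards, update, err := next(old)
+		if err != nil {
+			log.Printf("shard refresh failed: %v", err)
+		} else if update {
+			log.Printf("shard list updated: %d -> %d shards", len(old), len(shards))
+		}
+		return shards, update, err
+	}
+}
+```
+
+You would then pass it as `GetShards: logShardRefreshes(producer.GetKinesisShardsFunc(client, "test"))` in the example below.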
+
+#### Example
+```go
+package main
+
+import (
+	"time"
+
+	"github.com/fhaze/kinesis-producer"
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/kinesis"
+	"github.com/google/uuid"
+)
+
+func main() {
+	client := kinesis.New(session.New(aws.NewConfig()))
+	pr := producer.New(&producer.Config{
+		StreamName:           "test",
+		BacklogCount:         2000,
+		Client:               client,
+		GetShards:            producer.GetKinesisShardsFunc(client, "test"),
+		ShardRefreshInterval: 5 * time.Second,
+	})
+
+	pr.Start()
+
+	failures := pr.NotifyFailures()
+
+	// Handle failures
+	go func() {
+		for r := range failures {
+			log.Error(r)
+		}
+	}()
+
+	go func() {
+		for i := 0; i < 1000; i++ {
+			pk := uuid.New().String()
+			for j := 0; j < 5; j++ {
+				err := pr.Put([]byte("foo"), pk)
+				if err != nil {
+					log.WithError(err).Fatal("error producing")
+				}
+			}
+		}
+	}()
+
+	time.Sleep(3 * time.Second)
+	pr.Stop()
+}
+
+```
+
+### UserRecord interface
+
+You can optionally define a custom struct that implements the `UserRecord` interface and put it using `Producer.PutUserRecord`. The producer holds onto the reference in case of failures. Do not modify or reuse the reference after passing it to the producer until you receive it back in a failure record; otherwise data races may occur.
+
+#### Example
+```go
+package main
+
+import (
+	"encoding/json"
+	"math/big"
+	"time"
+
+	"github.com/fhaze/kinesis-producer"
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/kinesis"
+	"github.com/google/uuid"
+)
+
+type myExampleUserRecord struct {
+	Id   string `json:"id"`
+	Key  string `json:"key"`
+	Val  string `json:"val"`
+	data []byte `json:"-"`
+}
+
+func (r *myExampleUserRecord) PartitionKey() string { return r.Id }
+func (r *myExampleUserRecord) ExplicitHashKey() *big.Int { return nil }
+func (r *myExampleUserRecord) Data() []byte { return r.data }
+func (r *myExampleUserRecord) Size() int { return len(r.data) }
+
+func newMyExampleUserRecord(key, val string) (*myExampleUserRecord, error) {
+	r := &myExampleUserRecord{
+		Id:  uuid.New().String(),
+		Key: key,
+		Val: val,
+	}
+	data, err := json.Marshal(r)
+	if err != nil {
+		return nil, err
+	}
+	r.data = data
+	return r, nil
+}
+
+func main() {
+	client := kinesis.New(session.New(aws.NewConfig()))
+	pr := producer.New(&producer.Config{
+		StreamName:           "test",
+		BacklogCount:         2000,
+		Client:               client,
+		GetShards:            producer.GetKinesisShardsFunc(client, "test"),
+		ShardRefreshInterval: 5 * time.Second,
+	})
+
+	pr.Start()
+
+	failures := pr.NotifyFailures()
+
+	// Handle failures
+	go func() {
+		for r := range failures {
+			log.Error(r)
+		}
+	}()
+
+	go func() {
+		for i := 0; i < 5000; i++ {
+			record, err := newMyExampleUserRecord("foo", "bar")
+			if err != nil {
+				log.WithError(err).Fatal("error creating user record")
+			}
+			err = pr.PutUserRecord(record)
+			if err != nil {
+				log.WithError(err).Fatal("error producing")
+			}
+		}
+	}()
+
+	time.Sleep(3 * time.Second)
+	pr.Stop()
+}
+```
+
+### Specifying logger implementation
 `producer.Config` takes an optional `logging.Logger` implementation.
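+
+A custom implementation only needs the `Info` and `Error` methods used by the bundled `StdLogger` and `NopLogger`. The following is a minimal sketch (the `PrintLogger` type and its output format are illustrative, not part of this package), assuming `fmt` and this package are imported:
+
+```go
+// PrintLogger writes producer log messages to stdout. Example only.
+type PrintLogger struct{}
+
+// Info logs informational messages emitted by the producer.
+func (l *PrintLogger) Info(msg string, values ...producer.LogValue) {
+	fmt.Println("INFO:", msg, values)
+}
+
+// Error logs producer errors together with their context values.
+func (l *PrintLogger) Error(msg string, err error, values ...producer.LogValue) {
+	fmt.Println("ERROR:", msg, err, values)
+}
+```
+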
-##### Using a custom logger +#### Using a custom logger ```go customLogger := &CustomLogger{} @@ -74,8 +223,8 @@ customLogger := &CustomLogger{} ```go import ( "github.com/sirupsen/logrus" - producer "github.com/a8m/kinesis-producer" - "github.com/a8m/kinesis-producer/loggers" + producer "github.com/fhaze/kinesis-producer" + "github.com/fhaze/kinesis-producer/loggers" ) log := logrus.New() @@ -97,14 +246,12 @@ kinesis-producer ships with three logger implementations. ### License MIT -[godoc-url]: https://godoc.org/github.com/a8m/kinesis-producer +[godoc-url]: https://godoc.org/github.com/fhaze/kinesis-producer [godoc-img]: https://img.shields.io/badge/godoc-reference-blue.svg?style=flat-square [kpl-url]: https://github.com/awslabs/amazon-kinesis-producer [de-aggregation]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-kpl-consumer-deaggregation.html [kpl-aggregation]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-producer-adv-aggregation.html -[aggregation-format-url]: https://github.com/a8m/kinesis-producer/blob/master/aggregation-format.md +[aggregation-format-url]: https://github.com/fhaze/kinesis-producer/blob/master/aggregation-format.md [license-image]: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square [license-url]: LICENSE -[travis-image]: https://img.shields.io/travis/a8m/kinesis-producer.svg?style=flat-square -[travis-url]: https://travis-ci.org/a8m/kinesis-producer diff --git a/aggregator.go b/aggregator.go index ea66394..00e25ef 100644 --- a/aggregator.go +++ b/aggregator.go @@ -1,10 +1,11 @@ package producer import ( - "bytes" "crypto/md5" + "sync" k "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/fhaze/kinesis-producer/pb" "github.com/golang/protobuf/proto" ) @@ -12,10 +13,42 @@ var ( magicNumber = []byte{0xF3, 0x89, 0x9A, 0xC2} ) +// Contains the AWS Kinesis PutRecordsRequestEntry and UserRecords that are aggregated into +// the request. UserRecords are provided for more control over failure notifications +type AggregatedRecordRequest struct { + Entry *k.PutRecordsRequestEntry + UserRecords []UserRecord +} + +func NewAggregatedRecordRequest(data []byte, partitionKey, explicitHashKey *string, userRecords []UserRecord) *AggregatedRecordRequest { + return &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + Data: data, + PartitionKey: partitionKey, + ExplicitHashKey: explicitHashKey, + }, + UserRecords: userRecords, + } +} + type Aggregator struct { - buf []*Record - pkeys []string - nbytes int + // Aggregator holds onto its own RWMutex, but the caller of Aggregator methods is expected + // to call Lock/Unlock + sync.RWMutex + // explicitHashKey will be used for aggregated PutRecordsRequestEntry + explicitHashKey *string + buf []UserRecord + pkeys []string + pkeysIndex map[string]int + nbytes int +} + +// NewAggregator initializes a new Aggregator with the given partitionKey +func NewAggregator(explicitHashKey *string) *Aggregator { + a := new(Aggregator) + a.explicitHashKey = explicitHashKey + a.pkeysIndex = make(map[string]int) + return a } // Size return how many bytes stored in the aggregator. @@ -30,72 +63,177 @@ func (a *Aggregator) Count() int { } // Put record using `data` and `partitionKey`. This method is thread-safe. 
-func (a *Aggregator) Put(data []byte, partitionKey string) { - a.pkeys = append(a.pkeys, partitionKey) - a.nbytes += len([]byte(partitionKey)) - keyIndex := uint64(len(a.pkeys) - 1) - - a.nbytes++ // protobuf message index and wire type - a.nbytes += partitionKeyIndexSize - a.buf = append(a.buf, &Record{ - Data: data, - PartitionKeyIndex: &keyIndex, - }) - a.nbytes += len(data) +func (a *Aggregator) Put(userRecord UserRecord) { + nbytes, addPartitionKey := a.userRecordNBytes(userRecord) + // The protobuf message allows more efficient partition and explicit hash key packing + // by allowing multiple records to point to the same key in a table. + if addPartitionKey { + partitionKey := userRecord.PartitionKey() + // nbytes already includes the length of the partition key + a.pkeys = append(a.pkeys, partitionKey) + a.pkeysIndex[partitionKey] = len(a.pkeys) - 1 + } + + a.buf = append(a.buf, userRecord) + a.nbytes += nbytes } // Drain create an aggregated `kinesis.PutRecordsRequestEntry` // that compatible with the KCL's deaggregation logic. // // If you interested to know more about it. see: aggregation-format.md -func (a *Aggregator) Drain() (*k.PutRecordsRequestEntry, error) { +func (a *Aggregator) Drain() (*AggregatedRecordRequest, error) { if a.nbytes == 0 { return nil, nil } - data, err := proto.Marshal(&AggregatedRecord{ + + data, err := proto.Marshal(&pb.AggregatedRecord{ PartitionKeyTable: a.pkeys, - Records: a.buf, + Records: a.aggregateUserRecords(), }) if err != nil { - return nil, err + drainErr := &DrainError{ + Err: err, + UserRecords: a.buf, + } + // Q: Should we clear the aggregator on drain error? Otherwise I would expect Marshal + // to fail indefinitely until the buffer is cleared + a.clear() + return nil, drainErr } + h := md5.New() h.Write(data) checkSum := h.Sum(nil) aggData := append(magicNumber, data...) aggData = append(aggData, checkSum...) - entry := &k.PutRecordsRequestEntry{ - Data: aggData, - PartitionKey: &a.pkeys[0], - } + + request := NewAggregatedRecordRequest(aggData, &a.pkeys[0], a.explicitHashKey, a.buf) a.clear() - return entry, nil + return request, nil +} + +// WillOverflow checks if the aggregator will exceed max record size by attempting to Put +// the user record. If true, the aggregator should be drained before attempting a Put. +func (a *Aggregator) WillOverflow(userRecord UserRecord) bool { + if a.nbytes == 0 { + return false + } + + newbytes, _ := a.userRecordNBytes(userRecord) + + size := len(magicNumber) + size += a.nbytes + size += newbytes + size += md5.Size + // need to also add length of partition key that will be sent in the + // kinesis.PutRecordsRequestEntry + size += len(a.pkeys[0]) + + return size > maxRecordSize +} + +// userRecordNBytes calculates the number of bytes that will be added when adding the +// user record to the aggregator. It also returns a bool indicating if the size of the +// partition key is included in the results. 
+func (a *Aggregator) userRecordNBytes(userRecord UserRecord) (int, bool) { + var ( + nbytes int + partitionKeyIndex int + includesPkSize bool + ) + + partitionKey := userRecord.PartitionKey() + if index, ok := a.pkeysIndex[partitionKey]; ok { + partitionKeyIndex = index + } else { + // partition key was not found, so we must add the additional size of adding + // the repeated field to the AggregatedRecord for the new key + nbytes += calculateStringFieldSize(partitionKey) + includesPkSize = true + partitionKeyIndex = len(a.pkeys) + } + + nbytes += calculateRecordFieldSize(partitionKeyIndex, userRecord.Data()) + + return nbytes, includesPkSize +} + +func (a *Aggregator) aggregateUserRecords() []*pb.Record { + count := len(a.buf) + records := make([]*pb.Record, count) + for i := 0; i < count; i++ { + userRecord := a.buf[i] + keyIndex := uint64(a.pkeysIndex[userRecord.PartitionKey()]) + records[i] = &pb.Record{ + Data: userRecord.Data(), + PartitionKeyIndex: &keyIndex, + } + } + return records } func (a *Aggregator) clear() { - a.buf = make([]*Record, 0) + a.buf = make([]UserRecord, 0) a.pkeys = make([]string, 0) + a.pkeysIndex = make(map[string]int, 0) a.nbytes = 0 } -// Test if a given entry is aggregated record. -func isAggregated(entry *k.PutRecordsRequestEntry) bool { - return bytes.HasPrefix(entry.Data, magicNumber) +func calculateRecordFieldSize(keyIndex int, data []byte) (size int) { + recordBytes := calculateUint64FieldSize(uint64(keyIndex)) + recordBytes += calculateBytesFieldSize(data) + + // protobuf message index and wire type for Record + size += 1 + size += calculateVarIntSize(uint64(recordBytes)) + size += recordBytes + return } -func extractRecords(entry *k.PutRecordsRequestEntry) (out []*k.PutRecordsRequestEntry) { - src := entry.Data[len(magicNumber) : len(entry.Data)-md5.Size] - dest := new(AggregatedRecord) - err := proto.Unmarshal(src, dest) - if err != nil { +func calculateStringFieldSize(val string) (size int) { + strLen := len(val) + // protobuf message index and wire type + size += 1 + size += calculateVarIntSize(uint64(strLen)) + size += strLen + return +} + +func calculateBytesFieldSize(val []byte) (size int) { + dataLen := len(val) + // protobuf message index and wire type + size += 1 + size += calculateVarIntSize(uint64(dataLen)) + size += dataLen + return +} + +func calculateUint64FieldSize(val uint64) (size int) { + // protobuf message index and wire type + size += 1 + size += calculateVarIntSize(val) + return +} + +func calculateVarIntSize(val uint64) (size int) { + if val == 0 { + size = 1 return } - for i := range dest.Records { - r := dest.Records[i] - out = append(out, &k.PutRecordsRequestEntry{ - Data: r.GetData(), - PartitionKey: &dest.PartitionKeyTable[r.GetPartitionKeyIndex()], - }) + + var bitsNeeded int + + for val > 0 { + bitsNeeded++ + val = val >> 1 + } + + // varints use 7 bits of the byte for the value + // see https://developers.google.com/protocol-buffers/docs/encoding + size = bitsNeeded / 7 + if bitsNeeded%7 > 0 { + size++ } return } diff --git a/aggregator_test.go b/aggregator_test.go index 0001194..334f231 100644 --- a/aggregator_test.go +++ b/aggregator_test.go @@ -1,64 +1,146 @@ package producer import ( - "math/rand" + "fmt" "strconv" - "sync" "testing" -) -func assert(t *testing.T, val bool, msg string) { - if !val { - t.Error(msg) - } -} + k "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/fhaze/kinesis-producer/deaggregation" + "github.com/stretchr/testify/require" +) func TestSizeAndCount(t *testing.T) { - a := 
new(Aggregator) - assert(t, a.Size()+a.Count() == 0, "size and count should equal to 0 at the beginning") - data := []byte("hello") - pkey := "world" - n := rand.Intn(100) - for i := 0; i < n; i++ { - a.Put(data, pkey) + a := NewAggregator(nil) + require.Equal(t, 0, a.Size()+a.Count(), "size and count should equal to 0 at the beginning") + + var ( + keyCount = 100 + recordsPerKey = 10 + + keySize = 32 + // message wire/index type + varint of keysize + keysize + keySizeProto = 1 + 1 + keySize + + keyIndexSizeProto = 1 + 1 + dataSize = 512 + // message wire/index type + varint of datasize + datasize + dataSizeProto = 1 + 2 + dataSize + + recordSizeProto = 1 + 2 + keyIndexSizeProto + dataSizeProto + + expectedCount = keyCount * recordsPerKey + expectedSize = (keySizeProto * keyCount) + (recordSizeProto * expectedCount) + ) + + for k := 0; k < keyCount; k++ { + key := fmt.Sprintf("%0[2]*[1]d", k, keySize) + for i := 0; i < recordsPerKey; i++ { + a.Put(NewDataRecord(make([]byte, dataSize), key)) + } } - assert(t, a.Size() == n+5*n+5*n+8*n, "size should equal to size of data, partition-keys, partition key indexes, and protobuf wire type") - assert(t, a.Count() == n, "count should be equal to the number of Put calls") + + require.Equal(t, expectedCount, a.Count(), "count should be equal to the number of Put calls") + require.Equal(t, expectedSize, a.Size(), "size should equal to size of data, partition-keys, partition key indexes, and protobuf wire type") } func TestAggregation(t *testing.T) { - var wg sync.WaitGroup - a := new(Aggregator) - n := 50 - wg.Add(n) - for i := 0; i < n; i++ { - c := strconv.Itoa(i) - data := []byte("hello-" + c) - a.Put(data, c) - wg.Done() + testCases := []struct { + name string + userRecordCount int + explicitHashKey string + }{ + { + name: "Drain empty aggregator causes no error", + userRecordCount: 0, + }, + { + name: "Aggregates user records", + userRecordCount: 50, + }, + { + name: "Aggregates user records with explicitHashKey", + userRecordCount: 50, + explicitHashKey: "123", + }, } - wg.Wait() - record, err := a.Drain() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var explicitHashKey *string = nil + if tc.explicitHashKey != "" { + explicitHashKey = &tc.explicitHashKey + } + a := NewAggregator(explicitHashKey) + + userRecords := make([]UserRecord, tc.userRecordCount) + for i := 0; i < tc.userRecordCount; i++ { + pk := strconv.Itoa(i) + data := []byte("hello-" + pk) + ur := NewDataRecord(data, pk) + userRecords[i] = ur + a.Put(ur) + } + + record, err := a.Drain() + require.NoError(t, err) + if tc.userRecordCount == 0 { + require.Nil(t, record) + return + } + + require.Equal(t, 0, a.Size()+a.Count(), "size and count should be cleared on drain") + require.True(t, deaggregation.IsAggregatedRecord(record.Entry.Data), "should return an agregated record") + require.Equal(t, "0", *record.Entry.PartitionKey, "Entry should user first PartitionKey") + if explicitHashKey == nil { + require.Nil(t, record.Entry.ExplicitHashKey) + } else { + require.NotNil(t, record.Entry.ExplicitHashKey) + require.Equal(t, *explicitHashKey, *record.Entry.ExplicitHashKey, "Entry should contain ExplicitHashKey") + } + records := extractRecords(record.Entry) + require.Equal(t, tc.userRecordCount, len(records), "AggregatedRecord count does not match") + + for i := 0; i < tc.userRecordCount; i++ { + var ( + expectedPartitionKey = strconv.Itoa(i) + expectedData = fmt.Sprintf("hello-%d", i) + rdata = string(records[i].Data) + urdata = string(userRecords[i].Data()) + 
rpartitionKey = *records[i].PartitionKey + urpartitionKey = userRecords[i].PartitionKey() + ) + require.Equal(t, expectedData, rdata, "`Data` field contains invalid value") + require.Equal(t, urdata, rdata, "Record data does not match UserRecord data") + require.Equal(t, expectedPartitionKey, rpartitionKey, "`PartitionKey` field contains invalid value") + require.Equal(t, urpartitionKey, rpartitionKey, "Record partition key does not match UserRecord partition key") + } + }) + } +} + +func extractRecords(entry *k.PutRecordsRequestEntry) (out []*k.PutRecordsRequestEntry) { + dest, err := deaggregation.Unmarshal(entry.Data) if err != nil { - t.Error(err) + return } - assert(t, isAggregated(record), "should return an agregated record") - records := extractRecords(record) - for i := 0; i < n; i++ { - c := strconv.Itoa(i) - found := false - for _, record := range records { - if string(record.Data) == "hello-"+c { - assert(t, string(record.Data) == "hello-"+c, "`Data` field contains invalid value") - found = true - } - } - assert(t, found, "record not found after extracting: "+c) + for i := range dest.Records { + r := dest.Records[i] + out = append(out, &k.PutRecordsRequestEntry{ + Data: r.GetData(), + PartitionKey: &dest.PartitionKeyTable[r.GetPartitionKeyIndex()], + }) } + return } -func TestDrainEmptyAggregator(t *testing.T) { - a := new(Aggregator) - _, err := a.Drain() - assert(t, err == nil, "should not return an error") +func TestAggregatorWillOverflow(t *testing.T) { + a := NewAggregator(nil) + + record := NewDataRecord(mockData("", maxRecordSize/2), "foo") + require.False(t, a.WillOverflow(record)) + + a.Put(record) + record = NewDataRecord(mockData("", maxRecordSize/2), "foo") + require.True(t, a.WillOverflow(record)) } diff --git a/config.go b/config.go index d38604c..6b55c42 100644 --- a/config.go +++ b/config.go @@ -27,6 +27,15 @@ type Putter interface { PutRecords(*k.PutRecordsInput) (*k.PutRecordsOutput, error) } +// GetShardsFunc is called to populate the shard map on initialization and during refresh +// shard interval. GetShardsFunc will be called with the current shard list. During +// initialization, this will be nil. GetShardsFunc should return a shard list, a bool +// indicating if the shards should be updated and an error. If false bool or error is +// returned, shards will not be updated. +type GetShardsFunc func(old []*k.Shard) ([]*k.Shard, bool, error) + +func defaultGetShardsFunc(old []*k.Shard) ([]*k.Shard, bool, error) { return nil, false, nil } + // Config is the Producer configuration. type Config struct { // StreamName is the Kinesis stream. @@ -35,6 +44,17 @@ type Config struct { // FlushInterval is a regular interval for flushing the buffer. Defaults to 5s. FlushInterval time.Duration + // ShardRefreshInterval is a regular interval for refreshing the ShardMap. + // Config.GetShards will be called at this interval. A value of 0 means no refresh + // occurs. Default is 0 + ShardRefreshInterval time.Duration + + // GetShards is called on NewProducer to initialze the ShardMap. + // If ShardRefreshInterval is non-zero, GetShards will be called at that interval. + // The default function returns a nil list of shards, which results in all records being + // aggregated to a single record. + GetShards GetShardsFunc + // BatchCount determine the maximum number of items to pack in batch. // Must not exceed length. Defaults to 500. BatchCount int @@ -54,6 +74,8 @@ type Config struct { BacklogCount int // Number of requests to sent concurrently. Default to 24. 
+ // If you are using the ListShards API in your GetShards function, those connections + // will not be counted in MaxConnections. MaxConnections int // Logger is the logger used. Default to producer.Logger. @@ -89,7 +111,7 @@ func (c *Config) defaults() { if c.AggregateBatchSize == 0 { c.AggregateBatchSize = defaultAggregationSize } - falseOrPanic(c.AggregateBatchSize > maxAggregationSize, "kinesis: AggregateBatchSize exceeds 50KB") + falseOrPanic(c.AggregateBatchSize > maxAggregationSize, "kinesis: AggregateBatchSize exceeds 1MiB") if c.MaxConnections == 0 { c.MaxConnections = defaultMaxConnections } @@ -98,6 +120,9 @@ func (c *Config) defaults() { c.FlushInterval = defaultFlushInterval } falseOrPanic(len(c.StreamName) == 0, "kinesis: StreamName length must be at least 1") + if c.GetShards == nil { + c.GetShards = defaultGetShardsFunc + } } func falseOrPanic(p bool, msg string) { diff --git a/deaggregation/deaggregation.go b/deaggregation/deaggregation.go new file mode 100644 index 0000000..3f7903d --- /dev/null +++ b/deaggregation/deaggregation.go @@ -0,0 +1,65 @@ +// deaggregation package from +// https://github.com/kimutansk/go-kinesis-deaggregation/blob/9d28647d1ff4d296bdd7c12c0cad272c9303d2fc/deaggregator.go +package deaggregation + +import ( + "bytes" + "crypto/md5" + + "github.com/fhaze/kinesis-producer/pb" + "google.golang.org/protobuf/proto" +) + +var magicNumber = []byte{0xF3, 0x89, 0x9A, 0xC2} + +// IsAggregatedRecord judges whether input message is Kinesis Aggregated Record or not. +func IsAggregatedRecord(target []byte) bool { + length := int32(len(target)) + if length < md5.Size { + return false + } + + if !bytes.Equal(magicNumber, target[0:len(magicNumber)]) { + return false + } + + md5Hash := md5.New() + md5Hash.Write(target[len(magicNumber) : length-md5.Size]) + checkSum := md5Hash.Sum(nil) + + if !bytes.Equal(target[length-md5.Size:length], checkSum) { + return false + } + + return true +} + +// ExtractRecordDatas extracts Record.Data slice from Kinesis Aggregated Record. +func ExtractRecordDatas(target []byte) ([][]byte, error) { + length := int32(len(target)) + aggregated := &pb.AggregatedRecord{} + + if err := proto.Unmarshal(target[len(magicNumber):length-md5.Size], aggregated); err != nil { + return nil, err + } + + records := aggregated.GetRecords() + recordDatas := [][]byte{} + for index := 0; index < len(records); index++ { + recordDatas = append(recordDatas, records[index].GetData()) + } + + return recordDatas, nil +} + +// Unmarshal extracts AggregatedRecord from Kinesis Aggregated Record. +func Unmarshal(target []byte) (*pb.AggregatedRecord, error) { + length := int32(len(target)) + aggregated := &pb.AggregatedRecord{} + + if err := proto.Unmarshal(target[len(magicNumber):length-md5.Size], aggregated); err != nil { + return nil, err + } + + return aggregated, nil +} diff --git a/deaggregation/deaggregation_test.go b/deaggregation/deaggregation_test.go new file mode 100644 index 0000000..2f3b4c5 --- /dev/null +++ b/deaggregation/deaggregation_test.go @@ -0,0 +1,276 @@ +package deaggregation + +import ( + "crypto/md5" + "fmt" + "reflect" + "testing" + + "github.com/fhaze/kinesis-producer/pb" + "google.golang.org/protobuf/proto" +) + +// IsAggregatedRecord judges "NotAggregatedRecord" is not AggregatedRecord. 
+func Test_IsAggregatedRecord_JudgeNonAggregatedRecord(t *testing.T) { + target := "NotAggregatedRecord" + targetByteArray := []byte(target) + + actual := IsAggregatedRecord(targetByteArray) + if actual != false { + t.Errorf("IsAggregatedRecord(\"%v\") want %v but %v.", target, false, actual) + } +} + +// IsAggregatedRecord judges MinimumAggregatedRecord is AggregatedRecord. +func Test_IsAggregatedRecord_JudgeMinimumAggregatedRecord(t *testing.T) { + targetRecord := createMinimumAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + target := append(magicNumber, targetRecord...) + target = append(target, checkSum...) + + actual := IsAggregatedRecord(target) + if actual != true { + t.Errorf("IsAggregatedRecord(MinimumAggregatedRecord) want %v but %v.", true, actual) + } +} + +// IsAggregatedRecord judges FullAggregatedRecord is AggregatedRecord. +func Test_IsAggregatedRecord_JudgeFullAggregatedRecord(t *testing.T) { + targetRecord := createFullAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) + + actual := IsAggregatedRecord(targetBytes) + if actual != true { + t.Errorf("IsAggregatedRecord(FullAggregatedRecord) want %v but %v.", true, actual) + } +} + +func BenchmarkIsAggregatedNon_Short(b *testing.B) { + target := "NotAggregatedRecord" + targetByteArray := []byte(target) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + IsAggregatedRecord(targetByteArray) + } +} + +func BenchmarkIsAggregatedNon_Long(b *testing.B) { + targetByteArray := []byte("some data") + for i := 0; i < 1000; i++ { + targetByteArray = append(targetByteArray, []byte(", more data")...) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + IsAggregatedRecord(targetByteArray) + } +} + +func BenchmarkIsAggregatedMinimum(b *testing.B) { + targetRecord := createMinimumAggregateRecordMarshaledBytes() + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + IsAggregatedRecord(targetBytes) + } +} + +func BenchmarkIsAggregatedFull(b *testing.B) { + targetRecord := createFullAggregateRecordMarshaledBytes() + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + IsAggregatedRecord(targetBytes) + } +} + +func Test_ExtractRecordDatas_MinimumAggregatedRecord(t *testing.T) { + targetRecord := createMinimumAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) + + expected := [][]byte{[]byte("MinimumAggregatedRecord")} + actual, _ := ExtractRecordDatas(targetBytes) + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("ExtractRecordDatas(MinimumAggregatedRecord) want %v but %v.", expected, actual) + } +} + +func Test_ExtractRecordDatas_FullAggregatedRecord(t *testing.T) { + targetRecord := createFullAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) 
+ targetBytes = append(targetBytes, checkSum...) + + expected := [][]byte{[]byte("FullAggregatedRecord1"), []byte("FullAggregatedRecord2")} + actual, _ := ExtractRecordDatas(targetBytes) + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("ExtractRecordDatas(FullAggregatedRecord) want %v but %v.", expected, actual) + } +} + +func Test_Unmarshal_MinimumAggregatedRecord(t *testing.T) { + targetRecord := createMinimumAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) + + actual, _ := Unmarshal(targetBytes) + + if len(actual.GetExplicitHashKeyTable()) != 0 { + t.Errorf("Unmarshal(MinimumAggregatedRecord)'s ExplicitHashKeyTable want %v results but %v results.", 0, actual.GetExplicitHashKeyTable()) + } + if len(actual.GetPartitionKeyTable()) != 0 { + t.Errorf("Unmarshal(MinimumAggregatedRecord)'s PartitionKeyTable want %v results but %v results.", 0, actual.GetPartitionKeyTable()) + } + if len(actual.GetRecords()) != 1 { + t.Errorf("Unmarshal(MinimumAggregatedRecord)'s Records want %v records but %v records.", 1, len(actual.GetRecords())) + } + + actualRecord := actual.GetRecords()[0] + if actualRecord.GetPartitionKeyIndex() != uint64(1111) { + t.Errorf("PartitionKeyIndex want %v but %v.", uint64(1111), actualRecord.GetPartitionKeyIndex()) + } + if actualRecord.GetExplicitHashKeyIndex() != uint64(0) { + t.Errorf("ExplicitHashKeyIndex want %v but %v.", uint64(0), actualRecord.GetExplicitHashKeyIndex()) + } + if len(actualRecord.GetTags()) != 0 { + t.Errorf("Tags want %v tags but %v tags.", 0, len(actualRecord.GetTags())) + } + expected := "MinimumAggregatedRecord" + if string(actualRecord.GetData()) != expected { + t.Errorf("Data want %v but %v.", expected, string(actualRecord.GetData())) + } +} + +func Test_Unmarshal_FullAggregatedRecord(t *testing.T) { + targetRecord := createFullAggregateRecordMarshaledBytes() + + md5Hash := md5.New() + md5Hash.Write(targetRecord) + checkSum := md5Hash.Sum(nil) + targetBytes := append(magicNumber, targetRecord...) + targetBytes = append(targetBytes, checkSum...) 
+ + actual, _ := Unmarshal(targetBytes) + + expectedPartitionKeyTable := []string{"Table1", "Table2"} + if !reflect.DeepEqual(expectedPartitionKeyTable, actual.GetPartitionKeyTable()) { + t.Errorf("Unmarshal(FullAggregatedRecord)'s PartitionKeyTable want %v but %v.", expectedPartitionKeyTable, actual.GetPartitionKeyTable()) + } + expectedExplicitHashKeyTable := []string{"KeyTable1", "KeyTable2"} + if !reflect.DeepEqual(expectedExplicitHashKeyTable, actual.GetExplicitHashKeyTable()) { + t.Errorf("Unmarshal(FullAggregatedRecord)'s ExplicitHashKeyTable want %v but %v.", expectedExplicitHashKeyTable, actual.GetExplicitHashKeyTable()) + } + if len(actual.GetRecords()) != 2 { + t.Errorf("Unmarshal(FullAggregatedRecord)'s Records want %v records but %v records.", 2, len(actual.GetRecords())) + } + + actualRecord1 := actual.GetRecords()[0] + actualRecord2 := actual.GetRecords()[1] + if actualRecord1.GetPartitionKeyIndex() != uint64(1111) || actualRecord1.GetExplicitHashKeyIndex() != uint64(11111111) || string(actualRecord1.GetData()) != "FullAggregatedRecord1" || len(actualRecord1.GetTags()) != 2 { + t.Error("Record1 check failed.") + } + if actualRecord2.GetPartitionKeyIndex() != uint64(2222) || actualRecord2.GetExplicitHashKeyIndex() != uint64(22222222) || string(actualRecord2.GetData()) != "FullAggregatedRecord2" || len(actualRecord2.GetTags()) != 2 { + t.Error("Record2 check failed.") + } + + actualRecord1Tags := actualRecord1.GetTags() + actualRecord2Tags := actualRecord2.GetTags() + if actualRecord1Tags[0].GetKey() != "TagKey1" || actualRecord1Tags[0].GetValue() != "TagValue1" || actualRecord1Tags[1].GetKey() != "TagKey2" || actualRecord1Tags[1].GetValue() != "TagValue2" { + t.Error("Record1Tag check failed.") + } + if actualRecord2Tags[0].GetKey() != "TagKey2" || actualRecord2Tags[0].GetValue() != "TagValue2" || actualRecord2Tags[1].GetKey() != "TagKey1" || actualRecord2Tags[1].GetValue() != "TagValue1" { + t.Error("Record2Tag check failed.") + } +} + +func createMinimumAggregateRecordMarshaledBytes() []byte { + targetRecord := &pb.Record{ + PartitionKeyIndex: proto.Uint64(1111), + Data: []byte("MinimumAggregatedRecord"), + } + + targetAggregatedRecord := &pb.AggregatedRecord{ + Records: []*pb.Record{targetRecord}, + } + + targetBytes, err := proto.Marshal(targetAggregatedRecord) + if err != nil { + fmt.Print("unmarshaling error: ", err) + panic(err) + } + + return targetBytes +} + +func createFullAggregateRecordMarshaledBytes() []byte { + tag1 := &pb.Tag{ + Key: proto.String("TagKey1"), + Value: proto.String("TagValue1"), + } + + tag2 := &pb.Tag{ + Key: proto.String("TagKey2"), + Value: proto.String("TagValue2"), + } + + targetRecord1 := &pb.Record{ + PartitionKeyIndex: proto.Uint64(1111), + ExplicitHashKeyIndex: proto.Uint64(11111111), + Data: []byte("FullAggregatedRecord1"), + Tags: []*pb.Tag{tag1, tag2}, + } + + targetRecord2 := &pb.Record{ + PartitionKeyIndex: proto.Uint64(2222), + ExplicitHashKeyIndex: proto.Uint64(22222222), + Data: []byte("FullAggregatedRecord2"), + Tags: []*pb.Tag{tag2, tag1}, + } + + targetAggregatedRecord := &pb.AggregatedRecord{ + PartitionKeyTable: []string{"Table1", "Table2"}, + ExplicitHashKeyTable: []string{"KeyTable1", "KeyTable2"}, + Records: []*pb.Record{targetRecord1, targetRecord2}, + } + + targetBytes, err := proto.Marshal(targetAggregatedRecord) + if err != nil { + fmt.Print("unmarshaling error: ", err) + panic(err) + } + + return targetBytes +} diff --git a/errors.go b/errors.go new file mode 100644 index 0000000..bb1e098 --- /dev/null +++ 
b/errors.go @@ -0,0 +1,74 @@ +package producer + +import ( + "fmt" +) + +type ErrStoppedProducer struct { + UserRecord +} + +func (e *ErrStoppedProducer) Error() string { + return "Unable to Put record. Producer is already stopped" +} + +type ErrIllegalPartitionKey struct { + UserRecord +} + +func (e *ErrIllegalPartitionKey) Error() string { + return fmt.Sprintf("Invalid parition key. Length must be at least 1 and at most 256: %s", e.PartitionKey()) +} + +type ErrRecordSizeExceeded struct { + UserRecord +} + +func (e *ErrRecordSizeExceeded) Error() string { + return fmt.Sprintf("Data must be less than or equal to 1MB in size: %d", e.Size()) +} + +// Failure record type for failures from Kinesis PutRecords request +type FailureRecord struct { + Err error + // The PartitionKey that was used in the kinesis.PutRecordsRequestEntry + PartitionKey string + // The ExplicitHashKey that was used in the kinesis.PutRecordsRequestEntry. Will be the + // empty string if nil + ExplicitHashKey string + // UserRecords that were contained in the failed aggregated record request + UserRecords []UserRecord +} + +func (e *FailureRecord) Error() string { + return e.Err.Error() +} + +type DrainError struct { + Err error + // UserRecords in the buffer when drain attempt was made + UserRecords []UserRecord +} + +func (e *DrainError) Error() string { + return e.Err.Error() +} + +type ShardBucketError struct { + UserRecord +} + +func (s *ShardBucketError) Error() string { + if hk := s.ExplicitHashKey(); hk != nil { + return fmt.Sprintf("ExplicitHashKey outside shard key range: %s", hk.String()) + } + return fmt.Sprintf("PartitionKey outside shard key range: %s", s.PartitionKey()) +} + +type ShardRefreshError struct { + Err error +} + +func (s *ShardRefreshError) Error() string { + return fmt.Sprintf("ShardRefreshError: %v", s.Err) +} diff --git a/example_test.go b/example_test.go index a3a511b..3d07156 100644 --- a/example_test.go +++ b/example_test.go @@ -1,13 +1,16 @@ package producer import ( + "encoding/json" "log" + "math/big" "os" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/google/uuid" ) func ExampleSimple() { @@ -22,11 +25,12 @@ func ExampleSimple() { pr.Start() + failures := pr.NotifyFailures() + // Handle failures go func() { - for r := range pr.NotifyFailures() { - // r contains `Data`, `PartitionKey` and `Error()` - logger.Error("detected put failure", r.error) + for r := range failures { + logger.Error("detected put failure", r) } }() @@ -42,3 +46,108 @@ func ExampleSimple() { time.Sleep(3 * time.Second) pr.Stop() } + +func ExampleShardMap() { + logger := &StdLogger{log.New(os.Stdout, "", log.LstdFlags)} + client := kinesis.New(session.New(aws.NewConfig())) + pr := New(&Config{ + StreamName: "test", + BacklogCount: 2000, + Client: client, + GetShards: GetKinesisShardsFunc(client, "test"), + ShardRefreshInterval: 5 * time.Second, + Logger: logger, + }) + + pr.Start() + + failures := pr.NotifyFailures() + + // Handle failures + go func() { + for r := range failures { + logger.Error("detected put failure", r) + } + }() + + go func() { + for i := 0; i < 1000; i++ { + pk := uuid.New().String() + for j := 0; j < 5; j++ { + err := pr.Put([]byte("foo"), pk) + if err != nil { + logger.Error("error producing", err) + } + } + } + }() + + time.Sleep(3 * time.Second) + pr.Stop() +} + +type myExampleUserRecord struct { + Id string `json:"id"` + Key string `json:"key"` + Val string `json:"val"` + data []byte `json:"-"` +} + 
+func (r *myExampleUserRecord) PartitionKey() string { return r.Id } +func (r *myExampleUserRecord) ExplicitHashKey() *big.Int { return nil } +func (r *myExampleUserRecord) Data() []byte { return r.data } +func (r *myExampleUserRecord) Size() int { return len(r.data) } + +func newMyExampleUserRecord(key, val string) (*myExampleUserRecord, error) { + r := &myExampleUserRecord{ + Id: uuid.New().String(), + Key: key, + Val: val, + } + data, err := json.Marshal(r) + if err != nil { + return nil, err + } + r.data = data + return r, nil +} + +func ExampleUserRecord() { + logger := &StdLogger{log.New(os.Stdout, "", log.LstdFlags)} + client := kinesis.New(session.New(aws.NewConfig())) + pr := New(&Config{ + StreamName: "test", + BacklogCount: 2000, + Client: client, + GetShards: GetKinesisShardsFunc(client, "test"), + ShardRefreshInterval: 5 * time.Second, + Logger: logger, + }) + + pr.Start() + + failures := pr.NotifyFailures() + + // Handle failures + go func() { + for r := range failures { + logger.Error("detected put failure", r) + } + }() + + go func() { + for i := 0; i < 5000; i++ { + record, err := newMyExampleUserRecord("foo", "bar") + if err != nil { + logger.Error("error creating user record", err) + } + err = pr.PutUserRecord(record) + if err != nil { + logger.Error("error producing", err) + } + } + }() + + time.Sleep(3 * time.Second) + pr.Stop() +} diff --git a/go.mod b/go.mod index 49748b2..f0ff9fd 100644 --- a/go.mod +++ b/go.mod @@ -1,13 +1,18 @@ -module github.com/a8m/kinesis-producer +module github.com/fhaze/kinesis-producer require ( github.com/aws/aws-sdk-go v1.21.10 - github.com/golang/protobuf v1.3.2 + github.com/golang/protobuf v1.5.0 // indirect + github.com/google/uuid v1.1.1 github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7 github.com/pkg/errors v0.8.1 // indirect github.com/sirupsen/logrus v1.4.2 + github.com/stretchr/testify v1.2.2 go.uber.org/atomic v1.4.0 // indirect go.uber.org/multierr v1.1.0 // indirect go.uber.org/zap v1.10.0 golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 // indirect + google.golang.org/protobuf v1.26.0 ) + +go 1.13 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..aef3842 --- /dev/null +++ b/go.sum @@ -0,0 +1,38 @@ +github.com/aws/aws-sdk-go v1.21.10 h1:lTRdgyxraKbnNhx7kWeoW/Uow1TKnSNDpQGTtEXJQgk= +github.com/aws/aws-sdk-go v1.21.10/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7 h1:K//n/AqR5HjG3qxbrBCL4vJPW0MVFSs9CPK1OOJdRME= +github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= diff --git a/helpers_test.go b/helpers_test.go new file mode 100644 index 0000000..972d0d9 --- /dev/null +++ b/helpers_test.go @@ -0,0 +1,26 @@ +package producer + +import ( + "encoding/json" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func loadJSONFromFile(t *testing.T, name string, out interface{}) { + require.NotNil(t, out) + b := loadBytesFromFile(t, name) + err := json.Unmarshal(b, out) + require.NoError(t, err) +} + +func loadBytesFromFile(t *testing.T, name string) []byte { + file, err := os.Open(name) + require.NoError(t, err) + defer file.Close() + b, err := ioutil.ReadAll(file) + require.NoError(t, err) + return b +} diff --git a/logger.go b/logger.go index c27dfee..b8069e2 100644 --- a/logger.go +++ b/logger.go @@ -19,7 +19,7 @@ type LogValue struct { } func (v LogValue) String() string { - return fmt.Sprintf(" %s=%s", v.Name, v.Value) + return fmt.Sprintf(" %s=%v", v.Name, v.Value) } // StdLogger implements the Logger interface using standard library loggers @@ -44,3 +44,8 @@ func (l *StdLogger) valuesToString(values ...LogValue) string { } return strings.Join(parts, ", ") } + +type NopLogger struct{} + +func (_ *NopLogger) Info(msg string, values ...LogValue) {} +func (_ *NopLogger) Error(msg string, err error, values ...LogValue) {} diff --git 
a/loggers/kplogrus/logrus.go b/loggers/kplogrus/logrus.go index 27f6792..cd188ac 100644 --- a/loggers/kplogrus/logrus.go +++ b/loggers/kplogrus/logrus.go @@ -1,7 +1,7 @@ package kplogrus import ( - producer "github.com/a8m/kinesis-producer" + producer "github.com/fhaze/kinesis-producer" "github.com/sirupsen/logrus" ) diff --git a/loggers/kpzap/zap.go b/loggers/kpzap/zap.go index a885437..c745123 100644 --- a/loggers/kpzap/zap.go +++ b/loggers/kpzap/zap.go @@ -3,7 +3,7 @@ package kpzap import ( "go.uber.org/zap" - producer "github.com/a8m/kinesis-producer" + producer "github.com/fhaze/kinesis-producer" ) // Logger implements a zap.Logger logger for kinesis-producer diff --git a/messages.pb.go b/messages.pb.go deleted file mode 100644 index d1ffc8a..0000000 --- a/messages.pb.go +++ /dev/null @@ -1,152 +0,0 @@ -// Code generated by protoc-gen-go. -// source: messages.proto -// DO NOT EDIT! - -/* -Package producer is a generated protocol buffer package. - -It is generated from these files: - messages.proto - -It has these top-level messages: - AggregatedRecord - Tag - Record -*/ -package producer - -import proto "github.com/golang/protobuf/proto" -import fmt "fmt" -import math "math" - -// Reference imports to suppress errors if they are not otherwise used. -var _ = proto.Marshal -var _ = fmt.Errorf -var _ = math.Inf - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the proto package it is being compiled against. -const _ = proto.ProtoPackageIsVersion1 - -type AggregatedRecord struct { - PartitionKeyTable []string `protobuf:"bytes,1,rep,name=partition_key_table" json:"partition_key_table,omitempty"` - ExplicitHashKeyTable []string `protobuf:"bytes,2,rep,name=explicit_hash_key_table" json:"explicit_hash_key_table,omitempty"` - Records []*Record `protobuf:"bytes,3,rep,name=records" json:"records,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *AggregatedRecord) Reset() { *m = AggregatedRecord{} } -func (m *AggregatedRecord) String() string { return proto.CompactTextString(m) } -func (*AggregatedRecord) ProtoMessage() {} -func (*AggregatedRecord) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } - -func (m *AggregatedRecord) GetPartitionKeyTable() []string { - if m != nil { - return m.PartitionKeyTable - } - return nil -} - -func (m *AggregatedRecord) GetExplicitHashKeyTable() []string { - if m != nil { - return m.ExplicitHashKeyTable - } - return nil -} - -func (m *AggregatedRecord) GetRecords() []*Record { - if m != nil { - return m.Records - } - return nil -} - -type Tag struct { - Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` - Value *string `protobuf:"bytes,2,opt,name=value" json:"value,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Tag) Reset() { *m = Tag{} } -func (m *Tag) String() string { return proto.CompactTextString(m) } -func (*Tag) ProtoMessage() {} -func (*Tag) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } - -func (m *Tag) GetKey() string { - if m != nil && m.Key != nil { - return *m.Key - } - return "" -} - -func (m *Tag) GetValue() string { - if m != nil && m.Value != nil { - return *m.Value - } - return "" -} - -type Record struct { - PartitionKeyIndex *uint64 `protobuf:"varint,1,req,name=partition_key_index" json:"partition_key_index,omitempty"` - ExplicitHashKeyIndex *uint64 `protobuf:"varint,2,opt,name=explicit_hash_key_index" json:"explicit_hash_key_index,omitempty"` - Data []byte `protobuf:"bytes,3,req,name=data" json:"data,omitempty"` - 
Tags []*Tag `protobuf:"bytes,4,rep,name=tags" json:"tags,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Record) Reset() { *m = Record{} } -func (m *Record) String() string { return proto.CompactTextString(m) } -func (*Record) ProtoMessage() {} -func (*Record) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } - -func (m *Record) GetPartitionKeyIndex() uint64 { - if m != nil && m.PartitionKeyIndex != nil { - return *m.PartitionKeyIndex - } - return 0 -} - -func (m *Record) GetExplicitHashKeyIndex() uint64 { - if m != nil && m.ExplicitHashKeyIndex != nil { - return *m.ExplicitHashKeyIndex - } - return 0 -} - -func (m *Record) GetData() []byte { - if m != nil { - return m.Data - } - return nil -} - -func (m *Record) GetTags() []*Tag { - if m != nil { - return m.Tags - } - return nil -} - -func init() { - proto.RegisterType((*AggregatedRecord)(nil), "producer.AggregatedRecord") - proto.RegisterType((*Tag)(nil), "producer.Tag") - proto.RegisterType((*Record)(nil), "producer.Record") -} - -var fileDescriptor0 = []byte{ - // 216 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x74, 0x8e, 0x4d, 0x4e, 0xc3, 0x30, - 0x10, 0x85, 0xd5, 0xd8, 0xfc, 0x74, 0xda, 0xa2, 0xca, 0x2c, 0x88, 0xc4, 0x02, 0x9a, 0x55, 0x56, - 0x59, 0x70, 0x03, 0xae, 0x80, 0xd8, 0x47, 0x43, 0x3c, 0x72, 0x2d, 0x42, 0x1d, 0xec, 0x09, 0x0a, - 0xb7, 0x67, 0xe2, 0x08, 0xc1, 0x02, 0x56, 0x23, 0x8d, 0xde, 0xfb, 0xde, 0x07, 0x57, 0x6f, 0x94, - 0x12, 0x3a, 0x4a, 0xcd, 0x10, 0x03, 0x07, 0x73, 0x29, 0xc7, 0x8e, 0x1d, 0xc5, 0x2a, 0xc1, 0xfe, - 0xd1, 0xb9, 0x48, 0x0e, 0x99, 0xec, 0x13, 0x75, 0x21, 0x5a, 0x73, 0x0b, 0xd7, 0x03, 0x46, 0xf6, - 0xec, 0xc3, 0xa9, 0x7d, 0xa5, 0xcf, 0x96, 0xf1, 0xa5, 0xa7, 0x72, 0x75, 0xaf, 0xea, 0xb5, 0xb9, - 0x83, 0x1b, 0x9a, 0x86, 0xde, 0x77, 0x9e, 0xdb, 0x23, 0xa6, 0xe3, 0xaf, 0x40, 0x91, 0x03, 0x07, - 0xb8, 0x88, 0x99, 0x93, 0x4a, 0x25, 0x8f, 0xcd, 0xc3, 0xbe, 0xf9, 0x5e, 0x6b, 0x96, 0x81, 0xea, - 0x00, 0xea, 0x19, 0x9d, 0xd9, 0x80, 0x92, 0xb2, 0x70, 0x0b, 0xa9, 0xed, 0xe0, 0xec, 0x03, 0xfb, - 0x71, 0xa6, 0xac, 0xea, 0x75, 0xf5, 0x0e, 0xe7, 0xff, 0xd9, 0xf8, 0x93, 0xa5, 0x29, 0xb7, 0xf4, - 0xdf, 0x36, 0x4b, 0x60, 0xe6, 0x68, 0xb3, 0x05, 0x6d, 0x91, 0x51, 0x54, 0x8a, 0x7a, 0x2b, 0x2c, - 0xcd, 0xe8, 0x52, 0xa9, 0xb3, 0xd8, 0xee, 0x47, 0x4c, 0x74, 0xbe, 0x02, 0x00, 0x00, 0xff, 0xff, - 0x0c, 0x0d, 0x0e, 0xd0, 0x25, 0x01, 0x00, 0x00, -} diff --git a/pb/messages.pb.go b/pb/messages.pb.go new file mode 100644 index 0000000..f2b7697 --- /dev/null +++ b/pb/messages.pb.go @@ -0,0 +1,334 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.26.0 +// protoc v3.15.8 +// source: messages.proto + +package pb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type AggregatedRecord struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + PartitionKeyTable []string `protobuf:"bytes,1,rep,name=partition_key_table,json=partitionKeyTable" json:"partition_key_table,omitempty"` + ExplicitHashKeyTable []string `protobuf:"bytes,2,rep,name=explicit_hash_key_table,json=explicitHashKeyTable" json:"explicit_hash_key_table,omitempty"` + Records []*Record `protobuf:"bytes,3,rep,name=records" json:"records,omitempty"` +} + +func (x *AggregatedRecord) Reset() { + *x = AggregatedRecord{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *AggregatedRecord) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AggregatedRecord) ProtoMessage() {} + +func (x *AggregatedRecord) ProtoReflect() protoreflect.Message { + mi := &file_messages_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AggregatedRecord.ProtoReflect.Descriptor instead. +func (*AggregatedRecord) Descriptor() ([]byte, []int) { + return file_messages_proto_rawDescGZIP(), []int{0} +} + +func (x *AggregatedRecord) GetPartitionKeyTable() []string { + if x != nil { + return x.PartitionKeyTable + } + return nil +} + +func (x *AggregatedRecord) GetExplicitHashKeyTable() []string { + if x != nil { + return x.ExplicitHashKeyTable + } + return nil +} + +func (x *AggregatedRecord) GetRecords() []*Record { + if x != nil { + return x.Records + } + return nil +} + +type Tag struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Value *string `protobuf:"bytes,2,opt,name=value" json:"value,omitempty"` +} + +func (x *Tag) Reset() { + *x = Tag{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Tag) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Tag) ProtoMessage() {} + +func (x *Tag) ProtoReflect() protoreflect.Message { + mi := &file_messages_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Tag.ProtoReflect.Descriptor instead. 
+func (*Tag) Descriptor() ([]byte, []int) { + return file_messages_proto_rawDescGZIP(), []int{1} +} + +func (x *Tag) GetKey() string { + if x != nil && x.Key != nil { + return *x.Key + } + return "" +} + +func (x *Tag) GetValue() string { + if x != nil && x.Value != nil { + return *x.Value + } + return "" +} + +type Record struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + PartitionKeyIndex *uint64 `protobuf:"varint,1,req,name=partition_key_index,json=partitionKeyIndex" json:"partition_key_index,omitempty"` + ExplicitHashKeyIndex *uint64 `protobuf:"varint,2,opt,name=explicit_hash_key_index,json=explicitHashKeyIndex" json:"explicit_hash_key_index,omitempty"` + Data []byte `protobuf:"bytes,3,req,name=data" json:"data,omitempty"` + Tags []*Tag `protobuf:"bytes,4,rep,name=tags" json:"tags,omitempty"` +} + +func (x *Record) Reset() { + *x = Record{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Record) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Record) ProtoMessage() {} + +func (x *Record) ProtoReflect() protoreflect.Message { + mi := &file_messages_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Record.ProtoReflect.Descriptor instead. +func (*Record) Descriptor() ([]byte, []int) { + return file_messages_proto_rawDescGZIP(), []int{2} +} + +func (x *Record) GetPartitionKeyIndex() uint64 { + if x != nil && x.PartitionKeyIndex != nil { + return *x.PartitionKeyIndex + } + return 0 +} + +func (x *Record) GetExplicitHashKeyIndex() uint64 { + if x != nil && x.ExplicitHashKeyIndex != nil { + return *x.ExplicitHashKeyIndex + } + return 0 +} + +func (x *Record) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +func (x *Record) GetTags() []*Tag { + if x != nil { + return x.Tags + } + return nil +} + +var File_messages_proto protoreflect.FileDescriptor + +var file_messages_proto_rawDesc = []byte{ + 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x02, 0x70, 0x62, 0x22, 0x9f, 0x01, 0x0a, 0x10, 0x41, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, + 0x74, 0x65, 0x64, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x12, 0x2e, 0x0a, 0x13, 0x70, 0x61, 0x72, + 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6b, 0x65, 0x79, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x11, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, + 0x6e, 0x4b, 0x65, 0x79, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x35, 0x0a, 0x17, 0x65, 0x78, 0x70, + 0x6c, 0x69, 0x63, 0x69, 0x74, 0x5f, 0x68, 0x61, 0x73, 0x68, 0x5f, 0x6b, 0x65, 0x79, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x14, 0x65, 0x78, 0x70, 0x6c, + 0x69, 0x63, 0x69, 0x74, 0x48, 0x61, 0x73, 0x68, 0x4b, 0x65, 0x79, 0x54, 0x61, 0x62, 0x6c, 0x65, + 0x12, 0x24, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x0a, 0x2e, 0x70, 0x62, 0x2e, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x52, 0x07, 0x72, + 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x2d, 0x0a, 0x03, 0x54, 0x61, 0x67, 0x12, 0x10, 0x0a, + 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, + 0x14, 0x0a, 0x05, 
0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0xa0, 0x01, 0x0a, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, + 0x12, 0x2e, 0x0a, 0x13, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6b, 0x65, + 0x79, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x02, 0x28, 0x04, 0x52, 0x11, 0x70, + 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x4b, 0x65, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, + 0x12, 0x35, 0x0a, 0x17, 0x65, 0x78, 0x70, 0x6c, 0x69, 0x63, 0x69, 0x74, 0x5f, 0x68, 0x61, 0x73, + 0x68, 0x5f, 0x6b, 0x65, 0x79, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x14, 0x65, 0x78, 0x70, 0x6c, 0x69, 0x63, 0x69, 0x74, 0x48, 0x61, 0x73, 0x68, 0x4b, + 0x65, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, + 0x03, 0x20, 0x02, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x12, 0x1b, 0x0a, 0x04, 0x74, + 0x61, 0x67, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x07, 0x2e, 0x70, 0x62, 0x2e, 0x54, + 0x61, 0x67, 0x52, 0x04, 0x74, 0x61, 0x67, 0x73, 0x42, 0x27, 0x5a, 0x25, 0x67, 0x69, 0x74, 0x68, + 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x6a, 0x6e, 0x65, 0x69, 0x6c, 0x2f, 0x6b, 0x69, + 0x6e, 0x65, 0x73, 0x69, 0x73, 0x2d, 0x70, 0x72, 0x6f, 0x64, 0x75, 0x63, 0x65, 0x72, 0x2f, 0x70, + 0x62, +} + +var ( + file_messages_proto_rawDescOnce sync.Once + file_messages_proto_rawDescData = file_messages_proto_rawDesc +) + +func file_messages_proto_rawDescGZIP() []byte { + file_messages_proto_rawDescOnce.Do(func() { + file_messages_proto_rawDescData = protoimpl.X.CompressGZIP(file_messages_proto_rawDescData) + }) + return file_messages_proto_rawDescData +} + +var file_messages_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_messages_proto_goTypes = []interface{}{ + (*AggregatedRecord)(nil), // 0: pb.AggregatedRecord + (*Tag)(nil), // 1: pb.Tag + (*Record)(nil), // 2: pb.Record +} +var file_messages_proto_depIdxs = []int32{ + 2, // 0: pb.AggregatedRecord.records:type_name -> pb.Record + 1, // 1: pb.Record.tags:type_name -> pb.Tag + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_messages_proto_init() } +func file_messages_proto_init() { + if File_messages_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_messages_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*AggregatedRecord); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_messages_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Tag); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_messages_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Record); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_messages_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: 
file_messages_proto_goTypes, + DependencyIndexes: file_messages_proto_depIdxs, + MessageInfos: file_messages_proto_msgTypes, + }.Build() + File_messages_proto = out.File + file_messages_proto_rawDesc = nil + file_messages_proto_goTypes = nil + file_messages_proto_depIdxs = nil +} diff --git a/messages.proto b/pb/messages.proto similarity index 83% rename from messages.proto rename to pb/messages.proto index f798a7d..e30b3ad 100644 --- a/messages.proto +++ b/pb/messages.proto @@ -1,4 +1,7 @@ -package producer; +syntax = "proto2"; +package pb; + +option go_package = "github.com/fhaze/kinesis-producer/pb"; message AggregatedRecord { repeated string partition_key_table = 1; diff --git a/producer.go b/producer.go index 07e5aaf..6b8059c 100644 --- a/producer.go +++ b/producer.go @@ -7,51 +7,47 @@ package producer import ( - "crypto/md5" - "errors" - "fmt" "sync" "time" - - "github.com/aws/aws-sdk-go/service/kinesis" - "github.com/jpillora/backoff" -) - -// Errors -var ( - ErrStoppedProducer = errors.New("Unable to Put record. Producer is already stopped") - ErrIllegalPartitionKey = errors.New("Invalid parition key. Length must be at least 1 and at most 256") - ErrRecordSizeExceeded = errors.New("Data must be less than or equal to 1MB in size") ) -// Producer batches records. type Producer struct { sync.RWMutex *Config - aggregator *Aggregator - semaphore semaphore - records chan *kinesis.PutRecordsRequestEntry - failure chan *FailureRecord - done chan struct{} - - // Current state of the Producer - // notify set to true after calling to `NotifyFailures` - notify bool - // stopped set to true after `Stop`ing the Producer. - // This will prevent from user to `Put` any new data. - stopped bool + + shardMap *ShardMap + + // semaphore controling size of Put backlog before blocking + backlog semaphore + + pool *WorkerPool + + // stopped signals that the producer is no longer accepting Puts + stopped chan struct{} + + // signal for the main loop that stop has been called and it should drain the backlog + done chan struct{} + + failures chan error } -// New creates new producer with the given config. func New(config *Config) *Producer { config.defaults() - return &Producer{ - Config: config, - done: make(chan struct{}), - records: make(chan *kinesis.PutRecordsRequestEntry, config.BacklogCount), - semaphore: make(chan struct{}, config.MaxConnections), - aggregator: new(Aggregator), + p := &Producer{ + Config: config, + backlog: make(chan struct{}, config.BacklogCount), + pool: NewWorkerPool(config), + stopped: make(chan struct{}), + done: make(chan struct{}), } + shards, _, err := p.GetShards(nil) + if err != nil { + // TODO: maybe just log and continue or fallback to default? if ShardRefreshInterval + // is set, it may succeed a later time + panic(err) + } + p.shardMap = NewShardMap(shards, p.AggregateBatchCount) + return p } // Put `data` using `partitionKey` asynchronously. This method is thread-safe. @@ -62,256 +58,216 @@ func New(config *Config) *Producer { // doesn't exist), the message will returned by the Producer. // Add a listener with `Producer.NotifyFailures` to handle undeliverable messages. 
 func (p *Producer) Put(data []byte, partitionKey string) error {
-	p.RLock()
-	stopped := p.stopped
-	p.RUnlock()
-	if stopped {
-		return ErrStoppedProducer
+	return p.PutUserRecord(NewDataRecord(data, partitionKey))
+}
+
+func (p *Producer) PutUserRecord(userRecord UserRecord) error {
+	select {
+	case <-p.stopped:
+		return &ErrStoppedProducer{UserRecord: userRecord}
+	// same as p.backlog.acquire() but using channel primitive for select case
+	case p.backlog <- struct{}{}:
 	}
-	if len(data) > maxRecordSize {
-		return ErrRecordSizeExceeded
+
+	var release = true
+	defer func() {
+		if release {
+			p.backlog.release()
+		}
+	}()
+
+	partitionKey := userRecord.PartitionKey()
+	partitionKeySize := len(partitionKey)
+	if partitionKeySize < 1 || partitionKeySize > 256 {
+		return &ErrIllegalPartitionKey{UserRecord: userRecord}
 	}
-	if l := len(partitionKey); l < 1 || l > 256 {
-		return ErrIllegalPartitionKey
+
+	// Kinesis counts partition key size towards size limits
+	recordSize := userRecord.Size() + partitionKeySize
+	if recordSize > maxRecordSize {
+		return &ErrRecordSizeExceeded{UserRecord: userRecord}
 	}
-	nbytes := len(data) + len([]byte(partitionKey))
+
+	var (
+		record *AggregatedRecordRequest
+		err    error
+	)
 	// if the record size is bigger than aggregation size
 	// handle it as a simple kinesis record
-	if nbytes > p.AggregateBatchSize {
-		p.records <- &kinesis.PutRecordsRequestEntry{
-			Data:         data,
-			PartitionKey: &partitionKey,
-		}
+	// TODO: this logic is not enforced when doing reaggregation after shard refresh
+	if recordSize > p.AggregateBatchSize {
+		record = NewAggregatedRecordRequest(userRecord.Data(), &partitionKey, nil, []UserRecord{userRecord})
 	} else {
-		p.Lock()
-		needToDrain := nbytes+p.aggregator.Size()+md5.Size+len(magicNumber)+partitionKeyIndexSize > maxRecordSize || p.aggregator.Count() >= p.AggregateBatchCount
-		var (
-			record *kinesis.PutRecordsRequestEntry
-			err    error
-		)
-		if needToDrain {
-			if record, err = p.aggregator.Drain(); err != nil {
-				p.Logger.Error("drain aggregator", err)
-			}
-		}
-		p.aggregator.Put(data, partitionKey)
-		p.Unlock()
-		// release the lock and then pipe the record to the records channel
-		// we did it, because the "send" operation blocks when the backlog is full
-		// and this can cause deadlock(when we never release the lock)
-		if needToDrain && record != nil {
-			p.records <- record
-		}
+		record, err = p.shardMap.Put(userRecord)
 	}
-	return nil
-}
-// Failure record type
-type FailureRecord struct {
-	error
-	Data         []byte
-	PartitionKey string
-}
-
-// NotifyFailures registers and return listener to handle undeliverable messages.
-// The incoming struct has a copy of the Data and the PartitionKey along with some
-// error information about why the publishing failed.
-func (p *Producer) NotifyFailures() <-chan *FailureRecord { - p.Lock() - defer p.Unlock() - if !p.notify { - p.notify = true - p.failure = make(chan *FailureRecord, p.BacklogCount) + if record != nil { + // if we are going to send a record over the records channel + // we hold the semaphore until that record has been sent + // this way we can rely on p.backlog.wait() to mean all waiting puts complete and + // future puts are blocked + release = false + go func() { + p.pool.Add(record) + p.backlog.release() + }() } - return p.failure + + return err } -// Start the producer func (p *Producer) Start() { - p.Logger.Info("starting producer", LogValue{"stream", p.StreamName}) + poolErrs := p.pool.Errors() + // listen for errors from the worker pool p.notify() will send on the failures + // channel if p.NotifyFailures() has been called + go func() { + for err := range poolErrs { + p.notify(err) + } + // we can close p.failure after pool error channel has closed + // because + p.Lock() + if p.failures != nil { + close(p.failures) + p.failures = nil + } + p.Unlock() + }() + p.pool.Start() go p.loop() } -// Stop the producer gracefully. Flushes any in-flight data. func (p *Producer) Stop() { - p.Lock() - p.stopped = true - p.Unlock() - p.Logger.Info("stopping producer", LogValue{"backlog", len(p.records)}) - - // drain - if record, ok := p.drainIfNeed(); ok { - p.records <- record - } + // signal to stop any future Puts + close(p.stopped) + // signal to main loop to begin cleanup process + p.done <- struct{}{} + // wait for the worker pool to complete + p.pool.Wait() + // send another signal to main loop to exit p.done <- struct{}{} - close(p.records) - - // wait <-p.done - p.semaphore.wait() +} - // close the failures channel if we notify - p.RLock() - if p.notify { - close(p.failure) +// NotifyFailures registers and return listener to handle undeliverable messages. +// The incoming struct has a copy of the Data and the PartitionKey along with some +// error information about why the publishing failed. +func (p *Producer) NotifyFailures() <-chan error { + p.Lock() + defer p.Unlock() + if p.failures == nil { + p.failures = make(chan error, p.BacklogCount) } - p.RUnlock() - p.Logger.Info("stopped producer") + return p.failures } -// loop and flush at the configured interval, or when the buffer is exceeded. func (p *Producer) loop() { - size := 0 - drain := false - buf := make([]*kinesis.PutRecordsRequestEntry, 0, p.BatchCount) - tick := time.NewTicker(p.FlushInterval) - - flush := func(msg string) { - p.semaphore.acquire() - go p.flush(buf, msg) - buf = nil - size = 0 - } + var ( + stop chan struct{} + done chan struct{} = p.done + flushTick *time.Ticker = time.NewTicker(p.FlushInterval) + flushTickC <-chan time.Time = flushTick.C + shardTick *time.Ticker + shardTickC <-chan time.Time + ) - bufAppend := func(record *kinesis.PutRecordsRequestEntry) { - // the record size limit applies to the total size of the - // partition key and data blob. 
- rsize := len(record.Data) + len([]byte(*record.PartitionKey)) - if size+rsize > p.BatchSize { - flush("batch size") - } - size += rsize - buf = append(buf, record) - if len(buf) >= p.BatchCount { - flush("batch length") - } + if p.ShardRefreshInterval != 0 { + shardTick = time.NewTicker(p.ShardRefreshInterval) + shardTickC = shardTick.C + defer shardTick.Stop() } - defer tick.Stop() + defer flushTick.Stop() defer close(p.done) + flush := func() { + records := p.drain() + for _, record := range records { + p.pool.Add(record) + } + p.pool.Flush() + } + for { select { - case record, ok := <-p.records: - if drain && !ok { - if size > 0 { - flush("drain") - } - p.Logger.Info("backlog drained") - return - } - bufAppend(record) - case <-tick.C: - if record, ok := p.drainIfNeed(); ok { - bufAppend(record) + case <-flushTickC: + flush() + case <-shardTickC: + err := p.updateShards(done == nil) + if err != nil { + p.Logger.Error("UpdateShards error", err) + p.notify(err) } - // if the buffer is still containing records - if size > 0 { - flush("interval") - } - case <-p.done: - drain = true + case <-done: + // after waiting for the pool to finish, Stop() will send another signal to the done + // channel, the second time signaling its safe to end this go routine + stop, done = done, nil + // once we are done we no longer need flush tick as we are already + // flushing the backlog + flushTickC = nil + // block any more puts from happening + p.backlog.wait(p.BacklogCount) + // backlog is flushed and no more records are incomming + // flush any remaining records in the aggregator + flush() + // with puts blocked and flush complete, we can close input channel safely + p.pool.Close() + case <-stop: + return } } } -func (p *Producer) drainIfNeed() (*kinesis.PutRecordsRequestEntry, bool) { - p.RLock() - needToDrain := p.aggregator.Size() > 0 - p.RUnlock() - if needToDrain { - p.Lock() - record, err := p.aggregator.Drain() - p.Unlock() - if err != nil { - p.Logger.Error("drain aggregator", err) - } else { - return record, true - } +func (p *Producer) updateShards(done bool) error { + old := p.shardMap.Shards() + shards, updated, err := p.GetShards(old) + if err != nil { + return err } - return nil, false -} - -// flush records and retry failures if necessary. -// for example: when we get "ProvisionedThroughputExceededException" -func (p *Producer) flush(records []*kinesis.PutRecordsRequestEntry, reason string) { - b := &backoff.Backoff{ - Jitter: true, + if !updated { + return nil } - defer p.semaphore.release() - - for { - p.Logger.Info("flushing records", LogValue{"reason", reason}, LogValue{"records", len(records)}) - out, err := p.Client.PutRecords(&kinesis.PutRecordsInput{ - StreamName: &p.StreamName, - Records: records, - }) - - if err != nil { - p.Logger.Error("flush", err) - p.RLock() - notify := p.notify - p.RUnlock() - if notify { - p.dispatchFailures(records, err) - } - return - } - - if p.Verbose { - for i, r := range out.Records { - values := make([]LogValue, 2) - if r.ErrorCode != nil { - values[0] = LogValue{"ErrorCode", *r.ErrorCode} - values[1] = LogValue{"ErrorMessage", *r.ErrorMessage} - } else { - values[0] = LogValue{"ShardId", *r.ShardId} - values[1] = LogValue{"SequenceNumber", *r.SequenceNumber} - } - p.Logger.Info(fmt.Sprintf("Result[%d]", i), values...) 
- } - } + if !done { + // if done signal has not been received yet, flush all backlogged puts into the worker + // pool and block additional puts + p.backlog.wait(p.BacklogCount) + } - failed := *out.FailedRecordCount - if failed == 0 { - return - } + // pause and drain the worker pool + pending := p.pool.Pause() - duration := b.Duration() + // update the shards and reaggregate pending records + records, err := p.shardMap.UpdateShards(shards, pending) - p.Logger.Info( - "put failures", - LogValue{"failures", failed}, - LogValue{"backoff", duration.String()}, - ) - time.Sleep(duration) + // resume the worker pool + p.pool.Resume(records) - // change the logging state for the next itertion - reason = "retry" - records = failures(records, out.Records) + if !done { + // if done signal has not been received yet, re-open the backlog to accept more Puts + p.backlog.open(p.BacklogCount) } + + return err } -// dispatchFailures gets batch of records, extract them, and push them -// into the failure channel -func (p *Producer) dispatchFailures(records []*kinesis.PutRecordsRequestEntry, err error) { - for _, r := range records { - if isAggregated(r) { - p.dispatchFailures(extractRecords(r), err) - } else { - p.failure <- &FailureRecord{err, r.Data, *r.PartitionKey} - } +func (p *Producer) drain() []*AggregatedRecordRequest { + if p.shardMap.Size() == 0 { + return nil + } + records, errs := p.shardMap.Drain() + if len(errs) > 0 { + p.notify(errs...) } + return records } -// failures returns the failed records as indicated in the response. -func failures(records []*kinesis.PutRecordsRequestEntry, - response []*kinesis.PutRecordsResultEntry) (out []*kinesis.PutRecordsRequestEntry) { - for i, record := range response { - if record.ErrorCode != nil { - out = append(out, records[i]) +func (p *Producer) notify(errs ...error) { + p.RLock() + if p.failures != nil { + for _, err := range errs { + p.failures <- err } } - return + p.RUnlock() } diff --git a/producer_test.go b/producer_test.go index fff8662..a07492f 100644 --- a/producer_test.go +++ b/producer_test.go @@ -2,11 +2,16 @@ package producer import ( "errors" + "fmt" + "runtime" "sync" "testing" + "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/kinesis" k "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/google/uuid" ) type responseMock struct { @@ -156,17 +161,30 @@ func TestProducer(t *testing.T) { test.config.MaxConnections = 1 test.config.Client = test.putter p := New(test.config) + failures := p.NotifyFailures() + var ewg sync.WaitGroup + ewg.Add(1) + go func() { + defer ewg.Done() + for err := range failures { + t.Error(err) + } + }() p.Start() var wg sync.WaitGroup wg.Add(len(test.records)) for _, r := range test.records { go func(s string) { - p.Put([]byte(s), s) + err := p.Put([]byte(s), s) + if err != nil { + t.Error(err) + } wg.Done() }(r) } wg.Wait() p.Stop() + ewg.Wait() for k, v := range test.putter.incoming { if len(v) != len(test.outgoing[k]) { t.Errorf("failed test: %s\n\texcpeted:%v\n\tactual: %v", test.name, @@ -190,29 +208,399 @@ func TestNotify(t *testing.T) { }) p.Start() records := genBulk(10, "bar") - var wg sync.WaitGroup - wg.Add(len(records)) + failures := p.NotifyFailures() failed := 0 - done := make(chan bool, 1) + done := make(chan struct{}) + // timeout test after 10 seconds + timeout := time.After(10 * time.Second) + var wg sync.WaitGroup + wg.Add(1) go func() { - for _ = range p.NotifyFailures() { - failed++ - wg.Done() + defer wg.Done() + for { + select { + case failure, ok := 
<-failures: + if !ok { + // expect producer close the failures channel + close(done) + return + } + failed += len(failure.(*FailureRecord).UserRecords) + case <-timeout: + return + } } - // expect producer close the failures channel - done <- true }() for _, r := range records { p.Put([]byte(r), r) } - wg.Wait() p.Stop() + wg.Wait() if failed != len(records) { t.Errorf("failed test: NotifyFailure\n\texcpeted:%v\n\tactual:%v", failed, len(records)) } - if !<-done { + select { + case <-done: + default: t.Error("failed test: NotifyFailure\n\texpect failures channel to be closed") } } + +func mockGetShards(startingShards, shards []*k.Shard, updated bool, err error) GetShardsFunc { + calls := 0 + return func(_ []*k.Shard) ([]*k.Shard, bool, error) { + calls++ + switch calls { + case 1: + // first call happens on producer creation + return startingShards, true, nil + case 2: + // second call on first refresh + return shards, updated, err + default: + // any subsequent calls, return no update + return nil, false, nil + } + } +} + +type mockThrottleClient struct { + done chan struct{} +} + +// Error all records to mock in flight requests retry due to throttles +func (c *mockThrottleClient) PutRecords(input *k.PutRecordsInput) (*k.PutRecordsOutput, error) { + select { + case <-c.done: + failed := int64(0) + return &k.PutRecordsOutput{ + FailedRecordCount: &failed, + }, nil + default: + fmt.Println("put records throttle") + failed := int64(len(input.Records)) + code := "errorcode" + var records []*kinesis.PutRecordsResultEntry + for range input.Records { + records = append(records, &kinesis.PutRecordsResultEntry{ + ErrorCode: &code, + }) + } + return &k.PutRecordsOutput{ + FailedRecordCount: &failed, + Records: records, + }, nil + } +} + +// TODO fix this test +// func TestProducerUpdateShards(t *testing.T) { +// testCases := []struct { +// name string +// startingShards string +// aggregateBatchCount int +// records []UserRecord +// getShardsShards string +// getShardsUpdated bool +// getShardsError string +// updateDrained []*AggregatedRecordRequest +// expectedError string +// }{ +// { +// name: "returns error from GetShardsFunc", +// startingShards: "testdata/TestProducerUpdateShards/error/startingShards.json", +// aggregateBatchCount: 2, +// records: []UserRecord{ +// newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), +// newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), +// }, +// getShardsError: "getShards error", +// expectedError: "getShards error", +// }, +// { +// name: "does not drain in flight records", +// startingShards: "testdata/TestProducerUpdateShards/no_update/startingShards.json", +// aggregateBatchCount: 2, +// records: []UserRecord{ +// newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), +// newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), +// }, +// getShardsUpdated: false, +// }, +// { +// name: "updates shards and redistributes inflight records", +// startingShards: "testdata/TestProducerUpdateShards/update/startingShards.json", +// aggregateBatchCount: 1, +// records: 
[]UserRecord{ +// newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), +// newTestUserRecord("fuzz", "110141183460469231731687303715884105727", []byte("hello")), +// newTestUserRecord("buzz", "210141183460469231731687303715884105727", []byte("world")), +// }, +// getShardsShards: "testdata/TestProducerUpdateShards/update/getShardsShards.json", +// getShardsUpdated: true, +// updateDrained: []*AggregatedRecordRequest{ +// &AggregatedRecordRequest{ +// Entry: &k.PutRecordsRequestEntry{ +// // StartingHashKey of first shard +// ExplicitHashKey: aws.String("0"), +// }, +// UserRecords: []UserRecord{ +// newTestUserRecord("foo", "", []byte("hello")), +// }, +// }, +// &AggregatedRecordRequest{ +// Entry: &k.PutRecordsRequestEntry{ +// // StartingHashKey of second shard +// ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), +// }, +// UserRecords: []UserRecord{ +// newTestUserRecord("bar", "", []byte("world")), +// }, +// }, +// }, +// }, +// } + +// for _, tc := range testCases { +// t.Run(tc.name, func(t *testing.T) { +// var ( +// startingShards []*k.Shard +// getShardsShards []*k.Shard +// getShardsError error +// ) +// if tc.startingShards != "" { +// startingShards = make([]*k.Shard, 0) +// loadJSONFromFile(t, tc.startingShards, &startingShards) +// } +// if tc.getShardsShards != "" { +// getShardsShards = make([]*k.Shard, 0) +// loadJSONFromFile(t, tc.getShardsShards, &getShardsShards) +// } +// if tc.getShardsError != "" { +// getShardsError = errors.New(tc.getShardsError) +// } +// getShards := mockGetShards(startingShards, getShardsShards, tc.getShardsUpdated, getShardsError) + +// client := &mockThrottleClient{ +// done: make(chan struct{}), +// } + +// producer := New(&Config{ +// AggregateBatchCount: tc.aggregateBatchCount, +// GetShards: getShards, +// FlushInterval: time.Duration(1) * time.Second, +// StreamName: tc.name, +// Client: client, +// }) + +// var ( +// failures = producer.NotifyFailures() +// done = make(chan struct{}) +// timeout = time.After(10 * time.Second) +// wg sync.WaitGroup +// ) +// wg.Add(1) +// go func() { +// defer wg.Done() +// for { +// select { +// case failure, ok := <-failures: +// if !ok { +// // expect producer close the failures channel +// close(done) +// return +// } +// t.Fatal(failure.Error()) +// case <-timeout: +// return +// } +// } +// }() + +// producer.Start() + +// // populate initial records +// // this will persist in flight due to throttle client +// for _, record := range tc.records { +// err := producer.PutUserRecord(record) +// require.NoError(t, err) +// } + +// // wait long enough for flush tick to occur +// time.Sleep(time.Duration(2) * time.Second) + +// gotUpdateDrained, gotError := producer.updateShards() + +// if tc.expectedError != "" { +// require.EqualError(t, gotError, tc.expectedError) +// } else if !tc.getShardsUpdated { +// require.Nil(t, gotUpdateDrained) +// require.Nil(t, gotError) +// } else { +// require.Nil(t, gotError) +// } + +// compareAggregatedRecordRequests(t, tc.updateDrained, gotUpdateDrained) +// close(client.done) +// producer.Stop() +// wg.Wait() +// select { +// case <-done: +// default: +// t.Error("failed test: \n\texpect failures channel to be closed") +// } +// }) +// } +// } + +type mockBenchmarkClient struct { + b *testing.B +} + +func (_ *mockBenchmarkClient) PutRecords(_ *k.PutRecordsInput) (*k.PutRecordsOutput, error) { + failed := int64(0) + return 
&k.PutRecordsOutput{ + FailedRecordCount: &failed, + }, nil +} + +func simpleUUIDRecords(dataSize int) func(int) ([]UserRecord, error) { + return func(count int) ([]UserRecord, error) { + records := make([]UserRecord, count) + for i := 0; i < count; i++ { + records[i] = newTestUserRecord(uuid.New().String(), "", mockData("foobar", dataSize)) + } + return records, nil + } +} + +func explicitHashKeyRecords(getShards GetShardsFunc, dataSize int) func(int) ([]UserRecord, error) { + return func(count int) ([]UserRecord, error) { + shards, _, err := getShards(nil) + if err != nil { + return nil, err + } + + shardCount := len(shards) + records := make([]UserRecord, count) + for i := 0; i < count; i++ { + bucket := i % shardCount + shard := shards[bucket] + records[i] = newTestUserRecord( + uuid.New().String(), + *shard.HashKeyRange.StartingHashKey, + mockData("foobar", dataSize)) + } + return records, nil + } +} + +func BenchmarkProducer(b *testing.B) { + testCases := []struct { + name string + config *Config + records func(count int) ([]UserRecord, error) + }{ + { + name: "default producer", + config: &Config{ + StreamName: "default producer", + BacklogCount: 10000, + }, + records: simpleUUIDRecords(1024), + }, + { + name: "10 shard count", + config: &Config{ + StreamName: "10 shard count", + GetShards: StaticGetShardsFunc(10), + BacklogCount: 10000, + }, + records: simpleUUIDRecords(1024), + }, + { + name: "500 shard count", + config: &Config{ + StreamName: "500 shard count", + GetShards: StaticGetShardsFunc(500), + BacklogCount: 10000, + }, + records: simpleUUIDRecords(1024), + }, + { + name: "10 shard count using explicit hash key", + config: &Config{ + StreamName: "10 shard count", + GetShards: StaticGetShardsFunc(10), + BacklogCount: 10000, + }, + records: explicitHashKeyRecords(StaticGetShardsFunc(10), 1024), + }, + { + name: "500 shard count using explicit hash key", + config: &Config{ + StreamName: "500 shard count", + GetShards: StaticGetShardsFunc(500), + BacklogCount: 10000, + }, + records: explicitHashKeyRecords(StaticGetShardsFunc(500), 1024), + }, + } + + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + tc.config.Client = &mockBenchmarkClient{ + b: b, + } + tc.config.Logger = &NopLogger{} + + p := New(tc.config) + + failures := p.NotifyFailures() + failuresDone := make(chan struct{}) + go func() { + defer close(failuresDone) + for f := range failures { + b.Fatal(f.Error()) + } + }() + + workerWG := new(sync.WaitGroup) + workers := runtime.NumCPU() + each := b.N / workers + workerWG.Add(workers) + + records, err := tc.records(b.N) + if err != nil { + b.Fatal(err) + } + + p.Start() + + b.ResetTimer() + + for i := 0; i < workers; i++ { + go func(index int) { + for j := 0; j < each; j++ { + record := records[index*each+j] + err := p.PutUserRecord(record) + if err != nil { + b.Fatal(err) + } + } + workerWG.Done() + }(i) + } + workerWG.Wait() + p.Stop() + <-failuresDone + }) + } +} diff --git a/semaphore.go b/semaphore.go index 0055b8a..268f156 100644 --- a/semaphore.go +++ b/semaphore.go @@ -15,8 +15,16 @@ func (s semaphore) release() { } // wait block until the last goroutine release the lock -func (s semaphore) wait() { - for i := 0; i < cap(s); i++ { +func (s semaphore) wait(count int) { + for i := 0; i < count; i++ { s <- struct{}{} } } + +// releases the semaphore for use again after a wait call +// only use this after calling wait() +func (s semaphore) open(count int) { + for i := 0; i < count; i++ { + <-s + } +} diff --git a/shard_map.go b/shard_map.go new 
file mode 100644 index 0000000..5db1fae --- /dev/null +++ b/shard_map.go @@ -0,0 +1,344 @@ +package producer + +import ( + "crypto/md5" + "math/big" + "sort" + "sync" + + k "github.com/aws/aws-sdk-go/service/kinesis" +) + +// 2^128 exclusive upper bound +// Hash key ranges are 0 indexed, so true max is 2^128 - 1 +const maxHashKeyRange = "340282366920938463463374607431768211455" + +// ShardLister is the interface that wraps the KinesisAPI.ListShards method. +type ShardLister interface { + ListShards(input *k.ListShardsInput) (*k.ListShardsOutput, error) +} + +// GetKinesisShardsFunc gets the active list of shards from Kinesis.ListShards API +func GetKinesisShardsFunc(client ShardLister, streamName string) GetShardsFunc { + return func(old []*k.Shard) ([]*k.Shard, bool, error) { + var ( + shards []*k.Shard + next *string + ) + + for { + input := &k.ListShardsInput{} + if next != nil { + input.NextToken = next + } else { + input.StreamName = &streamName + } + + resp, err := client.ListShards(input) + if err != nil { + return nil, false, err + } + + for _, shard := range resp.Shards { + // There may be many shards with overlapping HashKeyRanges due to prior merge and + // split operations. The currently open shards are the ones that do not have a + // SequenceNumberRange.EndingSequenceNumber. + if shard.SequenceNumberRange.EndingSequenceNumber == nil { + shards = append(shards, shard) + } + } + + next = resp.NextToken + if next == nil { + break + } + } + + sort.Sort(ShardSlice(shards)) + + if shardsEqual(old, shards) { + return nil, false, nil + } + return shards, true, nil + } +} + +// StaticGetShardsFunc returns a GetShardsFunc that when called, will generate a static +// list of shards with length count whos HashKeyRanges are evenly distributed +func StaticGetShardsFunc(count int) GetShardsFunc { + return func(old []*k.Shard) ([]*k.Shard, bool, error) { + if count == 0 { + return nil, false, nil + } + + step := big.NewInt(int64(0)) + step, _ = step.SetString(maxHashKeyRange, 10) + bCount := big.NewInt(int64(count)) + step = step.Div(step, bCount) + b1 := big.NewInt(int64(1)) + + shards := make([]*k.Shard, count) + key := big.NewInt(int64(0)) + for i := 0; i < count; i++ { + shard := new(k.Shard) + hkRange := new(k.HashKeyRange) + + bI := big.NewInt(int64(i)) + // starting key range (step * i) + key = key.Mul(bI, step) + hkRange = hkRange.SetStartingHashKey(key.String()) + // ending key range ((step * (i + 1)) - 1) + bINext := big.NewInt(int64(i + 1)) + key = key.Mul(bINext, step) + key = key.Sub(key, b1) + hkRange = hkRange.SetEndingHashKey(key.String()) + + // TODO: Is setting other shard properties necessary? 
+ shard = shard.SetHashKeyRange(hkRange) + shards[i] = shard + } + // Set last shard end range to max to account for small rounding errors + shards[len(shards)-1].HashKeyRange.SetEndingHashKey(maxHashKeyRange) + return shards, false, nil + } +} + +type ShardSlice []*k.Shard + +func (p ShardSlice) Len() int { return len(p) } +func (p ShardSlice) Less(i, j int) bool { + a, _ := new(big.Int).SetString(*p[i].HashKeyRange.StartingHashKey, 10) + b, _ := new(big.Int).SetString(*p[j].HashKeyRange.StartingHashKey, 10) + // a < b + return a.Cmp(b) == -1 +} +func (p ShardSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// Checks to see if the shards have the same hash key ranges +func shardsEqual(a, b []*k.Shard) bool { + if len(a) != len(b) { + return false + } + for i, ashard := range a { + bshard := b[i] + if *ashard.HashKeyRange.StartingHashKey != *bshard.HashKeyRange.StartingHashKey || + *ashard.HashKeyRange.EndingHashKey != *bshard.HashKeyRange.EndingHashKey { + return false + } + } + return true +} + +type ShardMap struct { + sync.RWMutex + shards []*k.Shard + aggregators []*Aggregator + // aggregateBatchCount determine the maximum number of items to pack into an aggregated record. + aggregateBatchCount int +} + +// NewShardMap initializes an aggregator for each shard. +// UserRecords that map to the same shard based on MD5 hash of their partition +// key (Same method used by Kinesis) will be aggregated together. Aggregators will use an +// ExplicitHashKey from their assigned shards when creating kinesis.PutRecordsRequestEntry. +// A ShardMap with an empty shards slice will return to unsharded behavior with a single +// aggregator. The aggregator will instead use the PartitionKey of the first UserRecord and +// no ExplicitHashKey. +func NewShardMap(shards []*k.Shard, aggregateBatchCount int) *ShardMap { + return &ShardMap{ + shards: shards, + aggregators: makeAggregators(shards), + aggregateBatchCount: aggregateBatchCount, + } +} + +// Put puts a UserRecord into the aggregator that maps to its partition key. +func (m *ShardMap) Put(userRecord UserRecord) (*AggregatedRecordRequest, error) { + m.RLock() + drained, err := m.put(userRecord) + // Not using defer to avoid runtime overhead + m.RUnlock() + return drained, err +} + +// Size return how many bytes stored in all the aggregators. +// including partition keys. +func (m *ShardMap) Size() int { + m.RLock() + size := 0 + for _, a := range m.aggregators { + a.RLock() + size += a.Size() + a.RUnlock() + } + m.RUnlock() + return size +} + +// Drain drains all the aggregators and returns a list of the results +func (m *ShardMap) Drain() ([]*AggregatedRecordRequest, []error) { + m.RLock() + var ( + requests []*AggregatedRecordRequest + errs []error + ) + for _, a := range m.aggregators { + a.Lock() + req, err := a.Drain() + a.Unlock() + if err != nil { + errs = append(errs, err) + } else if req != nil { + requests = append(requests, req) + } + } + m.RUnlock() + return requests, errs +} + +// Shards returns the list of shards +func (m *ShardMap) Shards() []*k.Shard { + m.RLock() + shards := m.shards + m.RUnlock() + return shards +} + +// Update the list of shards and redistribute buffered user records. +// Returns any records that were drained due to redistribution. +// Shards are not updated if an error occurs during redistribution. +// TODO: Can we optimize this? +// TODO: How to handle shard splitting? 
If a shard splits but we don't remap before sending +// records to the new shards, once we do update our mapping, user records may end up +// in a new shard and we would lose the shard ordering. Consumer can probably figure +// it out since we retain original partition keys (but not explicit hash keys) +// Shard merging should not be an issue since records from both shards should fall +// into the merged hash key range. +func (m *ShardMap) UpdateShards(shards []*k.Shard, pendingRecords []*AggregatedRecordRequest) ([]*AggregatedRecordRequest, error) { + m.Lock() + defer m.Unlock() + + update := NewShardMap(shards, m.aggregateBatchCount) + var drained []*AggregatedRecordRequest + + // first put any pending UserRecords from inflight requests + for _, record := range pendingRecords { + for _, userRecord := range record.UserRecords { + req, err := update.put(userRecord) + if err != nil { + // if we encounter an error trying to redistribute the records, return the pending + // records to the Producer tries to send them again. They won't be redistributed + // across new shards, but at least they won't be lost. + return pendingRecords, err + } + if req != nil { + drained = append(drained, req) + } + } + } + // then redistribute the records still being aggregated + for _, agg := range m.aggregators { + // We don't need to get the aggregator lock because we have the shard map write lock + for _, userRecord := range agg.buf { + req, err := update.put(userRecord) + if err != nil { + return pendingRecords, err + } + if req != nil { + drained = append(drained, req) + } + } + } + // Only update m if we successfully redistributed all the user records + m.shards = update.shards + m.aggregators = update.aggregators + return drained, nil +} + +// puts a UserRecord into the aggregator that maps to its partition key. +// Not thread safe. acquire lock before calling. +func (m *ShardMap) put(userRecord UserRecord) (*AggregatedRecordRequest, error) { + bucket := m.bucket(userRecord) + if bucket == -1 { + return nil, &ShardBucketError{UserRecord: userRecord} + } + a := m.aggregators[bucket] + a.Lock() + var ( + needToDrain = a.WillOverflow(userRecord) || a.Count() >= m.aggregateBatchCount + + drained *AggregatedRecordRequest + err error + ) + if needToDrain { + drained, err = a.Drain() + } + a.Put(userRecord) + a.Unlock() + return drained, err +} + +// bucket returns the index of the shard the given partition key maps to. +// Returns -1 if partition key is outside shard range. +// Assumes shards is ordered by contiguous HaskKeyRange ascending. If there are gaps in +// shard hash key ranges and the partition key falls into one of the gaps, it will be placed +// in the shard with the larger starting HashKeyRange +// Not thread safe. acquire lock before calling. +// TODO: Can we optimize this? Cache for pk -> bucket? 
+func (m *ShardMap) bucket(userRecord UserRecord) int { + if len(m.shards) == 0 { + return 0 + } + + hk := userRecord.ExplicitHashKey() + if hk == nil { + hk = hashKey(userRecord.PartitionKey()) + } + sortFunc := func(i int) bool { + shard := m.shards[i] + end := big.NewInt(int64(0)) + end, _ = end.SetString(*shard.HashKeyRange.EndingHashKey, 10) + // end >= hk + return end.Cmp(hk) > -1 + } + + // Search uses binary search to find and return the smallest index i in [0, n) + // at which f(i) is true + // See https://golang.org/pkg/sort/#Search + bucket := sort.Search(len(m.shards), sortFunc) + if bucket == len(m.shards) { + return -1 + } + return bucket +} + +// Calculate a new explicit hash key based on the given partition key. +// (following the algorithm from the original KPL). +// Copied from: https://github.com/a8m/kinesis-producer/issues/1#issuecomment-524620994 +func hashKey(pk string) *big.Int { + h := md5.New() + h.Write([]byte(pk)) + sum := h.Sum(nil) + hk := big.NewInt(int64(0)) + for i := 0; i < md5.Size; i++ { + p := big.NewInt(int64(sum[i])) + p = p.Lsh(p, uint((16-i-1)*8)) + hk = hk.Add(hk, p) + } + return hk +} + +func makeAggregators(shards []*k.Shard) []*Aggregator { + count := len(shards) + if count == 0 { + return []*Aggregator{NewAggregator(nil)} + } + + aggregators := make([]*Aggregator, count) + for i := 0; i < count; i++ { + shard := shards[i] + // Is using the StartingHashKey sufficient? + aggregators[i] = NewAggregator(shard.HashKeyRange.StartingHashKey) + } + return aggregators +} diff --git a/shard_map_test.go b/shard_map_test.go new file mode 100644 index 0000000..035deb4 --- /dev/null +++ b/shard_map_test.go @@ -0,0 +1,932 @@ +package producer + +import ( + "errors" + "math/big" + "math/rand" + "sort" + "sync" + "testing" + + "github.com/aws/aws-sdk-go/aws" + k "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/stretchr/testify/require" +) + +type testUserRecord struct { + partitionKey string + explicitHashKey *big.Int + data []byte +} + +func newTestUserRecord(partitionKey string, explicitHashKey string, data []byte) *testUserRecord { + var ehk *big.Int + if explicitHashKey != "" { + ehk = big.NewInt(int64(0)) + ehk, _ = ehk.SetString(explicitHashKey, 10) + } + return &testUserRecord{ + partitionKey: partitionKey, + explicitHashKey: ehk, + data: data, + } +} + +func (r *testUserRecord) PartitionKey() string { return r.partitionKey } +func (r *testUserRecord) ExplicitHashKey() *big.Int { return r.explicitHashKey } +func (r *testUserRecord) Data() []byte { return r.data } +func (r *testUserRecord) Size() int { return len(r.data) } + +// make arbitrarly long data slice with data +func mockData(val string, length int) []byte { + ret := make([]byte, length) + copy(ret, val) + return ret +} + +func TestNewShardMap(t *testing.T) { + testCases := []struct { + name string + shards GetShardsFunc + }{ + { + name: "Nil shards creates one aggregator with nil ExplicitHashKey", + shards: StaticGetShardsFunc(0), + }, + { + name: "Creates aggregator for each shard using StartingHashKey for ExplicitHashKey", + shards: StaticGetShardsFunc(3), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + shards, _, _ := tc.shards(nil) + batchCount := rand.Intn(10) + shardMap := NewShardMap(shards, batchCount) + + require.Equal(t, batchCount, shardMap.aggregateBatchCount) + + if len(shards) == 0 { + require.Equal(t, 1, len(shardMap.aggregators)) + require.Nil(t, shardMap.aggregators[0].explicitHashKey) + return + } + + require.Equal(t, len(shards), 
len(shardMap.aggregators)) + for i, agg := range shardMap.aggregators { + require.Equal(t, shards[i].HashKeyRange.StartingHashKey, agg.explicitHashKey) + } + }) + } +} + +func TestShardMapPut(t *testing.T) { + testCases := []struct { + name string + shards GetShardsFunc + aggregateBatchCount int + records []UserRecord + putDrained []*AggregatedRecordRequest + postDrained []*AggregatedRecordRequest + }{ + { + name: "Aggregates all records together with no shards", + shards: StaticGetShardsFunc(0), + aggregateBatchCount: 2, + records: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{}, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + }, + }, + { + name: "Drains record when AggregateBatchCount exceeded", + shards: StaticGetShardsFunc(0), + aggregateBatchCount: 1, + records: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + putDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{}, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{}, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + }, + }, + { + name: "Drains record when maxRecordSize exceeded", + shards: StaticGetShardsFunc(0), + aggregateBatchCount: 4, + records: []UserRecord{ + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + }, + putDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{}, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{}, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", mockData("hello", (maxRecordSize-100)/3)), + }, + }, + }, + }, + { + name: "Buckets UserRecords by ExplicitHashKey for aggregation", + shards: StaticGetShardsFunc(2), + aggregateBatchCount: 2, + records: []UserRecord{ + newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("foo", "120141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), + }, + putDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + }, + 
postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105727"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "", []byte("world")), + newTestUserRecord("bar", "", []byte("world")), + }, + }, + }, + }, + { + name: "Buckets UserRecords by md5 hash of PartitionKey for aggregation", + shards: StaticGetShardsFunc(2), + aggregateBatchCount: 2, + records: []UserRecord{ + // md5 hash of a4c35... is 119180311785658537254883594002133722733 + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + // md5 hash pf 4459a... is 269473573115900060929316357216383275764 + newTestUserRecord("4459a807a2ef401690812235bad5cbf5", "", []byte("world")), + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + newTestUserRecord("4459a807a2ef401690812235bad5cbf5", "", []byte("world")), + }, + putDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("a4c35bc0b0474f3ca691bdbd526f1f50", "", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105727"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("4459a807a2ef401690812235bad5cbf5", "", []byte("world")), + newTestUserRecord("4459a807a2ef401690812235bad5cbf5", "", []byte("world")), + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + shards, _, _ := tc.shards(nil) + shardMap := NewShardMap(shards, tc.aggregateBatchCount) + + var ( + putWg sync.WaitGroup + putDrainedWg sync.WaitGroup + putDrained []*AggregatedRecordRequest + putDrainedC = make(chan *AggregatedRecordRequest) + ) + putWg.Add(len(tc.records)) + + for _, r := range tc.records { + go func(record UserRecord) { + defer putWg.Done() + d, err := shardMap.Put(record) + require.NoError(t, err) + if d != nil { + putDrainedC <- d + } + }(r) + } + + // collect any drained records during Put + putDrainedWg.Add(1) + go func() { + for d := range putDrainedC { + putDrained = append(putDrained, d) + } + putDrainedWg.Done() + }() + + putWg.Wait() + close(putDrainedC) + putDrainedWg.Wait() + + compareAggregatedRecordRequests(t, tc.putDrained, putDrained) + + postDrained, errs := shardMap.Drain() + require.Nil(t, errs) + + compareAggregatedRecordRequests(t, tc.postDrained, postDrained) + }) + } +} + +type byExplicitHashKey []*AggregatedRecordRequest + +func (a byExplicitHashKey) Len() int { return len(a) } +func (a byExplicitHashKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 
+func (a byExplicitHashKey) Less(i, j int) bool { + return *a[i].Entry.ExplicitHashKey < *a[j].Entry.ExplicitHashKey +} + +func compareAggregatedRecordRequests(t *testing.T, expected, actual []*AggregatedRecordRequest) { + require.Equal(t, len(expected), len(actual)) + if actual == nil { + return + } + + if actual[0].Entry.ExplicitHashKey != nil { + sort.Sort(byExplicitHashKey(actual)) + } + + for i, e := range expected { + a := actual[i] + + // We do not check PartitionKey or Data as they depend on order which we don't have + // control over + if e.Entry.ExplicitHashKey == nil { + require.Nil(t, a.Entry.ExplicitHashKey) + } else { + require.NotNil(t, a.Entry.ExplicitHashKey) + require.Equal(t, *e.Entry.ExplicitHashKey, *a.Entry.ExplicitHashKey) + } + + require.Equal(t, len(e.UserRecords), len(a.UserRecords)) + // Again because we cannot rely on order, we just compare Data. + // It is assumed for these tests that records expected to be aggregated together have + // the same data values + for j, expectedRecord := range e.UserRecords { + actualRecord := a.UserRecords[j] + require.Equal(t, expectedRecord.Data(), actualRecord.Data()) + } + } +} + +func TestShardMapSize(t *testing.T) { + shards, _, _ := StaticGetShardsFunc(2)(nil) + shardMap := NewShardMap(shards, 1) + + require.Equal(t, 0, shardMap.Size()) + record := newTestUserRecord("foo", "100141183460469231731687303715884105727", mockData("", 10)) + drained, err := shardMap.Put(record) + require.Nil(t, drained) + require.Nil(t, err) + + expectedSize := calculateRecordFieldSize(0, record.Data()) + calculateStringFieldSize("foo") + require.Equal(t, expectedSize, shardMap.Size()) + + record2 := newTestUserRecord("bar", "210141183460469231731687303715884105727", mockData("", 20)) + drained, err = shardMap.Put(record2) + require.Nil(t, drained) + require.Nil(t, err) + + { + expectedSize = calculateRecordFieldSize(0, record.Data()) + calculateStringFieldSize("foo") + expectedSize += calculateRecordFieldSize(0, record2.Data()) + calculateStringFieldSize("bar") + } + require.Equal(t, expectedSize, shardMap.Size()) + + record3 := newTestUserRecord("foo", "100141183460469231731687303715884105727", mockData("", 20)) + drained, err = shardMap.Put(record3) + require.NotNil(t, drained) + require.Nil(t, err) + + { + // record1 drained on put of record3 so don't include in size + expectedSize = calculateRecordFieldSize(0, record2.Data()) + calculateStringFieldSize("bar") + expectedSize += calculateRecordFieldSize(0, record3.Data()) + calculateStringFieldSize("foo") + } + require.Equal(t, expectedSize, shardMap.Size()) + + all, errs := shardMap.Drain() + require.Nil(t, errs) + require.Equal(t, 2, len(all)) + require.Equal(t, 0, shardMap.Size()) + +} + +func TestStaticGetShardsFunc(t *testing.T) { + testCases := []struct { + name string + count int + }{ + { + name: "Returns nil for count 0", + count: 0, + }, + { + name: "Creates evenly distributed shard list of size count", + count: 1, + }, + { + name: "Creates evenly distributed shard list of size count", + count: 10, + }, + { + name: "Creates evenly distributed shard list of size count", + count: 100, + }, + { + name: "Creates evenly distributed shard list of size count", + count: 1000, + }, + { + name: "Creates evenly distributed shard list of size count", + count: 10000, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + getShards := StaticGetShardsFunc(tc.count) + shards, updated, err := getShards(nil) + + require.False(t, updated) + require.Nil(t, err) + + if tc.count 
== 0 { + require.Nil(t, shards) + return + } + + require.Equal(t, tc.count, len(shards)) + require.Equal(t, maxHashKeyRange, *shards[len(shards)-1].HashKeyRange.EndingHashKey) + + var count, step, tolerance *big.Int + { + count = big.NewInt(int64(tc.count)) + step = big.NewInt(int64(0)) + step, _ = step.SetString(maxHashKeyRange, 10) + step = step.Div(step, count) + + // tolerate to account for small rounding errors + tolerance = big.NewInt(int64(tc.count)) + } + + for _, shard := range shards { + start := big.NewInt(int64(0)) + start, _ = start.SetString(*shard.HashKeyRange.StartingHashKey, 10) + end := big.NewInt(int64(0)) + end, _ = end.SetString(*shard.HashKeyRange.EndingHashKey, 10) + + diff := big.NewInt(int64(0)) + diff = diff.Sub(end, start) + + delta := big.NewInt(int64(0)) + delta = delta.Sub(step, diff) + delta = delta.Abs(delta) + delta = delta.Sub(tolerance, delta) + + // tolerance >= |(step - (end - start))| + require.True(t, delta.Sign() >= 0) + } + }) + } +} + +type mockShardLister struct { + t *testing.T + responses []*k.ListShardsOutput + callCount int + streamName string + next *string +} + +func (m *mockShardLister) ListShards(input *k.ListShardsInput) (*k.ListShardsOutput, error) { + m.callCount++ + if m.callCount > len(m.responses) { + return nil, errors.New("ListShards error") + } + + call := m.callCount - 1 + + if call == 0 { + // First call should include StreamName and no NextToken + require.NotNil(m.t, input.StreamName) + require.Equal(m.t, m.streamName, *input.StreamName) + require.Nil(m.t, input.NextToken) + } else { + // Subsequent calls should include NextToken and no StreamName + require.Nil(m.t, input.StreamName) + require.NotNil(m.t, input.NextToken) + require.NotNil(m.t, m.next) + require.Equal(m.t, *m.next, *input.NextToken) + } + + response := m.responses[call] + m.next = response.NextToken + + return response, nil +} + +func TestGetKinesisShardsFunc(t *testing.T) { + testCases := []struct { + name string + oldShards string + listShardsResponses string + expectedShards string + expectedUpdated bool + expectedError string + }{ + { + name: "returns error from ShardLister", + expectedError: "ListShards error", + }, + { + name: "returns list of new shards", + oldShards: "testdata/TestGetKinesisShardsFunc/new_shards/oldShards.json", + listShardsResponses: "testdata/TestGetKinesisShardsFunc/new_shards/listShardsResponses.json", + expectedShards: "testdata/TestGetKinesisShardsFunc/new_shards/expectedShards.json", + expectedUpdated: true, + }, + { + name: "handles unsorted response", + oldShards: "testdata/TestGetKinesisShardsFunc/unsorted/oldShards.json", + listShardsResponses: "testdata/TestGetKinesisShardsFunc/unsorted/listShardsResponses.json", + expectedShards: "testdata/TestGetKinesisShardsFunc/unsorted/expectedShards.json", + expectedUpdated: true, + }, + { + name: "returns false for equal shards", + oldShards: "testdata/TestGetKinesisShardsFunc/equal_shards/oldShards.json", + listShardsResponses: "testdata/TestGetKinesisShardsFunc/equal_shards/listShardsResponses.json", + expectedUpdated: false, + }, + { + name: "returns error from subsequent ListShards calls", + listShardsResponses: "testdata/TestGetKinesisShardsFunc/NextToken_error/listShardsResponses.json", + expectedError: "ListShards error", + }, + { + name: "calls ListShards with NextToken and returns new shards", + oldShards: "testdata/TestGetKinesisShardsFunc/NextToken_new_shards/oldShards.json", + listShardsResponses: 
"testdata/TestGetKinesisShardsFunc/NextToken_new_shards/listShardsResponses.json", + expectedShards: "testdata/TestGetKinesisShardsFunc/NextToken_new_shards/expectedShards.json", + expectedUpdated: true, + }, + { + name: "calls ListShards with NextToken and returns false for equal shards", + oldShards: "testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/oldShards.json", + listShardsResponses: "testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/listShardsResponses.json", + expectedUpdated: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var ( + listShardsResponses []*k.ListShardsOutput + oldShards []*k.Shard + expectedShards []*k.Shard + ) + + if tc.listShardsResponses != "" { + listShardsResponses = make([]*k.ListShardsOutput, 0) + loadJSONFromFile(t, tc.listShardsResponses, &listShardsResponses) + } + + if tc.oldShards != "" { + oldShards = make([]*k.Shard, 0) + loadJSONFromFile(t, tc.oldShards, &oldShards) + } + + if tc.expectedShards != "" { + expectedShards = make([]*k.Shard, 0) + loadJSONFromFile(t, tc.expectedShards, &expectedShards) + } + + client := &mockShardLister{ + t: t, + responses: listShardsResponses, + streamName: tc.name, + } + + undertest := GetKinesisShardsFunc(client, tc.name) + gotShards, gotUpdated, gotError := undertest(oldShards) + + if tc.expectedError != "" { + require.Nil(t, gotShards) + require.False(t, gotUpdated) + require.EqualError(t, gotError, tc.expectedError) + } else { + require.Equal(t, expectedShards, gotShards) + require.Equal(t, tc.expectedUpdated, gotUpdated) + require.Nil(t, gotError) + } + }) + } +} + +func TestShardMapUpdateShards(t *testing.T) { + testCases := []struct { + name string + startingShards string + aggregateBatchCount int + records []UserRecord + newShards string + pendingRecords []*AggregatedRecordRequest + updateDrained []*AggregatedRecordRequest + postDrained []*AggregatedRecordRequest + expectedError string + }{ + { + name: "returns error from pending record put", + startingShards: "testdata/TestShardMapUpdateShards/error_pending_put/startingShards.json", + aggregateBatchCount: 2, + records: []UserRecord{ + newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "230141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + }, + newShards: "testdata/TestShardMapUpdateShards/error_pending_put/newShards.json", + pendingRecords: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("baz", "200141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("baz", "210141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + updateDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("baz", "200141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("baz", "210141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + 
ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "", []byte("world")), + }, + }, + }, + expectedError: "ExplicitHashKey outside shard key range: 200141183460469231731687303715884105727", + }, + { + name: "returns error from aggregator record put", + startingShards: "testdata/TestShardMapUpdateShards/error_agg_put/startingShards.json", + aggregateBatchCount: 2, + records: []UserRecord{ + newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "230141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + }, + newShards: "testdata/TestShardMapUpdateShards/error_agg_put/newShards.json", + pendingRecords: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("baz", "120141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("baz", "130141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + updateDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("baz", "120141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("baz", "130141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "", []byte("world")), + }, + }, + }, + expectedError: "ExplicitHashKey outside shard key range: 230141183460469231731687303715884105727", + }, + // { + // name: "does not update shards if updated false", + // startingShards: "testdata/TestShardMapUpdateShards/no_update/startingShards.json", + // aggregateBatchCount: 2, + // records: []UserRecord{ + // newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + // newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), + // newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + // newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), + // }, + // getShardsUpdated: false, + // postDrained: []*AggregatedRecordRequest{ + // &AggregatedRecordRequest{ + // Entry: &k.PutRecordsRequestEntry{ + // // StartingHashKey of first shard + // ExplicitHashKey: aws.String("0"), + // }, + // UserRecords: []UserRecord{ + // newTestUserRecord("foo", "", []byte("hello")), + // newTestUserRecord("foo", "", 
[]byte("hello")), + // }, + // }, + // &AggregatedRecordRequest{ + // Entry: &k.PutRecordsRequestEntry{ + // // StartingHashKey of second shard + // ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + // }, + // UserRecords: []UserRecord{ + // newTestUserRecord("bar", "", []byte("world")), + // newTestUserRecord("bar", "", []byte("world")), + // }, + // }, + // }, + // }, + { + name: "updates shards and redistributes records", + startingShards: "testdata/TestShardMapUpdateShards/update/startingShards.json", + aggregateBatchCount: 4, + records: []UserRecord{ + newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), + }, + newShards: "testdata/TestShardMapUpdateShards/update/newShards.json", + pendingRecords: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "120141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("foo", "130141183460469231731687303715884105727", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "220141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("bar", "230141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "", []byte("world")), + newTestUserRecord("bar", "", []byte("world")), + newTestUserRecord("bar", "", []byte("world")), + newTestUserRecord("bar", "", []byte("world")), + }, + }, + }, + }, + { + name: "updates shards and redistributes records, returning drained records from the process", + startingShards: "testdata/TestShardMapUpdateShards/update_drained/startingShards.json", + aggregateBatchCount: 2, + records: []UserRecord{ + newTestUserRecord("foo", "100141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "200141183460469231731687303715884105727", []byte("world")), + newTestUserRecord("foo", "110141183460469231731687303715884105727", []byte("hello")), + newTestUserRecord("bar", "210141183460469231731687303715884105727", []byte("world")), + }, + newShards: "testdata/TestShardMapUpdateShards/update_drained/newShards.json", + pendingRecords: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ 
+ newTestUserRecord("fuzz", "120141183460469231731687303715884105727", []byte("hello")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of second shard + ExplicitHashKey: aws.String("170141183460469231731687303715884105728"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("buzz", "220141183460469231731687303715884105727", []byte("world")), + }, + }, + }, + updateDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("fuzz", "", []byte("hello")), + newTestUserRecord("buzz", "", []byte("world")), + }, + }, + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("foo", "", []byte("hello")), + newTestUserRecord("foo", "", []byte("hello")), + }, + }, + }, + postDrained: []*AggregatedRecordRequest{ + &AggregatedRecordRequest{ + Entry: &k.PutRecordsRequestEntry{ + // StartingHashKey of first shard + ExplicitHashKey: aws.String("0"), + }, + UserRecords: []UserRecord{ + newTestUserRecord("bar", "", []byte("world")), + newTestUserRecord("bar", "", []byte("world")), + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var ( + startingShards []*k.Shard + ) + if tc.startingShards != "" { + startingShards = make([]*k.Shard, 0) + loadJSONFromFile(t, tc.startingShards, &startingShards) + } + + shardMap := NewShardMap(startingShards, tc.aggregateBatchCount) + + for _, record := range tc.records { + // assume test config to not drain on setup Puts + _, err := shardMap.Put(record) + require.NoError(t, err) + } + + var newShards []*k.Shard + if tc.newShards != "" { + newShards = make([]*k.Shard, 0) + loadJSONFromFile(t, tc.newShards, &newShards) + } + + gotUpdateDrained, gotError := shardMap.UpdateShards(newShards, tc.pendingRecords) + if tc.expectedError != "" { + require.EqualError(t, gotError, tc.expectedError) + require.Equal(t, tc.pendingRecords, gotUpdateDrained) + require.Equal(t, startingShards, shardMap.shards) + } else { + require.Nil(t, gotError) + require.Equal(t, newShards, shardMap.shards) + } + + compareAggregatedRecordRequests(t, tc.updateDrained, gotUpdateDrained) + + gotPostDrained, gotErrors := shardMap.Drain() + require.Nil(t, gotErrors) + + compareAggregatedRecordRequests(t, tc.postDrained, gotPostDrained) + }) + } +} diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/listShardsResponses.json new file mode 100644 index 0000000..3277dc9 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/listShardsResponses.json @@ -0,0 +1,100 @@ +[ + { + "NextToken": "NextToken_1", + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + 
"StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138", + "EndingSequenceNumber": "49607779829788938711531110633687494410773686544326721554" + } + } + ] + }, + { + "NextToken": "NextToken_2", + "Shards": [ + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570", + "EndingSequenceNumber": "49607779829811239456729641256829030129046334905832701986" + } + }, + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642" + } + } + ] + }, + { + "NextToken": "NextToken_3", + "Shards": [ + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506" + } + } + ] + }, + { + "Shards": [ + { + "ShardId": "shardId-000000000006", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/oldShards.json b/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/oldShards.json new file mode 100644 index 0000000..c5cdf2b --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_equal_shards/oldShards.json @@ -0,0 +1,46 @@ +[ + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642" + } + }, + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506" + } + }, + { + "ShardId": "shardId-000000000006", + 
"ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_error/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/NextToken_error/listShardsResponses.json new file mode 100644 index 0000000..489d555 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_error/listShardsResponses.json @@ -0,0 +1,34 @@ +[ + { + "NextToken": "NextToken_1", + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + } + ] + }, + { + "NextToken": "NextToken_2", + "Shards": [ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/expectedShards.json b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/expectedShards.json new file mode 100644 index 0000000..c5cdf2b --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/expectedShards.json @@ -0,0 +1,46 @@ +[ + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642" + } + }, + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506" + } + }, + { + "ShardId": "shardId-000000000006", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/listShardsResponses.json new file mode 100644 index 0000000..3277dc9 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/listShardsResponses.json @@ -0,0 +1,100 @@ +[ + { + "NextToken": "NextToken_1", + "Shards": [ + { + "ShardId": 
"shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138", + "EndingSequenceNumber": "49607779829788938711531110633687494410773686544326721554" + } + } + ] + }, + { + "NextToken": "NextToken_2", + "Shards": [ + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570", + "EndingSequenceNumber": "49607779829811239456729641256829030129046334905832701986" + } + }, + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642" + } + } + ] + }, + { + "NextToken": "NextToken_3", + "Shards": [ + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506" + } + } + ] + }, + { + "Shards": [ + { + "ShardId": "shardId-000000000006", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/oldShards.json b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/oldShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/NextToken_new_shards/oldShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": 
"340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/equal_shards/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/equal_shards/listShardsResponses.json new file mode 100644 index 0000000..22e3494 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/equal_shards/listShardsResponses.json @@ -0,0 +1,39 @@ +[ + { + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/equal_shards/oldShards.json b/testdata/TestGetKinesisShardsFunc/equal_shards/oldShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/equal_shards/oldShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/new_shards/expectedShards.json b/testdata/TestGetKinesisShardsFunc/new_shards/expectedShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/new_shards/expectedShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git 
a/testdata/TestGetKinesisShardsFunc/new_shards/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/new_shards/listShardsResponses.json new file mode 100644 index 0000000..22e3494 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/new_shards/listShardsResponses.json @@ -0,0 +1,39 @@ +[ + { + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/new_shards/oldShards.json b/testdata/TestGetKinesisShardsFunc/new_shards/oldShards.json new file mode 100644 index 0000000..cb4fac2 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/new_shards/oldShards.json @@ -0,0 +1,12 @@ +[ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/unsorted/expectedShards.json b/testdata/TestGetKinesisShardsFunc/unsorted/expectedShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/unsorted/expectedShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestGetKinesisShardsFunc/unsorted/listShardsResponses.json b/testdata/TestGetKinesisShardsFunc/unsorted/listShardsResponses.json new file mode 100644 index 0000000..f173e50 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/unsorted/listShardsResponses.json @@ -0,0 +1,39 @@ +[ + { + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": 
"49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + } + ] + } +] diff --git a/testdata/TestGetKinesisShardsFunc/unsorted/oldShards.json b/testdata/TestGetKinesisShardsFunc/unsorted/oldShards.json new file mode 100644 index 0000000..cb4fac2 --- /dev/null +++ b/testdata/TestGetKinesisShardsFunc/unsorted/oldShards.json @@ -0,0 +1,12 @@ +[ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610" + } + } +] diff --git a/testdata/TestProducerUpdateShards/error/startingShards.json b/testdata/TestProducerUpdateShards/error/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestProducerUpdateShards/error/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestProducerUpdateShards/no_update/startingShards.json b/testdata/TestProducerUpdateShards/no_update/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestProducerUpdateShards/no_update/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestProducerUpdateShards/update/getShardsShards.json b/testdata/TestProducerUpdateShards/update/getShardsShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestProducerUpdateShards/update/getShardsShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": 
"shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestProducerUpdateShards/update/startingShards.json b/testdata/TestProducerUpdateShards/update/startingShards.json new file mode 100644 index 0000000..cb4fac2 --- /dev/null +++ b/testdata/TestProducerUpdateShards/update/startingShards.json @@ -0,0 +1,12 @@ +[ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/error_agg_put/newShards.json b/testdata/TestShardMapUpdateShards/error_agg_put/newShards.json new file mode 100644 index 0000000..a3c75a3 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/error_agg_put/newShards.json @@ -0,0 +1,13 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/error_agg_put/startingShards.json b/testdata/TestShardMapUpdateShards/error_agg_put/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/error_agg_put/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/error_pending_put/newShards.json b/testdata/TestShardMapUpdateShards/error_pending_put/newShards.json new file mode 100644 index 0000000..a3c75a3 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/error_pending_put/newShards.json @@ -0,0 +1,13 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/error_pending_put/startingShards.json 
b/testdata/TestShardMapUpdateShards/error_pending_put/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/error_pending_put/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/no_update/startingShards.json b/testdata/TestShardMapUpdateShards/no_update/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/no_update/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/update/newShards.json b/testdata/TestShardMapUpdateShards/update/newShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/update/newShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/update/startingShards.json b/testdata/TestShardMapUpdateShards/update/startingShards.json new file mode 100644 index 0000000..cb4fac2 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/update/startingShards.json @@ -0,0 +1,12 @@ +[ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/update_drained/newShards.json b/testdata/TestShardMapUpdateShards/update_drained/newShards.json new file mode 100644 index 
0000000..cb4fac2 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/update_drained/newShards.json @@ -0,0 +1,12 @@ +[ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610" + } + } +] diff --git a/testdata/TestShardMapUpdateShards/update_drained/startingShards.json b/testdata/TestShardMapUpdateShards/update_drained/startingShards.json new file mode 100644 index 0000000..b534f60 --- /dev/null +++ b/testdata/TestShardMapUpdateShards/update_drained/startingShards.json @@ -0,0 +1,24 @@ +[ + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570" + } + } +] diff --git a/testdata/temp_shards.json b/testdata/temp_shards.json new file mode 100644 index 0000000..f0c8d49 --- /dev/null +++ b/testdata/temp_shards.json @@ -0,0 +1,211 @@ +{ + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138", + "EndingSequenceNumber": "49607779829788938711531110633687494410773686544326721554" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570", + "EndingSequenceNumber": "49607779829811239456729641256829030129046334905832701986" + } + }, + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642" + } + }, + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + 
"HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506" + } + }, + { + "ShardId": "shardId-000000000006", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938" + } + } + ] +} + + + +{ + "Shards": [ + { + "ShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49595968993647867597933217504568283421571430797549764610", + "EndingSequenceNumber": "49596663835639358786393957172608352134099768717757906946" + } + }, + { + "ShardId": "shardId-000000000001", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829777788338931845322117935477456973969091461138", + "EndingSequenceNumber": "49607779829788938711531110633687494410773686544326721554" + } + }, + { + "ShardId": "shardId-000000000002", + "ParentShardId": "shardId-000000000000", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779829800089084130375945259471195729622330597441570", + "EndingSequenceNumber": "49607779829811239456729641256829030129046334905832701986" + } + }, + { + "ShardId": "shardId-000000000003", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "85070591730234615865843651857942052863" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845878926372270955230306724070309094068785774642", + "EndingSequenceNumber": "49607779845890076744870220541876283003625868766428004402" + } + }, + { + "ShardId": "shardId-000000000004", + "ParentShardId": "shardId-000000000001", + "HashKeyRange": { + "StartingHashKey": "85070591730234615865843651857942052864", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845901227117469485853448259788581742430291755074", + "EndingSequenceNumber": "49607779845912377490068751165017818721898517127933984834" + } + }, + { + "ShardId": "shardId-000000000005", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "255211775190703847597530955573826158591" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845923527862668016476589795506854390791797735506", + "EndingSequenceNumber": "49607779845934678235267281788159354440171165489439965266" + } + }, + { + "ShardId": "shardId-000000000006", + "ParentShardId": "shardId-000000000002", + "HashKeyRange": { + "StartingHashKey": "255211775190703847597530955573826158592", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607779845945828607866547099731331225127039153303715938", + "EndingSequenceNumber": 
"49607779845956978980465812411300890158443813850945945698" + } + }, + { + "ShardId": "shardId-000000000007", + "ParentShardId": "shardId-000000000003", + "AdjacentParentShardId": "shardId-000000000004", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "170141183460469231731687303715884105727" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607780184582644447554059503951213195292473836399755378", + "EndingSequenceNumber": "49607780184593794820153324815520772128609187648585597042" + } + }, + { + "ShardId": "shardId-000000000008", + "ParentShardId": "shardId-000000000005", + "AdjacentParentShardId": "shardId-000000000006", + "HashKeyRange": { + "StartingHashKey": "170141183460469231731687303715884105728", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607780184604945192752590127092748913565122197905735810", + "EndingSequenceNumber": "49607780184616095565351855438662307846881836010091577474" + } + }, + { + "ShardId": "shardId-000000000009", + "ParentShardId": "shardId-000000000007", + "AdjacentParentShardId": "shardId-000000000008", + "HashKeyRange": { + "StartingHashKey": "0", + "EndingHashKey": "340282366920938463463374607431768211455" + }, + "SequenceNumberRange": { + "StartingSequenceNumber": "49607780207106397098069988876902288650667323286767009938" + } + } + ] +} diff --git a/user_record.go b/user_record.go new file mode 100644 index 0000000..0789b72 --- /dev/null +++ b/user_record.go @@ -0,0 +1,34 @@ +package producer + +import "math/big" + +// UserRecord represents an individual record that is meant for aggregation +type UserRecord interface { + // PartitionKey returns the partition key of the record + PartitionKey() string + // ExplicitHashKey returns an optional explicit hash key that will be used for shard + // mapping. Should return nil if there is none. + ExplicitHashKey() *big.Int + // The raw data payload of the record that should be added to the record + Data() []byte + // Size is the size of the record's data. Do not include the size of the partition key + // in this result. The partition key's size is calculated separately by the aggregator. 
+ Size() int +} + +type DataRecord struct { + partitionKey string + data []byte +} + +func NewDataRecord(data []byte, partitionKey string) *DataRecord { + return &DataRecord{ + partitionKey: partitionKey, + data: data, + } +} + +func (r *DataRecord) PartitionKey() string { return r.partitionKey } +func (r *DataRecord) ExplicitHashKey() *big.Int { return nil } +func (r *DataRecord) Data() []byte { return r.data } +func (r *DataRecord) Size() int { return len(r.data) } diff --git a/worker_pool.go b/worker_pool.go new file mode 100644 index 0000000..0020569 --- /dev/null +++ b/worker_pool.go @@ -0,0 +1,312 @@ +package producer + +import ( + "fmt" + "sync" + "time" + + k "github.com/aws/aws-sdk-go/service/kinesis" + "github.com/jpillora/backoff" +) + +type Work struct { + records []*AggregatedRecordRequest + size int + reason string + b *backoff.Backoff +} + +func NewWork(records []*AggregatedRecordRequest, size int, reason string) *Work { + return &Work{ + records: records, + size: size, + reason: reason, + b: &backoff.Backoff{ + Jitter: true, + }, + } +} + +type WorkerPool struct { + *Config + input chan *AggregatedRecordRequest + unfinished chan []*AggregatedRecordRequest + flush chan struct{} + pause chan struct{} + done chan struct{} + errs chan error +} + +func NewWorkerPool(config *Config) *WorkerPool { + return &WorkerPool{ + Config: config, + input: make(chan *AggregatedRecordRequest), + unfinished: make(chan []*AggregatedRecordRequest), + flush: make(chan struct{}), + pause: make(chan struct{}), + done: make(chan struct{}), + errs: make(chan error), + } +} + +func (wp *WorkerPool) Start() { + go wp.loop() +} + +func (wp *WorkerPool) Errors() chan error { + return wp.errs +} + +func (wp *WorkerPool) Add(record *AggregatedRecordRequest) { + wp.input <- record +} + +func (wp *WorkerPool) Pause() []*AggregatedRecordRequest { + wp.pause <- struct{}{} + return <-wp.unfinished +} + +func (wp *WorkerPool) Resume(records []*AggregatedRecordRequest) { + wp.unfinished <- records + <-wp.pause +} + +func (wp *WorkerPool) Wait() { + <-wp.done + close(wp.errs) +} + +func (wp *WorkerPool) Flush() { + wp.flush <- struct{}{} +} + +func (wp *WorkerPool) Close() { + close(wp.input) +} + +func (wp *WorkerPool) loop() { + var ( + buf = make([]*AggregatedRecordRequest, 0, wp.BatchCount) + inflight []*Work = nil + retry = make(chan *Work) + size = 0 + connections semaphore = make(chan struct{}, wp.MaxConnections) + closed semaphore = make(chan struct{}, wp.MaxConnections) + ) + + // create new work item from buffer and append to inflight work + flushBuf := func(reason string) { + if size == 0 { + return + } + work := NewWork(buf, size, reason) + buf = make([]*AggregatedRecordRequest, 0, wp.BatchCount) + size = 0 + inflight = append(inflight, work) + } + + // Push aggregated record into the buffer. Flush buffer into new work item if push will + // exceed size limits + push := func(record *AggregatedRecordRequest) { + rsize := len(record.Entry.Data) + len([]byte(*record.Entry.PartitionKey)) + if size+rsize > wp.BatchSize { + // if this record would overflow the batch buffer, send it inflight + flushBuf("batch size") + } + buf = append(buf, record) + size += rsize + if len(buf) >= wp.BatchCount { + flushBuf("batch length") + } + } + + // prepend work item to start of inflight buffer. Work that needs to be retried is + // prepended for prioritization over new work + prepend := func(work *Work) { + inflight = append([]*Work{work}, inflight...) 
+ } + + do := func(work *Work) { + failed := wp.send(work) + if failed != nil { + retry <- failed + } + connections.release() + } + + var ( + flush chan struct{} = wp.flush + pause chan struct{} = wp.pause + input chan *AggregatedRecordRequest = wp.input + completed int + ) + + // fill up the closed connection semaphore before starting the loop so that when + // connections are closed after stopping, the loop can exit when all have closed + closed.wait(wp.MaxConnections) + + defer close(wp.done) + + for { + select { + case record, ok := <-input: + if !ok { + input = nil + flushBuf("drain") + } else { + push(record) + } + case <-flush: + flushBuf("flush interval") + case connections <- struct{}{}: + // acquired an open connection + // check to see if there is any work in flight that needs to be sent + var work *Work + if len(inflight) > 0 { + work, inflight = inflight[0], inflight[1:] + } + + if work != nil { + go do(work) + } else if input == nil { + // If input is nil, no more work will be coming so close the connection for good + closed.release() + } else { + // otherwise release it + connections.release() + } + case closed <- struct{}{}: + // this case will block until the connections case releases the closed semaphore + completed++ + if completed == wp.MaxConnections { + return + } + case failed := <-retry: + // prioritize work that needs to be resent due to throttling + prepend(failed) + case <-pause: + // collect failed records that need retry from open connections + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for failed := range retry { + prepend(failed) + } + }() + // wait for open connections to finish + connections.wait(wp.MaxConnections - completed) + // safe to close retry channel now that no connections are open + close(retry) + // wait to finish collecting all failed requests + wg.Wait() + // flush out anything remaining in the buffer + flushBuf("pause") + // capture the inflight requests that did not get finished + var drained []*AggregatedRecordRequest + for _, work := range inflight { + drained = append(drained, work.records...) 
+ } + // reset state + retry = make(chan *Work) + inflight = nil + // send the drained records + wp.unfinished <- drained + // reset closed connections + closed.wait(completed) + completed = 0 + // reopen connections + connections.open(wp.MaxConnections) + // collect records to push after resuming + // this will block the pool until Resume() is called + records := <-wp.unfinished + for _, record := range records { + push(record) + } + if input == nil { + // if the pool was paused after Close(), then we want to flush any remaining buffer + flushBuf("drain") + } + wp.pause <- struct{}{} + } + } +} + +func (wp *WorkerPool) send(work *Work) *Work { + count := len(work.records) + wp.Logger.Info("flushing records", LogValue{"reason", work.reason}, LogValue{"records", count}) + + kinesisRecords := make([]*k.PutRecordsRequestEntry, count) + for i := 0; i < count; i++ { + kinesisRecords[i] = work.records[i].Entry + } + + out, err := wp.Client.PutRecords(&k.PutRecordsInput{ + StreamName: &wp.StreamName, + Records: kinesisRecords, + }) + + if err != nil { + wp.Logger.Error("send", err) + for _, r := range work.records { + failure := &FailureRecord{ + Err: err, + PartitionKey: *r.Entry.PartitionKey, + UserRecords: r.UserRecords, + } + if r.Entry.ExplicitHashKey != nil { + failure.ExplicitHashKey = *r.Entry.ExplicitHashKey + } + wp.errs <- failure + } + return nil + } + + if wp.Verbose { + for i, r := range out.Records { + values := make([]LogValue, 2) + if r.ErrorCode != nil { + values[0] = LogValue{"ErrorCode", *r.ErrorCode} + values[1] = LogValue{"ErrorMessage", *r.ErrorMessage} + } else { + values[0] = LogValue{"ShardId", *r.ShardId} + values[1] = LogValue{"SequenceNumber", *r.SequenceNumber} + } + wp.Logger.Info(fmt.Sprintf("Result[%d]", i), values...) + } + } + + failed := *out.FailedRecordCount + if failed == 0 { + return nil + } + + duration := work.b.Duration() + + wp.Logger.Info( + "put failures", + LogValue{"failures", failed}, + LogValue{"backoff", duration.String()}, + ) + time.Sleep(duration) + + // change the logging state for the next iteration + work.reason = "retry" + work.records = failures(work.records, out.Records, failed) + return work +} + +// failures returns the failed records as indicated in the response. +func failures( + records []*AggregatedRecordRequest, + response []*k.PutRecordsResultEntry, + count int64, +) []*AggregatedRecordRequest { + out := make([]*AggregatedRecordRequest, 0, count) + for i, record := range response { + if record.ErrorCode != nil { + out = append(out, records[i]) + } + } + return out +}