From c3e319a6000f9fa073fede4a7226732e1770c589 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 5 Dec 2023 15:37:12 +0800 Subject: [PATCH 01/24] simple support encode large message handle . --- pkg/sink/codec/builder/encoder_builder.go | 2 +- pkg/sink/codec/simple/encoder.go | 133 ++++++++++++++++++++-- pkg/sink/codec/simple/encoder_test.go | 20 +++- pkg/sink/codec/simple/message.go | 23 +++- 4 files changed, 156 insertions(+), 22 deletions(-) diff --git a/pkg/sink/codec/builder/encoder_builder.go b/pkg/sink/codec/builder/encoder_builder.go index ea9ab3b118b..3a3f01fec62 100644 --- a/pkg/sink/codec/builder/encoder_builder.go +++ b/pkg/sink/codec/builder/encoder_builder.go @@ -47,7 +47,7 @@ func NewRowEventEncoderBuilder( case config.ProtocolCraft: return craft.NewBatchEncoderBuilder(cfg), nil case config.ProtocolSimple: - return simple.NewBuilder(cfg), nil + return simple.NewBuilder(ctx, cfg) default: return nil, cerror.ErrSinkUnknownProtocol.GenWithStackByArgs(cfg.Protocol) } diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index f72af4db0c5..a3d80fc03b7 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -17,24 +17,29 @@ import ( "context" "encoding/json" + "github.com/pingcap/errors" + "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/sink/codec" "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/pkg/sink/kafka/claimcheck" + "go.uber.org/zap" ) type encoder struct { - config *common.Config - messages []*common.Message + + config *common.Config + claimCheck *claimcheck.ClaimCheck } // AppendRowChangedEvent implement the RowEventEncoder interface func (e *encoder) AppendRowChangedEvent( - _ context.Context, _ string, event *model.RowChangedEvent, callback func(), + ctx context.Context, _ string, event *model.RowChangedEvent, callback func(), ) error { - m, err := newDMLMessage(event) + m, err := newDMLMessage(event, false) if err != nil { return err } @@ -43,6 +48,12 @@ func (e *encoder) AppendRowChangedEvent( return cerror.WrapError(cerror.ErrEncodeFailed, err) } + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return err + } + result := &common.Message{ Key: nil, Value: value, @@ -54,10 +65,94 @@ func (e *encoder) AppendRowChangedEvent( Callback: callback, } result.IncRowsCount() + if result.Length() > e.config.MaxMessageBytes { + if e.config.LargeMessageHandle.Disabled() { + log.Error("Single message is too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() + } + if e.config.LargeMessageHandle.HandleKeyOnly() { + m, err = newDMLMessage(event, true) + if err != nil { + return err + } + value, err = json.Marshal(m) + if err != nil { + return cerror.WrapError(cerror.ErrEncodeFailed, err) + } + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return err + } + result.Value = value + if result.Length() > e.config.MaxMessageBytes { + log.Error("Single message is still too large for simple only encode handle key columns", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", 
event.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() + } + log.Warn("Single message is too large for simple, only encode handle-key columns", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("originLength", result.Length()), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) + } + if e.config.LargeMessageHandle.EnableClaimCheck() { + claimCheckFileName := claimcheck.NewFileName() + if err := e.claimCheck.WriteMessage(ctx, result.Key, result.Value, claimCheckFileName); err != nil { + return errors.Trace(err) + } + + result, err = e.newClaimCheckLocationMessage(event, callback, claimCheckFileName) + if err != nil { + return errors.Trace(err) + } + } + } + e.messages = append(e.messages, result) return nil } +func (e *encoder) newClaimCheckLocationMessage( + event *model.RowChangedEvent, callback func(), fileName string, +) (*common.Message, error) { + m, err := newDMLMessage(event, true) + if err != nil { + return nil, err + } + m.ClaimCheckLocation = e.claimCheck.FileNameWithPrefix(fileName) + + value, err := json.Marshal(m) + if err != nil { + return nil, cerror.WrapError(cerror.ErrEncodeFailed, err) + } + + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return nil, errors.Trace(err) + } + + result := common.NewMsg(config.ProtocolSimple, nil, value, 0, model.MessageTypeRow, nil, nil) + result.Callback = callback + result.IncRowsCount() + + length := result.Length() + if length > e.config.MaxMessageBytes { + log.Warn("Single message is too large for canal-json, when create the claim check location message", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", length), + zap.Any("table", event.Table)) + return nil, cerror.ErrMessageTooLarge.GenWithStackByArgs(length) + } + return result, nil +} + // Build implement the RowEventEncoder interface func (e *encoder) Build() []*common.Message { if len(e.messages) == 0 { @@ -89,25 +184,41 @@ func (e *encoder) EncodeDDLEvent(event *model.DDLEvent) (*common.Message, error) } type builder struct { - config *common.Config + config *common.Config + claimCheck *claimcheck.ClaimCheck } // NewBuilder returns a new builder -func NewBuilder(config *common.Config) *builder { - return &builder{ - config: config, +func NewBuilder(ctx context.Context, config *common.Config) (*builder, error) { + var ( + claimCheck *claimcheck.ClaimCheck + err error + ) + if config.LargeMessageHandle.EnableClaimCheck() { + claimCheck, err = claimcheck.New(ctx, + config.LargeMessageHandle.ClaimCheckStorageURI, config.ChangefeedID) + if err != nil { + return nil, errors.Trace(err) + } } + return &builder{ + config: config, + claimCheck: claimCheck, + }, nil } // Build implement the RowEventEncoderBuilder interface func (b *builder) Build() codec.RowEventEncoder { return &encoder{ - config: b.config, - messages: make([]*common.Message, 0, 1), + messages: make([]*common.Message, 0, 1), + config: b.config, + claimCheck: b.claimCheck, } } // CleanMetrics implement the RowEventEncoderBuilder interface func (b *builder) CleanMetrics() { - // do nothing + if b.claimCheck != nil { + b.claimCheck.CleanMetrics() + } } diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 805abee4465..2aa1b68d609 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -27,8 +27,11 @@ import ( func TestEncodeCheckpoint(t *testing.T) { t.Parallel() + ctx := 
context.Background() codecConfig := common.NewConfig(config.ProtocolSimple) - enc := NewBuilder(codecConfig).Build() + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() checkpoint := 23 m, err := enc.EncodeCheckpointEvent(uint64(checkpoint)) @@ -62,8 +65,11 @@ func TestEncodeDDLEvent(t *testing.T) { key idx_name_email(name, email))` ddlEvent := helper.DDL2Event(sql) + ctx := context.Background() codecConfig := common.NewConfig(config.ProtocolSimple) - enc := NewBuilder(codecConfig).Build() + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) @@ -122,8 +128,11 @@ func TestEncoderOtherTypes(t *testing.T) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() + ctx := context.Background() codecConfig := common.NewConfig(config.ProtocolSimple) - enc := NewBuilder(codecConfig).Build() + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() sql := `create table test.t( a int primary key auto_increment, @@ -199,8 +208,11 @@ func TestEncodeBootstrapEvent(t *testing.T) { ddlEvent := helper.DDL2Event(sql) ddlEvent.IsBootstrap = true + ctx := context.Background() codecConfig := common.NewConfig(config.ProtocolSimple) - enc := NewBuilder(codecConfig).Build() + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 4f44f1cdc3f..03a40751a84 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -321,6 +321,9 @@ type message struct { SQL string `json:"sql,omitempty"` // SchemaVersion is for the DDL, Bootstrap and DML event. SchemaVersion uint64 `json:"schemaVersion,omitempty"` + + // ClaimCheckLocation is only for the DML event. 
+ ClaimCheckLocation string `json:"claimCheckLocation,omitempty"` } func newResolvedMessage(ts uint64) *message { @@ -366,7 +369,9 @@ func newDDLMessage(ddl *model.DDLEvent) *message { return msg } -func newDMLMessage(event *model.RowChangedEvent) (*message, error) { +func newDMLMessage( + event *model.RowChangedEvent, onlyHandleKey bool, +) (*message, error) { m := &message{ Version: defaultVersion, Database: event.Table.Schema, @@ -378,23 +383,23 @@ func newDMLMessage(event *model.RowChangedEvent) (*message, error) { var err error if event.IsInsert() { m.Type = InsertType - m.Data, err = formatColumns(event.Columns, event.ColInfos) + m.Data, err = formatColumns(event.Columns, event.ColInfos, onlyHandleKey) if err != nil { return nil, err } } else if event.IsDelete() { m.Type = DeleteType - m.Old, err = formatColumns(event.PreColumns, event.ColInfos) + m.Old, err = formatColumns(event.PreColumns, event.ColInfos, onlyHandleKey) if err != nil { return nil, err } } else if event.IsUpdate() { m.Type = UpdateType - m.Data, err = formatColumns(event.Columns, event.ColInfos) + m.Data, err = formatColumns(event.Columns, event.ColInfos, onlyHandleKey) if err != nil { return nil, err } - m.Old, err = formatColumns(event.PreColumns, event.ColInfos) + m.Old, err = formatColumns(event.PreColumns, event.ColInfos, onlyHandleKey) if err != nil { return nil, err } @@ -406,10 +411,16 @@ func newDMLMessage(event *model.RowChangedEvent) (*message, error) { } func formatColumns( - columns []*model.Column, columnInfos []rowcodec.ColInfo, + columns []*model.Column, columnInfos []rowcodec.ColInfo, onlyHandleKey bool, ) (map[string]interface{}, error) { result := make(map[string]interface{}, len(columns)) for idx, col := range columns { + if col == nil { + continue + } + if onlyHandleKey && !col.Flag.IsHandleKey() { + continue + } value, err := encodeValue(col.Value, columnInfos[idx].Ft) if err != nil { return nil, err From 1e93072dbdd571af8e7ea1eb2f45141b1fa0356d Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 6 Dec 2023 15:28:55 +0800 Subject: [PATCH 02/24] adjust ddl. --- pkg/sink/codec/simple/encoder_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 2aa1b68d609..8b1b676aa4a 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -59,9 +59,8 @@ func TestEncodeDDLEvent(t *testing.T) { sql := `create table test.t( id int primary key, name varchar(255) not null, - age int, + gender enum('male', 'female'), email varchar(255) not null, - key idx_name(name), key idx_name_email(name, email))` ddlEvent := helper.DDL2Event(sql) From 7133cacb33249bdec2eb854e67a9b4af440acdc2 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 6 Dec 2023 18:02:55 +0800 Subject: [PATCH 03/24] add compression related unit test. 
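
This wires the configured large-message-handle compression through the simple
codec: the encoder compresses every marshalled value with common.Compress, and
the decoder (which now takes the codec config) decompresses it symmetrically in
AddKeyValue with common.Decompress. The new tests simply rerun the existing
round trips once per supported codec. A condensed sketch of that pattern,
assuming the tiflow helpers used in this diff (the test name below is
illustrative only; the real coverage lives in encoder_test.go):

    func TestSimpleCompressionRoundTrip(t *testing.T) {
        ctx := context.Background()
        for _, compressionType := range []string{
            compression.None, compression.Snappy, compression.LZ4,
        } {
            codecConfig := common.NewConfig(config.ProtocolSimple)
            codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType

            builder, err := NewBuilder(ctx, codecConfig)
            require.NoError(t, err)
            enc := builder.Build()

            // The checkpoint value is compressed with the configured codec.
            msg, err := enc.EncodeCheckpointEvent(uint64(23))
            require.NoError(t, err)

            // The decoder shares the config, so AddKeyValue decompresses transparently.
            dec := NewDecoder(codecConfig)
            require.NoError(t, dec.AddKeyValue(msg.Key, msg.Value))

            messageType, hasNext, err := dec.HasNext()
            require.NoError(t, err)
            require.True(t, hasNext)
            require.Equal(t, model.MessageTypeResolved, messageType)

            ts, err := dec.NextResolvedEvent()
            require.NoError(t, err)
            require.Equal(t, uint64(23), ts)
        }
    }
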
--- pkg/sink/codec/simple/decoder.go | 18 +- pkg/sink/codec/simple/encoder.go | 128 ++++---- pkg/sink/codec/simple/encoder_test.go | 407 ++++++++++++++------------ 3 files changed, 288 insertions(+), 265 deletions(-) diff --git a/pkg/sink/codec/simple/decoder.go b/pkg/sink/codec/simple/decoder.go index b8b9c1e0322..a84988ca03f 100644 --- a/pkg/sink/codec/simple/decoder.go +++ b/pkg/sink/codec/simple/decoder.go @@ -19,21 +19,23 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/model" cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/sink/codec/common" "go.uber.org/zap" ) type decoder struct { - value []byte - - msg *message + config *common.Config - memo TableInfoProvider + value []byte + msg *message + memo TableInfoProvider } // NewDecoder returns a new decoder -func NewDecoder() *decoder { +func NewDecoder(config *common.Config) *decoder { return &decoder{ - memo: newMemoryTableInfoProvider(), + config: config, + memo: newMemoryTableInfoProvider(), } } @@ -43,6 +45,10 @@ func (d *decoder) AddKeyValue(_, value []byte) error { return cerror.ErrDecodeFailed.GenWithStack( "decoder value already exists, not consumed yet") } + value, err := common.Decompress(d.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return err + } d.value = value return nil } diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index a3d80fc03b7..e8988fc4a0c 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -55,7 +55,6 @@ func (e *encoder) AppendRowChangedEvent( } result := &common.Message{ - Key: nil, Value: value, Ts: event.CommitTs, Schema: &event.Table.Schema, @@ -73,86 +72,47 @@ func (e *encoder) AppendRowChangedEvent( zap.Any("table", event.Table)) return cerror.ErrMessageTooLarge.GenWithStackByArgs() } - if e.config.LargeMessageHandle.HandleKeyOnly() { - m, err = newDMLMessage(event, true) - if err != nil { - return err - } - value, err = json.Marshal(m) - if err != nil { - return cerror.WrapError(cerror.ErrEncodeFailed, err) - } - value, err = common.Compress(e.config.ChangefeedID, - e.config.LargeMessageHandle.LargeMessageHandleCompression, value) - if err != nil { - return err - } - result.Value = value - if result.Length() > e.config.MaxMessageBytes { - log.Error("Single message is still too large for simple only encode handle key columns", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("length", result.Length()), - zap.Any("table", event.Table)) - return cerror.ErrMessageTooLarge.GenWithStackByArgs() - } - log.Warn("Single message is too large for simple, only encode handle-key columns", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("originLength", result.Length()), - zap.Int("length", result.Length()), - zap.Any("table", event.Table)) + + m, err = newDMLMessage(event, true) + if err != nil { + return err } - if e.config.LargeMessageHandle.EnableClaimCheck() { - claimCheckFileName := claimcheck.NewFileName() - if err := e.claimCheck.WriteMessage(ctx, result.Key, result.Value, claimCheckFileName); err != nil { - return errors.Trace(err) - } - result, err = e.newClaimCheckLocationMessage(event, callback, claimCheckFileName) - if err != nil { + if e.config.LargeMessageHandle.EnableClaimCheck() { + fileName := claimcheck.NewFileName() + if err = e.claimCheck.WriteMessage(ctx, result.Key, result.Value, fileName); err != nil { return errors.Trace(err) } + m.ClaimCheckLocation = e.claimCheck.FileNameWithPrefix(fileName) + } + 
value, err = json.Marshal(m) + if err != nil { + return cerror.WrapError(cerror.ErrEncodeFailed, err) + } + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return err } + result.Value = value + if result.Length() > e.config.MaxMessageBytes { + log.Error("Single message is still too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() + } + log.Warn("Single message is too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("originLength", result.Length()), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) } e.messages = append(e.messages, result) return nil } -func (e *encoder) newClaimCheckLocationMessage( - event *model.RowChangedEvent, callback func(), fileName string, -) (*common.Message, error) { - m, err := newDMLMessage(event, true) - if err != nil { - return nil, err - } - m.ClaimCheckLocation = e.claimCheck.FileNameWithPrefix(fileName) - - value, err := json.Marshal(m) - if err != nil { - return nil, cerror.WrapError(cerror.ErrEncodeFailed, err) - } - - value, err = common.Compress(e.config.ChangefeedID, - e.config.LargeMessageHandle.LargeMessageHandleCompression, value) - if err != nil { - return nil, errors.Trace(err) - } - - result := common.NewMsg(config.ProtocolSimple, nil, value, 0, model.MessageTypeRow, nil, nil) - result.Callback = callback - result.IncRowsCount() - - length := result.Length() - if length > e.config.MaxMessageBytes { - log.Warn("Single message is too large for canal-json, when create the claim check location message", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("length", length), - zap.Any("table", event.Table)) - return nil, cerror.ErrMessageTooLarge.GenWithStackByArgs(length) - } - return result, nil -} - // Build implement the RowEventEncoder interface func (e *encoder) Build() []*common.Message { if len(e.messages) == 0 { @@ -170,17 +130,39 @@ func (e *encoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { if err != nil { return nil, cerror.WrapError(cerror.ErrEncodeFailed, err) } + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return nil, err + } return common.NewResolvedMsg(config.ProtocolSimple, nil, value, ts), nil } // EncodeDDLEvent implement the DDLEventBatchEncoder interface func (e *encoder) EncodeDDLEvent(event *model.DDLEvent) (*common.Message, error) { - message := newDDLMessage(event) - value, err := json.Marshal(message) + m := newDDLMessage(event) + value, err := json.Marshal(m) if err != nil { return nil, cerror.WrapError(cerror.ErrEncodeFailed, err) } - return common.NewDDLMsg(config.ProtocolSimple, nil, value, event), nil + + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return nil, err + } + + result := common.NewDDLMsg(config.ProtocolSimple, nil, value, event) + if result.Length() > e.config.MaxMessageBytes { + if e.config.LargeMessageHandle.Disabled() { + log.Error("DDL message is too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.TableInfo.TableName)) + return nil, cerror.ErrMessageTooLarge.GenWithStackByArgs() + } + } + return 
result, nil } type builder struct { diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 8b1b676aa4a..4fba28e1ad7 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -19,6 +19,7 @@ import ( "github.com/pingcap/tiflow/cdc/entry" "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/compression" "github.com/pingcap/tiflow/pkg/config" "github.com/pingcap/tiflow/pkg/sink/codec/common" "github.com/stretchr/testify/require" @@ -28,99 +29,115 @@ func TestEncodeCheckpoint(t *testing.T) { t.Parallel() ctx := context.Background() - codecConfig := common.NewConfig(config.ProtocolSimple) - builder, err := NewBuilder(ctx, codecConfig) - require.NoError(t, err) - enc := builder.Build() - - checkpoint := 23 - m, err := enc.EncodeCheckpointEvent(uint64(checkpoint)) - require.NoError(t, err) - - dec := NewDecoder() - err = dec.AddKeyValue(m.Key, m.Value) - require.NoError(t, err) - - messageType, hasNext, err := dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeResolved, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - ts, err := dec.NextResolvedEvent() - require.NoError(t, err) - require.Equal(t, uint64(checkpoint), ts) + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + checkpoint := 23 + m, err := enc.EncodeCheckpointEvent(uint64(checkpoint)) + require.NoError(t, err) + + dec := NewDecoder(codecConfig) + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) + + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeResolved, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) + + ts, err := dec.NextResolvedEvent() + require.NoError(t, err) + require.Equal(t, uint64(checkpoint), ts) + } } func TestEncodeDDLEvent(t *testing.T) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() - sql := `create table test.t( + ctx := context.Background() + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + sql := `create table test.t( id int primary key, name varchar(255) not null, gender enum('male', 'female'), email varchar(255) not null, key idx_name_email(name, email))` - ddlEvent := helper.DDL2Event(sql) - - ctx := context.Background() - codecConfig := common.NewConfig(config.ProtocolSimple) - builder, err := NewBuilder(ctx, codecConfig) - require.NoError(t, err) - enc := builder.Build() - - m, err := enc.EncodeDDLEvent(ddlEvent) - require.NoError(t, err) - - dec := NewDecoder() - err = dec.AddKeyValue(m.Key, m.Value) - require.NoError(t, err) - - messageType, hasNext, err := dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeDDL, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - event, err := dec.NextDDLEvent() - require.NoError(t, err) - require.Equal(t, ddlEvent.CommitTs, event.CommitTs) - // because we 
don't we don't set startTs in the encoded message, - // so the startTs is equal to commitTs - require.Equal(t, ddlEvent.CommitTs, event.StartTs) - require.Equal(t, ddlEvent.Query, event.Query) - require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) - require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) - - item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, - ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) - require.NotNil(t, item) - - sql = `insert into test.t values (1, "jack", 23, "jack@abc.com")` - row := helper.DML2Event(sql, "test", "t") - - err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) - require.NoError(t, err) - - messages := enc.Build() - require.Len(t, messages, 1) - - err = dec.AddKeyValue(messages[0].Key, messages[0].Value) - require.NoError(t, err) - - messageType, hasNext, err = dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeRow, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - decodedRow, err := dec.NextRowChangedEvent() - require.NoError(t, err) - require.Equal(t, decodedRow.CommitTs, row.CommitTs) - require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) - require.Equal(t, decodedRow.Table.Table, row.Table.Table) - require.Nil(t, decodedRow.PreColumns) + ddlEvent := helper.DDL2Event(sql) + + m, err := enc.EncodeDDLEvent(ddlEvent) + require.NoError(t, err) + + dec := NewDecoder(codecConfig) + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) + + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeDDL, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) + + event, err := dec.NextDDLEvent() + require.NoError(t, err) + require.Equal(t, ddlEvent.CommitTs, event.CommitTs) + // because we don't we don't set startTs in the encoded message, + // so the startTs is equal to commitTs + require.Equal(t, ddlEvent.CommitTs, event.StartTs) + require.Equal(t, ddlEvent.Query, event.Query) + require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) + require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) + + item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, + ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) + require.NotNil(t, item) + + sql = `insert into test.t values (1, "jack", "male", "jack@abc.com")` + row := helper.DML2Event(sql, "test", "t") + + err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) + require.NoError(t, err) + + messages := enc.Build() + require.Len(t, messages, 1) + + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) + require.NoError(t, err) + + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) + + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) + require.Equal(t, decodedRow.CommitTs, row.CommitTs) + require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) + require.Equal(t, decodedRow.Table.Table, row.Table.Table) + require.Nil(t, decodedRow.PreColumns) + + helper.Tk().MustExec("drop table test.t") + } } func TestEncoderOtherTypes(t *testing.T) { @@ -128,68 +145,77 @@ func TestEncoderOtherTypes(t *testing.T) { defer helper.Close() ctx := context.Background() - codecConfig := common.NewConfig(config.ProtocolSimple) - builder, err := 
NewBuilder(ctx, codecConfig) - require.NoError(t, err) - enc := builder.Build() - - sql := `create table test.t( + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + sql := `create table test.t( a int primary key auto_increment, b enum('a', 'b', 'c'), c set('a', 'b', 'c'), d bit(64), e json)` - ddlEvent := helper.DDL2Event(sql) + ddlEvent := helper.DDL2Event(sql) - m, err := enc.EncodeDDLEvent(ddlEvent) - require.NoError(t, err) + m, err := enc.EncodeDDLEvent(ddlEvent) + require.NoError(t, err) - dec := NewDecoder() - err = dec.AddKeyValue(m.Key, m.Value) - require.NoError(t, err) + dec := NewDecoder(codecConfig) + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) - messageType, hasNext, err := dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeDDL, messageType) + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeDDL, messageType) - _, err = dec.NextDDLEvent() - require.NoError(t, err) + _, err = dec.NextDDLEvent() + require.NoError(t, err) - sql = `insert into test.t() values (1, 'a', 'a,b', b'1000001', '{ + sql = `insert into test.t() values (1, 'a', 'a,b', b'1000001', '{ "key1": "value1", "key2": "value2" }');` - row := helper.DML2Event(sql, "test", "t") + row := helper.DML2Event(sql, "test", "t") - err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) - require.NoError(t, err) + err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) + require.NoError(t, err) - messages := enc.Build() - require.Len(t, messages, 1) + messages := enc.Build() + require.Len(t, messages, 1) - err = dec.AddKeyValue(messages[0].Key, messages[0].Value) - require.NoError(t, err) + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) + require.NoError(t, err) - messageType, hasNext, err = dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeRow, messageType) + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) - decodedRow, err := dec.NextRowChangedEvent() - require.NoError(t, err) + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) - decodedColumns := make(map[string]*model.Column, len(decodedRow.Columns)) - for _, column := range decodedRow.Columns { - decodedColumns[column.Name] = column - } + decodedColumns := make(map[string]*model.Column, len(decodedRow.Columns)) + for _, column := range decodedRow.Columns { + decodedColumns[column.Name] = column + } + + for _, expected := range row.Columns { + decoded, ok := decodedColumns[expected.Name] + require.True(t, ok) + require.Equal(t, expected.Value, decoded.Value) + require.Equal(t, expected.Charset, decoded.Charset) + require.Equal(t, expected.Collation, decoded.Collation) + } - for _, expected := range row.Columns { - decoded, ok := decodedColumns[expected.Name] - require.True(t, ok) - require.Equal(t, expected.Value, decoded.Value) - require.Equal(t, expected.Charset, decoded.Charset) - require.Equal(t, expected.Collation, decoded.Collation) + helper.Tk().MustExec("drop table test.t") } } @@ -197,72 +223,81 @@ 
func TestEncodeBootstrapEvent(t *testing.T) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() - sql := `create table test.t( + ctx := context.Background() + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + sql := `create table test.t( id int primary key, name varchar(255) not null, age int, email varchar(255) not null, key idx_name(name), key idx_name_email(name, email))` - ddlEvent := helper.DDL2Event(sql) - ddlEvent.IsBootstrap = true - - ctx := context.Background() - codecConfig := common.NewConfig(config.ProtocolSimple) - builder, err := NewBuilder(ctx, codecConfig) - require.NoError(t, err) - enc := builder.Build() - - m, err := enc.EncodeDDLEvent(ddlEvent) - require.NoError(t, err) - - dec := NewDecoder() - err = dec.AddKeyValue(m.Key, m.Value) - require.NoError(t, err) - - messageType, hasNext, err := dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeDDL, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - event, err := dec.NextDDLEvent() - require.NoError(t, err) - require.Equal(t, ddlEvent.CommitTs, event.CommitTs) - // because we don't we don't set startTs in the encoded message, - // so the startTs is equal to commitTs - require.Equal(t, ddlEvent.CommitTs, event.StartTs) - // Bootstrap event doesn't have query - require.Equal(t, "", event.Query) - require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) - require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) - - item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, - ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) - require.NotNil(t, item) - - sql = `insert into test.t values (1, "jack", 23, "jack@abc.com")` - row := helper.DML2Event(sql, "test", "t") - - err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) - require.NoError(t, err) - - messages := enc.Build() - require.Len(t, messages, 1) - - err = dec.AddKeyValue(messages[0].Key, messages[0].Value) - require.NoError(t, err) - - messageType, hasNext, err = dec.HasNext() - require.NoError(t, err) - require.True(t, hasNext) - require.Equal(t, model.MessageTypeRow, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - decodedRow, err := dec.NextRowChangedEvent() - require.NoError(t, err) - require.Equal(t, decodedRow.CommitTs, row.CommitTs) - require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) - require.Equal(t, decodedRow.Table.Table, row.Table.Table) - require.Nil(t, decodedRow.PreColumns) + ddlEvent := helper.DDL2Event(sql) + ddlEvent.IsBootstrap = true + + m, err := enc.EncodeDDLEvent(ddlEvent) + require.NoError(t, err) + + dec := NewDecoder(codecConfig) + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) + + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeDDL, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) + + event, err := dec.NextDDLEvent() + require.NoError(t, err) + require.Equal(t, ddlEvent.CommitTs, event.CommitTs) + // because we don't we don't set startTs in the encoded message, + // so the startTs is equal to commitTs + require.Equal(t, ddlEvent.CommitTs, event.StartTs) + // Bootstrap event 
doesn't have query + require.Equal(t, "", event.Query) + require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) + require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) + + item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, + ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) + require.NotNil(t, item) + + sql = `insert into test.t values (1, "jack", 23, "jack@abc.com")` + row := helper.DML2Event(sql, "test", "t") + + err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) + require.NoError(t, err) + + messages := enc.Build() + require.Len(t, messages, 1) + + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) + require.NoError(t, err) + + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) + + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) + require.Equal(t, decodedRow.CommitTs, row.CommitTs) + require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) + require.Equal(t, decodedRow.Table.Table, row.Table.Table) + require.Nil(t, decodedRow.PreColumns) + + helper.Tk().MustExec("drop table test.t") + } } From e53f5c0f3f8ae59db27b59451ded36f5fa6df999 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 6 Dec 2023 18:12:36 +0800 Subject: [PATCH 04/24] move canal test util to the utils, it should be shared among all protocols. --- .../codec/canal/canal_json_decoder_test.go | 13 ++--- .../canal_json_row_event_encoder_test.go | 53 ++++++++++--------- pkg/sink/codec/utils/mysql_types.go | 2 +- .../test_utils.go} | 24 ++++----- 4 files changed, 47 insertions(+), 45 deletions(-) rename pkg/sink/codec/{canal/canal_test_util.go => utils/test_utils.go} (95%) diff --git a/pkg/sink/codec/canal/canal_json_decoder_test.go b/pkg/sink/codec/canal/canal_json_decoder_test.go index d95446f42f3..df88117dc94 100644 --- a/pkg/sink/codec/canal/canal_json_decoder_test.go +++ b/pkg/sink/codec/canal/canal_json_decoder_test.go @@ -20,13 +20,14 @@ import ( "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/pkg/sink/codec/utils" "github.com/stretchr/testify/require" ) func TestNewCanalJSONBatchDecoder4RowMessage(t *testing.T) { - insertEvent, _, _ := newLargeEvent4Test(t) + insertEvent, _, _ := utils.newLargeEvent4Test(t) ctx := context.Background() - expectedDecodedValue := collectExpectedDecodedValue(testColumnsTable) + expectedDecodedValue := utils.collectExpectedDecodedValue(utils.testColumnsTable) for _, encodeEnable := range []bool{false, true} { codecConfig := common.NewConfig(config.ProtocolCanalJSON) codecConfig.EnableTiDBExtension = encodeEnable @@ -100,7 +101,7 @@ func TestNewCanalJSONBatchDecoder4DDLMessage(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - result, err := encoder.EncodeDDLEvent(testCaseDDL) + result, err := encoder.EncodeDDLEvent(utils.testCaseDDL) require.NoError(t, err) require.NotNil(t, result) @@ -121,13 +122,13 @@ func TestNewCanalJSONBatchDecoder4DDLMessage(t *testing.T) { require.Nil(t, err) if encodeEnable && decodeEnable { - require.Equal(t, testCaseDDL.CommitTs, consumed.CommitTs) + require.Equal(t, utils.testCaseDDL.CommitTs, consumed.CommitTs) } else { require.Equal(t, uint64(0), consumed.CommitTs) } - require.Equal(t, testCaseDDL.TableInfo, consumed.TableInfo) - require.Equal(t, testCaseDDL.Query, 
consumed.Query) + require.Equal(t, utils.testCaseDDL.TableInfo, consumed.TableInfo) + require.Equal(t, utils.testCaseDDL.Query, consumed.Query) ty, hasNext, err = decoder.HasNext() require.Nil(t, err) diff --git a/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go b/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go index ef48961636c..23332ecbaa8 100644 --- a/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go +++ b/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/pkg/sink/codec/utils" "github.com/stretchr/testify/require" "golang.org/x/text/encoding/charmap" ) @@ -50,7 +51,7 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { encoder, ok := builder.Build().(*JSONRowEventEncoder) require.True(t, ok) - insertEvent, updateEvent, deleteEvent := newLargeEvent4Test(t) + insertEvent, updateEvent, deleteEvent := utils.NewLargeEvent4Test(t) data, err := newJSONMessageForDML(encoder.builder, insertEvent, encoder.config, false, "") require.NoError(t, err) @@ -78,28 +79,28 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { obtainedDataMap := jsonMsg.getData() require.NotNil(t, obtainedDataMap) - for _, item := range testColumnsTable { - obtainedValue, ok := obtainedDataMap[item.column.Name] + for _, item := range utils.TestColumnsTable { + obtainedValue, ok := obtainedDataMap[item.Column.Name] require.True(t, ok) - if !item.column.Flag.IsBinary() { - require.Equal(t, item.expectedEncodedValue, obtainedValue) + if !item.Column.Flag.IsBinary() { + require.Equal(t, item.ExpectedEncodedValue, obtainedValue) continue } // for `Column.Value` is nil, which mean's it is nullable, set the value to `""` if obtainedValue == nil { - require.Equal(t, "", item.expectedEncodedValue) + require.Equal(t, "", item.ExpectedEncodedValue) continue } - if bytes, ok := item.column.Value.([]byte); ok { + if bytes, ok := item.Column.Value.([]byte); ok { expectedValue, err := charmap.ISO8859_1.NewDecoder().Bytes(bytes) require.NoError(t, err) require.Equal(t, string(expectedValue), obtainedValue) continue } - require.Equal(t, item.expectedEncodedValue, obtainedValue) + require.Equal(t, item.ExpectedEncodedValue, obtainedValue) } data, err = newJSONMessageForDML(encoder.builder, updateEvent, encoder.config, false, "") @@ -201,7 +202,7 @@ func TestCanalJSONCompressionE2E(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - insertEvent, _, _ := newLargeEvent4Test(t) + insertEvent, _, _ := utils.NewLargeEvent4Test(t) // encode normal row changed event err = encoder.AppendRowChangedEvent(ctx, "", insertEvent, func() {}) @@ -227,7 +228,7 @@ func TestCanalJSONCompressionE2E(t *testing.T) { require.Equal(t, decodedEvent.Table.Table, insertEvent.Table.Table) // encode DDL event - message, err = encoder.EncodeDDLEvent(testCaseDDL) + message, err = encoder.EncodeDDLEvent(utils.TestCaseDDL) require.NoError(t, err) err = decoder.AddKeyValue(message.Key, message.Value) @@ -241,10 +242,10 @@ func TestCanalJSONCompressionE2E(t *testing.T) { decodedDDL, err := decoder.NextDDLEvent() require.NoError(t, err) - require.Equal(t, decodedDDL.Query, testCaseDDL.Query) - require.Equal(t, decodedDDL.CommitTs, testCaseDDL.CommitTs) - require.Equal(t, decodedDDL.TableInfo.TableName.Schema, testCaseDDL.TableInfo.TableName.Schema) - require.Equal(t, decodedDDL.TableInfo.TableName.Table, 
testCaseDDL.TableInfo.TableName.Table) + require.Equal(t, decodedDDL.Query, utils.TestCaseDDL.Query) + require.Equal(t, decodedDDL.CommitTs, utils.TestCaseDDL.CommitTs) + require.Equal(t, decodedDDL.TableInfo.TableName.Schema, utils.TestCaseDDL.TableInfo.TableName.Schema) + require.Equal(t, decodedDDL.TableInfo.TableName.Table, utils.TestCaseDDL.TableInfo.TableName.Table) // encode checkpoint event waterMark := uint64(2333) @@ -277,7 +278,7 @@ func TestCanalJSONClaimCheckE2E(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - insertEvent, _, _ := newLargeEvent4Test(t) + insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(ctx, "", insertEvent, func() {}) require.NoError(t, err) @@ -307,7 +308,7 @@ func TestCanalJSONClaimCheckE2E(t *testing.T) { decodedColumns[column.Name] = column } - expectedValue := collectExpectedDecodedValue(testColumnsTable) + expectedValue := utils.CollectExpectedDecodedValue(utils.TestColumnsTable) for _, column := range insertEvent.Columns { decodedColumn, ok := decodedColumns[column.Name] require.True(t, ok) @@ -329,7 +330,7 @@ func TestNewCanalJSONMessageHandleKeyOnly4LargeMessage(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - insertEvent, _, _ := newLargeEvent4Test(t) + insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(context.Background(), "", insertEvent, func() {}) require.NoError(t, err) @@ -372,16 +373,16 @@ func TestNewCanalJSONMessageFromDDL(t *testing.T) { require.NoError(t, err) encoder := builder.Build().(*JSONRowEventEncoder) - message := encoder.newJSONMessageForDDL(testCaseDDL) + message := encoder.newJSONMessageForDDL(utils.TestCaseDDL) require.NotNil(t, message) msg, ok := message.(*JSONMessage) require.True(t, ok) - require.Equal(t, convertToCanalTs(testCaseDDL.CommitTs), msg.ExecutionTime) + require.Equal(t, convertToCanalTs(utils.TestCaseDDL.CommitTs), msg.ExecutionTime) require.True(t, msg.IsDDL) require.Equal(t, "cdc", msg.Schema) require.Equal(t, "person", msg.Table) - require.Equal(t, testCaseDDL.Query, msg.Query) + require.Equal(t, utils.TestCaseDDL.Query, msg.Query) require.Equal(t, "CREATE", msg.EventType) codecConfig.EnableTiDBExtension = true @@ -389,14 +390,14 @@ func TestNewCanalJSONMessageFromDDL(t *testing.T) { require.NoError(t, err) encoder = builder.Build().(*JSONRowEventEncoder) - message = encoder.newJSONMessageForDDL(testCaseDDL) + message = encoder.newJSONMessageForDDL(utils.TestCaseDDL) require.NotNil(t, message) withExtension, ok := message.(*canalJSONMessageWithTiDBExtension) require.True(t, ok) require.NotNil(t, withExtension.Extensions) - require.Equal(t, testCaseDDL.CommitTs, withExtension.Extensions.CommitTs) + require.Equal(t, utils.TestCaseDDL.CommitTs, withExtension.Extensions.CommitTs) } func TestBatching(t *testing.T) { @@ -407,7 +408,7 @@ func TestBatching(t *testing.T) { encoder := builder.Build() require.NotNil(t, encoder) - _, updateEvent, _ := newLargeEvent4Test(t) + _, updateEvent, _ := utils.NewLargeEvent4Test(t) updateCase := *updateEvent for i := 1; i <= 1000; i++ { ts := uint64(i) @@ -547,7 +548,7 @@ func TestDDLEventWithExtensionValueMarshal(t *testing.T) { } require.NotNil(t, encoder) - message := encoder.newJSONMessageForDDL(testCaseDDL) + message := encoder.newJSONMessageForDDL(utils.TestCaseDDL) require.NotNil(t, message) msg, ok := message.(*canalJSONMessageWithTiDBExtension) @@ -731,7 +732,7 @@ func TestCanalJSONContentCompatibleE2E(t *testing.T) { encoder := builder.Build() - insertEvent, _, 
_ := newLargeEvent4Test(t) + insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(ctx, "", insertEvent, func() {}) require.NoError(t, err) @@ -759,7 +760,7 @@ func TestCanalJSONContentCompatibleE2E(t *testing.T) { obtainedColumns[column.Name] = column } - expectedValue := collectExpectedDecodedValue(testColumnsTable) + expectedValue := utils.CollectExpectedDecodedValue(utils.TestColumnsTable) for _, actual := range insertEvent.Columns { obtained, ok := obtainedColumns[actual.Name] require.True(t, ok) diff --git a/pkg/sink/codec/utils/mysql_types.go b/pkg/sink/codec/utils/mysql_types.go index d2eac54fbfa..006799ee37f 100644 --- a/pkg/sink/codec/utils/mysql_types.go +++ b/pkg/sink/codec/utils/mysql_types.go @@ -40,7 +40,7 @@ func WithZerofill4MySQLType(mysqlType string, zerofill bool) string { return mysqlType } -// GetMySQLType get the mysql type from column info +// GetMySQLType get the mysql type from Column info func GetMySQLType(columnInfo *timodel.ColumnInfo, fullType bool) string { if !fullType { result := types.TypeToStr(columnInfo.GetType(), columnInfo.GetCharset()) diff --git a/pkg/sink/codec/canal/canal_test_util.go b/pkg/sink/codec/utils/test_utils.go similarity index 95% rename from pkg/sink/codec/canal/canal_test_util.go rename to pkg/sink/codec/utils/test_utils.go index c7d3d82ba03..a322f0111b2 100644 --- a/pkg/sink/codec/canal/canal_test_util.go +++ b/pkg/sink/codec/utils/test_utils.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package canal +package utils import ( "testing" @@ -23,17 +23,17 @@ import ( ) type testColumnTuple struct { - column *model.Column + Column *model.Column - // expectedEncodedValue is expected by encoding - expectedEncodedValue string + // ExpectedEncodedValue is expected by encoding + ExpectedEncodedValue string // expectedDecodedValue is expected by decoding expectedDecodedValue interface{} } var ( - testColumnsTable = []*testColumnTuple{ + TestColumnsTable = []*testColumnTuple{ { &model.Column{Name: "t", Flag: model.HandleKeyFlag | model.PrimaryKeyFlag, Type: mysql.TypeTiny, Value: int64(127)}, "127", "127", @@ -238,7 +238,7 @@ var ( "2333", "2333", }, - // for column value type in `[]uint8` and have `BinaryFlag`, expectedEncodedValue is dummy. + // for Column value type in `[]uint8` and have `BinaryFlag`, ExpectedEncodedValue is dummy. 
{ &model.Column{Name: "varcharT", Type: mysql.TypeVarchar, Value: []uint8("测试Varchar")}, "测试Varchar", "测试Varchar", @@ -354,7 +354,7 @@ var ( }, } - testCaseDDL = &model.DDLEvent{ + TestCaseDDL = &model.DDLEvent{ CommitTs: 417318403368288260, TableInfo: &model.TableInfo{ TableName: model.TableName{ @@ -369,20 +369,20 @@ var ( func collectAllColumns(groups []*testColumnTuple) []*model.Column { columns := make([]*model.Column, 0, len(groups)) for _, item := range groups { - columns = append(columns, item.column) + columns = append(columns, item.Column) } return columns } -func collectExpectedDecodedValue(columns []*testColumnTuple) map[string]interface{} { +func CollectExpectedDecodedValue(columns []*testColumnTuple) map[string]interface{} { result := make(map[string]interface{}, len(columns)) for _, item := range columns { - result[item.column.Name] = item.expectedDecodedValue + result[item.Column.Name] = item.expectedDecodedValue } return result } -func newLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChangedEvent, *model.RowChangedEvent) { +func NewLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChangedEvent, *model.RowChangedEvent) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() @@ -443,7 +443,7 @@ func newLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChanged tableInfo := model.WrapTableInfo(0, "test", 1, job.BinlogInfo.TableInfo) _, _, colInfo := tableInfo.GetRowColInfos() - testColumns := collectAllColumns(testColumnsTable) + testColumns := collectAllColumns(TestColumnsTable) insert := &model.RowChangedEvent{ CommitTs: 417318403368288260, From 0858e4058c3a13f37fdbc6ad3992d7c9b60bb35f Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Fri, 8 Dec 2023 16:37:39 +0800 Subject: [PATCH 05/24] fix tests. 
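
The simple decoder now resolves large messages on its own: NewDecoder takes a
context plus an optional upstream TiDB handle, opens the claim-check external
storage when that mode is enabled, and rejects a nil *sql.DB only when
handle-key-only is configured (handle-key-only rows are re-assembled by
querying the upstream with common.SnapshotQuery around the event's commitTs).
Every test therefore switches to the new constructor. A minimal sketch of the
updated call pattern when no large-message handling is enabled (msg below
stands for any key/value pair produced by the simple encoder):

    ctx := context.Background()
    codecConfig := common.NewConfig(config.ProtocolSimple)

    // Neither claim-check storage nor an upstream TiDB connection is needed
    // here, so passing a nil *sql.DB is fine.
    dec, err := NewDecoder(ctx, codecConfig, nil)
    require.NoError(t, err)

    err = dec.AddKeyValue(msg.Key, msg.Value)
    require.NoError(t, err)

NewLargeEvent4Test in the shared test utils also gains an extra return value,
so its callers now discard one more result.
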
--- .../canal_json_row_event_encoder_test.go | 14 +- pkg/sink/codec/simple/decoder.go | 164 +++++++- pkg/sink/codec/simple/encoder.go | 4 +- pkg/sink/codec/simple/encoder_test.go | 352 ++++++++++++++---- pkg/sink/codec/simple/message.go | 31 +- pkg/sink/codec/utils/test_utils.go | 6 +- 6 files changed, 478 insertions(+), 93 deletions(-) diff --git a/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go b/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go index 193d9bd9f97..566e7f2cf7b 100644 --- a/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go +++ b/pkg/sink/codec/canal/canal_json_row_event_encoder_test.go @@ -41,7 +41,7 @@ func TestBuildCanalJSONRowEventEncoder(t *testing.T) { } func TestDMLE2E(t *testing.T) { - insertEvent, updateEvent, deleteEvent := utils.NewLargeEvent4Test(t) + _, insertEvent, updateEvent, deleteEvent := utils.NewLargeEvent4Test(t) ctx := context.Background() @@ -126,7 +126,7 @@ func TestDMLE2E(t *testing.T) { } func TestCanalJSONCompressionE2E(t *testing.T) { - insertEvent, _, _ := utils.NewLargeEvent4Test(t) + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) codecConfig := common.NewConfig(config.ProtocolCanalJSON) codecConfig.EnableTiDBExtension = true @@ -217,7 +217,7 @@ func TestCanalJSONClaimCheckE2E(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - insertEvent, _, _ := utils.NewLargeEvent4Test(t) + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(ctx, "", insertEvent, func() {}) require.NoError(t, err) @@ -269,7 +269,7 @@ func TestNewCanalJSONMessageHandleKeyOnly4LargeMessage(t *testing.T) { require.NoError(t, err) encoder := builder.Build() - insertEvent, _, _ := utils.NewLargeEvent4Test(t) + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(context.Background(), "", insertEvent, func() {}) require.NoError(t, err) @@ -351,7 +351,7 @@ func TestBatching(t *testing.T) { encoder := builder.Build() require.NotNil(t, encoder) - _, updateEvent, _ := utils.NewLargeEvent4Test(t) + _, _, updateEvent, _ := utils.NewLargeEvent4Test(t) updateCase := *updateEvent for i := 1; i <= 1000; i++ { ts := uint64(i) @@ -629,7 +629,7 @@ func TestCanalJSONContentCompatibleE2E(t *testing.T) { encoder := builder.Build() - insertEvent, _, _ := utils.NewLargeEvent4Test(t) + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) err = encoder.AppendRowChangedEvent(ctx, "", insertEvent, func() {}) require.NoError(t, err) @@ -666,7 +666,7 @@ func TestCanalJSONContentCompatibleE2E(t *testing.T) { } func TestNewCanalJSONBatchDecoder4RowMessage(t *testing.T) { - insertEvent, _, _ := utils.NewLargeEvent4Test(t) + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) ctx := context.Background() for _, encodeEnable := range []bool{false, true} { diff --git a/pkg/sink/codec/simple/decoder.go b/pkg/sink/codec/simple/decoder.go index a84988ca03f..93847764eb3 100644 --- a/pkg/sink/codec/simple/decoder.go +++ b/pkg/sink/codec/simple/decoder.go @@ -14,29 +14,56 @@ package simple import ( + "context" + "database/sql" "encoding/json" + "path/filepath" "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/types" "github.com/pingcap/tiflow/cdc/model" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) type decoder struct { config *common.Config + upstreamTiDB *sql.DB + storage storage.ExternalStorage + value []byte msg *message memo TableInfoProvider } 
// NewDecoder returns a new decoder -func NewDecoder(config *common.Config) *decoder { - return &decoder{ - config: config, - memo: newMemoryTableInfoProvider(), +func NewDecoder(ctx context.Context, config *common.Config, db *sql.DB) (*decoder, error) { + var ( + externalStorage storage.ExternalStorage + err error + ) + if config.LargeMessageHandle.EnableClaimCheck() { + storageURI := config.LargeMessageHandle.ClaimCheckStorageURI + externalStorage, err = util.GetExternalStorageFromURI(ctx, storageURI) + if err != nil { + return nil, cerror.WrapError(cerror.ErrKafkaInvalidConfig, err) + } + } + + if config.LargeMessageHandle.HandleKeyOnly() && db == nil { + return nil, cerror.ErrCodecDecode. + GenWithStack("handle-key-only is enabled, but upstream TiDB is not provided") } + + return &decoder{ + config: config, + storage: externalStorage, + memo: newMemoryTableInfoProvider(), + }, nil } // AddKeyValue add the received key and values to the decoder, @@ -100,6 +127,14 @@ func (d *decoder) NextRowChangedEvent() (*model.RowChangedEvent, error) { "invalid row changed event message") } + if d.msg.ClaimCheckLocation != "" { + return d.assembleClaimCheckRowChangedEvent(d.msg.ClaimCheckLocation) + } + + if d.msg.HandleKeyOnly { + return d.assembleHandleKeyOnlyRowChangedEvent(d.msg) + } + tableInfo := d.memo.Read(d.msg.Database, d.msg.Table, d.msg.SchemaVersion) if tableInfo == nil { return nil, cerror.ErrCodecDecode.GenWithStack( @@ -116,6 +151,127 @@ func (d *decoder) NextRowChangedEvent() (*model.RowChangedEvent, error) { return event, nil } +func (d *decoder) assembleClaimCheckRowChangedEvent(claimCheckLocation string) (*model.RowChangedEvent, error) { + _, claimCheckFileName := filepath.Split(claimCheckLocation) + data, err := d.storage.ReadFile(context.Background(), claimCheckFileName) + if err != nil { + return nil, err + } + claimCheckM, err := common.UnmarshalClaimCheckMessage(data) + if err != nil { + return nil, err + } + + value, err := common.Decompress(d.config.LargeMessageHandle.LargeMessageHandleCompression, claimCheckM.Value) + if err != nil { + return nil, err + } + + var m message + err = json.Unmarshal(value, &m) + if err != nil { + return nil, err + } + + d.msg = &m + return d.NextRowChangedEvent() +} + +func (d *decoder) assembleHandleKeyOnlyRowChangedEvent(m *message) (*model.RowChangedEvent, error) { + tableInfo := d.memo.Read(m.Database, m.Table, m.SchemaVersion) + if tableInfo == nil { + return nil, cerror.ErrCodecDecode.GenWithStack( + "cannot found the table info, schema: %s, table: %s, version: %d", + m.Database, m.Table, m.SchemaVersion) + } + + fieldTypeMap := make(map[string]*types.FieldType, len(tableInfo.Columns)) + for _, col := range tableInfo.Columns { + fieldTypeMap[col.Name.L] = &col.FieldType + } + + result := &message{ + Version: defaultVersion, + Database: m.Database, + Table: m.Table, + Type: m.Type, + CommitTs: m.CommitTs, + SchemaVersion: m.SchemaVersion, + } + + ctx := context.Background() + switch m.Type { + case InsertType: + holder, err := common.SnapshotQuery(ctx, d.upstreamTiDB, m.CommitTs, m.Database, m.Table, m.Data) + if err != nil { + return nil, err + } + data, err := d.buildData(holder, fieldTypeMap) + if err != nil { + return nil, err + } + result.Data = data + case UpdateType: + holder, err := common.SnapshotQuery(ctx, d.upstreamTiDB, m.CommitTs, m.Database, m.Table, m.Data) + if err != nil { + return nil, err + } + data, err := d.buildData(holder, fieldTypeMap) + if err != nil { + return nil, err + } + result.Data = data + + holder, err = 
common.SnapshotQuery(ctx, d.upstreamTiDB, m.CommitTs-1, m.Database, m.Table, m.Old) + if err != nil { + return nil, err + } + old, err := d.buildData(holder, fieldTypeMap) + if err != nil { + return nil, err + } + result.Old = old + case DeleteType: + holder, err := common.SnapshotQuery(ctx, d.upstreamTiDB, m.CommitTs-1, m.Database, m.Table, m.Old) + if err != nil { + return nil, err + } + data, err := d.buildData(holder, fieldTypeMap) + if err != nil { + return nil, err + } + result.Old = data + } + + d.msg = result + return d.NextRowChangedEvent() +} + +func (d *decoder) buildData( + holder *common.ColumnsHolder, fieldTypeMap map[string]*types.FieldType, +) (map[string]interface{}, error) { + columnsCount := holder.Length() + result := make(map[string]interface{}, columnsCount) + + for i := 0; i < columnsCount; i++ { + col := holder.Types[i] + value := holder.Values[i] + + fieldType, ok := fieldTypeMap[col.Name()] + if !ok { + return nil, cerror.ErrCodecDecode.GenWithStack( + "cannot found the field type, schema: %s, table: %s, column: %s", + d.msg.Database, d.msg.Table, col.Name()) + } + value, err := encodeValue(value, fieldType) + if err != nil { + return nil, err + } + result[col.Name()] = value + } + return result, nil +} + // NextDDLEvent returns the next DDL event if exists func (d *decoder) NextDDLEvent() (*model.DDLEvent, error) { if d.msg.Type != DDLType && d.msg.Type != BootstrapType { diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index e8988fc4a0c..f8ea8bdce76 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -125,8 +125,8 @@ func (e *encoder) Build() []*common.Message { // EncodeCheckpointEvent implement the DDLEventBatchEncoder interface func (e *encoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { - message := newResolvedMessage(ts) - value, err := json.Marshal(message) + m := newResolvedMessage(ts) + value, err := json.Marshal(m) if err != nil { return nil, cerror.WrapError(cerror.ErrEncodeFailed, err) } diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 4fba28e1ad7..8efdfdd29e5 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -15,13 +15,16 @@ package simple import ( "context" + "database/sql" "testing" "github.com/pingcap/tiflow/cdc/entry" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/compression" "github.com/pingcap/tiflow/pkg/config" + "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/pkg/sink/codec/utils" "github.com/stretchr/testify/require" ) @@ -44,7 +47,9 @@ func TestEncodeCheckpoint(t *testing.T) { m, err := enc.EncodeCheckpointEvent(uint64(checkpoint)) require.NoError(t, err) - dec := NewDecoder(codecConfig) + dec, err := NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) + err = dec.AddKeyValue(m.Key, m.Value) require.NoError(t, err) @@ -77,17 +82,19 @@ func TestEncodeDDLEvent(t *testing.T) { enc := builder.Build() sql := `create table test.t( - id int primary key, - name varchar(255) not null, - gender enum('male', 'female'), - email varchar(255) not null, - key idx_name_email(name, email))` + id int primary key, + name varchar(255) not null, + gender enum('male', 'female'), + email varchar(255) not null, + key idx_name_email(name, email))` ddlEvent := helper.DDL2Event(sql) m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) - dec := NewDecoder(codecConfig) + dec, err := 
NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) + err = dec.AddKeyValue(m.Key, m.Value) require.NoError(t, err) @@ -144,6 +151,78 @@ func TestEncoderOtherTypes(t *testing.T) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() + ctx := context.Background() + codecConfig := common.NewConfig(config.ProtocolSimple) + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + sql := `create table test.t( + a int primary key auto_increment, + b enum('a', 'b', 'c'), + c set('a', 'b', 'c'), + d bit(64), + e json)` + ddlEvent := helper.DDL2Event(sql) + + m, err := enc.EncodeDDLEvent(ddlEvent) + require.NoError(t, err) + + dec, err := NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) + + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) + + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeDDL, messageType) + + _, err = dec.NextDDLEvent() + require.NoError(t, err) + + sql = `insert into test.t() values (1, 'a', 'a,b', b'1000001', '{ + "key1": "value1", + "key2": "value2" + }');` + row := helper.DML2Event(sql, "test", "t") + + err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) + require.NoError(t, err) + + messages := enc.Build() + require.Len(t, messages, 1) + + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) + require.NoError(t, err) + + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) + + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) + + decodedColumns := make(map[string]*model.Column, len(decodedRow.Columns)) + for _, column := range decodedRow.Columns { + decodedColumns[column.Name] = column + } + + for _, expected := range row.Columns { + decoded, ok := decodedColumns[expected.Name] + require.True(t, ok) + require.Equal(t, expected.Value, decoded.Value) + require.Equal(t, expected.Charset, decoded.Charset) + require.Equal(t, expected.Collation, decoded.Collation) + } +} + +func TestEncodeBootstrapEvent(t *testing.T) { + helper := entry.NewSchemaTestHelper(t) + defer helper.Close() + ctx := context.Background() for _, compressionType := range []string{ compression.None, @@ -157,17 +236,20 @@ func TestEncoderOtherTypes(t *testing.T) { enc := builder.Build() sql := `create table test.t( - a int primary key auto_increment, - b enum('a', 'b', 'c'), - c set('a', 'b', 'c'), - d bit(64), - e json)` + id int primary key, + name varchar(255) not null, + age int, + email varchar(255) not null, + key idx_name_email(name, email))` ddlEvent := helper.DDL2Event(sql) + ddlEvent.IsBootstrap = true m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) - dec := NewDecoder(codecConfig) + dec, err := NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) + err = dec.AddKeyValue(m.Key, m.Value) require.NoError(t, err) @@ -175,14 +257,24 @@ func TestEncoderOtherTypes(t *testing.T) { require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeDDL, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) - _, err = dec.NextDDLEvent() + event, err := dec.NextDDLEvent() require.NoError(t, err) + require.Equal(t, ddlEvent.CommitTs, event.CommitTs) + // because we don't we don't set startTs in the encoded message, + // so the startTs is equal to commitTs + require.Equal(t, ddlEvent.CommitTs, event.StartTs) + // Bootstrap event doesn't have query + require.Equal(t, "", 
event.Query) + require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) + require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) - sql = `insert into test.t() values (1, 'a', 'a,b', b'1000001', '{ - "key1": "value1", - "key2": "value2" - }');` + item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, + ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) + require.NotNil(t, item) + + sql = `insert into test.t values (1, "jack", 23, "jack@abc.com")` row := helper.DML2Event(sql, "test", "t") err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) @@ -198,57 +290,128 @@ func TestEncoderOtherTypes(t *testing.T) { require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeRow, messageType) + require.NotEqual(t, 0, dec.msg.BuildTs) decodedRow, err := dec.NextRowChangedEvent() require.NoError(t, err) + require.Equal(t, decodedRow.CommitTs, row.CommitTs) + require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) + require.Equal(t, decodedRow.Table.Table, row.Table.Table) + require.Nil(t, decodedRow.PreColumns) + + helper.Tk().MustExec("drop table test.t") + } +} + +func TestDMLEventCompressionE2E(t *testing.T) { + ddlEvent, insertEvent, _, _ := utils.NewLargeEvent4Test(t) + + ctx := context.Background() + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + dec, err := NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) + + m, err := enc.EncodeDDLEvent(ddlEvent) + require.NoError(t, err) + + err = dec.AddKeyValue(m.Key, m.Value) + require.NoError(t, err) + + messageType, hasNext, err := dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeDDL, messageType) + + _, err = dec.NextDDLEvent() + require.NoError(t, err) + + err = enc.AppendRowChangedEvent(context.Background(), "", insertEvent, func() {}) + require.NoError(t, err) + + messages := enc.Build() + require.Len(t, messages, 1) + + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) + require.NoError(t, err) + + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) + + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) + + require.Equal(t, decodedRow.CommitTs, insertEvent.CommitTs) + require.Equal(t, decodedRow.Table.Schema, insertEvent.Table.Schema) + require.Equal(t, decodedRow.Table.Table, insertEvent.Table.Table) decodedColumns := make(map[string]*model.Column, len(decodedRow.Columns)) for _, column := range decodedRow.Columns { decodedColumns[column.Name] = column } - - for _, expected := range row.Columns { - decoded, ok := decodedColumns[expected.Name] + for _, col := range insertEvent.Columns { + decoded, ok := decodedColumns[col.Name] require.True(t, ok) - require.Equal(t, expected.Value, decoded.Value) - require.Equal(t, expected.Charset, decoded.Charset) - require.Equal(t, expected.Collation, decoded.Collation) + require.Equal(t, col.Type, decoded.Type) + require.Equal(t, col.Charset, decoded.Charset) + require.Equal(t, col.Collation, decoded.Collation) + require.EqualValues(t, col.Value, decoded.Value) } - - helper.Tk().MustExec("drop table test.t") } } -func 
TestEncodeBootstrapEvent(t *testing.T) { - helper := entry.NewSchemaTestHelper(t) - defer helper.Close() +func TestDMLMessageTooLarge(t *testing.T) { + _, insertEvent, _, _ := utils.NewLargeEvent4Test(t) + + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.MaxMessageBytes = 100 + + builder, err := NewBuilder(context.Background(), codecConfig) + require.NoError(t, err) + enc := builder.Build() + + err = enc.AppendRowChangedEvent(context.Background(), "", insertEvent, func() {}) + require.ErrorIs(t, err, errors.ErrMessageTooLarge) +} + +func TestLargerMessageHandleClaimCheck(t *testing.T) { + ddlEvent, _, updateEvent, _ := utils.NewLargeEvent4Test(t) ctx := context.Background() + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.MaxMessageBytes = 500 + codecConfig.LargeMessageHandle.LargeMessageHandleOption = config.LargeMessageHandleOptionClaimCheck + codecConfig.LargeMessageHandle.ClaimCheckStorageURI = "file:///tmp/simple-claim-check" + for _, compressionType := range []string{ compression.None, compression.Snappy, compression.LZ4, } { - codecConfig := common.NewConfig(config.ProtocolSimple) codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + builder, err := NewBuilder(ctx, codecConfig) require.NoError(t, err) enc := builder.Build() - sql := `create table test.t( - id int primary key, - name varchar(255) not null, - age int, - email varchar(255) not null, - key idx_name(name), - key idx_name_email(name, email))` - ddlEvent := helper.DDL2Event(sql) - ddlEvent.IsBootstrap = true + dec, err := NewDecoder(ctx, codecConfig, nil) + require.NoError(t, err) m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) - dec := NewDecoder(codecConfig) err = dec.AddKeyValue(m.Key, m.Value) require.NoError(t, err) @@ -256,48 +419,105 @@ func TestEncodeBootstrapEvent(t *testing.T) { require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeDDL, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - event, err := dec.NextDDLEvent() + _, err = dec.NextDDLEvent() require.NoError(t, err) - require.Equal(t, ddlEvent.CommitTs, event.CommitTs) - // because we don't we don't set startTs in the encoded message, - // so the startTs is equal to commitTs - require.Equal(t, ddlEvent.CommitTs, event.StartTs) - // Bootstrap event doesn't have query - require.Equal(t, "", event.Query) - require.Equal(t, len(ddlEvent.TableInfo.Columns), len(event.TableInfo.Columns)) - require.Equal(t, len(ddlEvent.TableInfo.Indices), len(event.TableInfo.Indices)) - item := dec.memo.Read(ddlEvent.TableInfo.TableName.Schema, - ddlEvent.TableInfo.TableName.Table, ddlEvent.TableInfo.UpdateTS) - require.NotNil(t, item) + err = enc.AppendRowChangedEvent(ctx, "", updateEvent, func() {}) + require.NoError(t, err) - sql = `insert into test.t values (1, "jack", 23, "jack@abc.com")` - row := helper.DML2Event(sql, "test", "t") + claimCheckLocationM := enc.Build()[0] - err = enc.AppendRowChangedEvent(context.Background(), "", row, func() {}) + err = dec.AddKeyValue(claimCheckLocationM.Key, claimCheckLocationM.Value) + require.NoError(t, err) + + messageType, hasNext, err = dec.HasNext() + require.NoError(t, err) + require.True(t, hasNext) + require.Equal(t, model.MessageTypeRow, messageType) + + decodedRow, err := dec.NextRowChangedEvent() + require.NoError(t, err) + + require.Equal(t, decodedRow.CommitTs, updateEvent.CommitTs) + require.Equal(t, decodedRow.Table.Schema, updateEvent.Table.Schema) + require.Equal(t, decodedRow.Table.Table, 
updateEvent.Table.Table) + + decodedColumns := make(map[string]*model.Column, len(decodedRow.Columns)) + for _, column := range decodedRow.Columns { + decodedColumns[column.Name] = column + } + for _, col := range updateEvent.Columns { + decoded, ok := decodedColumns[col.Name] + require.True(t, ok) + require.Equal(t, col.Type, decoded.Type) + require.Equal(t, col.Charset, decoded.Charset) + require.Equal(t, col.Collation, decoded.Collation) + require.EqualValues(t, col.Value, decoded.Value) + } + + for _, column := range decodedRow.PreColumns { + decodedColumns[column.Name] = column + } + for _, col := range updateEvent.PreColumns { + decoded, ok := decodedColumns[col.Name] + require.True(t, ok) + require.Equal(t, col.Type, decoded.Type) + require.Equal(t, col.Charset, decoded.Charset) + require.Equal(t, col.Collation, decoded.Collation) + require.EqualValues(t, col.Value, decoded.Value) + } + } +} + +func TestLargeMessageHandleKeyOnly(t *testing.T) { + _, _, updateEvent, _ := utils.NewLargeEvent4Test(t) + + ctx := context.Background() + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.MaxMessageBytes = 500 + codecConfig.LargeMessageHandle.LargeMessageHandleOption = config.LargeMessageHandleOptionHandleKeyOnly + for _, compressionType := range []string{ + compression.None, + compression.Snappy, + compression.LZ4, + } { + codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType + + builder, err := NewBuilder(ctx, codecConfig) + require.NoError(t, err) + enc := builder.Build() + + err = enc.AppendRowChangedEvent(ctx, "", updateEvent, func() {}) require.NoError(t, err) messages := enc.Build() require.Len(t, messages, 1) + dec, err := NewDecoder(ctx, codecConfig, &sql.DB{}) + require.NoError(t, err) + err = dec.AddKeyValue(messages[0].Key, messages[0].Value) require.NoError(t, err) - messageType, hasNext, err = dec.HasNext() + messageType, hasNext, err := dec.HasNext() require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeRow, messageType) - require.NotEqual(t, 0, dec.msg.BuildTs) - - decodedRow, err := dec.NextRowChangedEvent() - require.NoError(t, err) - require.Equal(t, decodedRow.CommitTs, row.CommitTs) - require.Equal(t, decodedRow.Table.Schema, row.Table.Schema) - require.Equal(t, decodedRow.Table.Table, row.Table.Table) - require.Nil(t, decodedRow.PreColumns) - helper.Tk().MustExec("drop table test.t") + for _, col := range updateEvent.Columns { + if col.Flag.IsHandleKey() { + require.Contains(t, dec.msg.Data, col.Name) + } else { + require.NotContains(t, dec.msg.Data, col.Name) + } + } + for _, col := range updateEvent.PreColumns { + if col.Flag.IsHandleKey() { + require.Contains(t, dec.msg.Old, col.Name) + } else { + require.NotContains(t, dec.msg.Old, col.Name) + } + } } } diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 03a40751a84..fbf0a5844d6 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -324,6 +324,9 @@ type message struct { // ClaimCheckLocation is only for the DML event. ClaimCheckLocation string `json:"claimCheckLocation,omitempty"` + + // HandleKeyOnly is only for the DML event. 
+ HandleKeyOnly bool `json:"handleKeyOnly,omitempty"` } func newResolvedMessage(ts uint64) *message { @@ -512,49 +515,55 @@ func decodeColumn(name string, value interface{}, fieldType *types.FieldType) (* return result, nil } + var err error switch fieldType.GetType() { case mysql.TypeBit: - val, err := strconv.ParseUint(data, 10, 64) + value, err = strconv.ParseUint(data, 10, 64) if err != nil { log.Panic("invalid column value for bit or set", zap.String("name", name), zap.Any("data", data), zap.Any("type", fieldType.GetType()), zap.Error(err)) } - result.Value = val - case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeInt24: - val, err := strconv.ParseInt(data, 10, 64) + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeInt24, mysql.TypeYear: + value, err = strconv.ParseInt(data, 10, 64) if err != nil { return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } - result.Value = val + case mysql.TypeLonglong: + value, err = strconv.ParseInt(data, 10, 64) + if err != nil { + value, err = strconv.ParseUint(data, 10, 64) + if err != nil { + return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) + } + } case mysql.TypeFloat: - val, err := strconv.ParseFloat(data, 32) + value, err = strconv.ParseFloat(data, 32) if err != nil { return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } - result.Value = val case mysql.TypeDouble: - val, err := strconv.ParseFloat(data, 64) + value, err = strconv.ParseFloat(data, 64) if err != nil { return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } - result.Value = val case mysql.TypeEnum: element := fieldType.GetElems() enumVar, err := tiTypes.ParseEnumName(element, data, fieldType.GetCharset()) if err != nil { return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } - result.Value = enumVar.Value + value = enumVar.Value case mysql.TypeSet: elements := fieldType.GetElems() setVar, err := tiTypes.ParseSetName(elements, data, fieldType.GetCharset()) if err != nil { return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } - result.Value = setVar.Value + value = setVar.Value default: } + result.Value = value return result, nil } diff --git a/pkg/sink/codec/utils/test_utils.go b/pkg/sink/codec/utils/test_utils.go index a1f93fdb392..d458175e8c2 100644 --- a/pkg/sink/codec/utils/test_utils.go +++ b/pkg/sink/codec/utils/test_utils.go @@ -21,7 +21,7 @@ import ( ) // NewLargeEvent4Test creates large events for test -func NewLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChangedEvent, *model.RowChangedEvent) { +func NewLargeEvent4Test(t *testing.T) (*model.DDLEvent, *model.RowChangedEvent, *model.RowChangedEvent, *model.RowChangedEvent) { helper := entry.NewSchemaTestHelper(t) defer helper.Close() @@ -78,7 +78,7 @@ func NewLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChanged setT set('a', 'b', 'c'), bitT bit(10), jsonT json)` - _ = helper.DDL2Event(sql) + ddlEvent := helper.DDL2Event(sql) sql = `insert into test.t values( 127, @@ -142,5 +142,5 @@ func NewLargeEvent4Test(t *testing.T) (*model.RowChangedEvent, *model.RowChanged deleteE.PreColumns = deleteE.Columns deleteE.Columns = nil - return insert, &update, &deleteE + return ddlEvent, insert, &update, &deleteE } From b361329870883e2cd945839f757d397f16d6069e Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Fri, 8 Dec 2023 16:46:55 +0800 Subject: [PATCH 06/24] add more unit test. 
--- cmd/kafka-consumer/main.go | 2 +- .../conf/changefeed.toml | 4 + .../conf/diff_config.toml | 29 +++++ .../kafka_simple_claim_check/data/data.sql | 100 ++++++++++++++++++ .../kafka_simple_claim_check/run.sh | 48 +++++++++ .../conf/changefeed.toml | 3 + .../conf/diff_config.toml | 29 +++++ .../data/data.sql | 100 ++++++++++++++++++ .../kafka_simple_handle_key_only/run.sh | 48 +++++++++ tests/integration_tests/run_group.sh | 2 +- 10 files changed, 363 insertions(+), 2 deletions(-) create mode 100644 tests/integration_tests/kafka_simple_claim_check/conf/changefeed.toml create mode 100644 tests/integration_tests/kafka_simple_claim_check/conf/diff_config.toml create mode 100644 tests/integration_tests/kafka_simple_claim_check/data/data.sql create mode 100644 tests/integration_tests/kafka_simple_claim_check/run.sh create mode 100644 tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml create mode 100644 tests/integration_tests/kafka_simple_handle_key_only/conf/diff_config.toml create mode 100644 tests/integration_tests/kafka_simple_handle_key_only/data/data.sql create mode 100644 tests/integration_tests/kafka_simple_handle_key_only/run.sh diff --git a/cmd/kafka-consumer/main.go b/cmd/kafka-consumer/main.go index 57e289bb882..0f787362646 100644 --- a/cmd/kafka-consumer/main.go +++ b/cmd/kafka-consumer/main.go @@ -630,7 +630,7 @@ func (c *Consumer) ConsumeClaim(session sarama.ConsumerGroupSession, claim saram } decoder = avro.NewDecoder(c.codecConfig, schemaM, c.option.topic, c.tz) case config.ProtocolSimple: - decoder = simple.NewDecoder() + decoder, err = simple.NewDecoder(ctx, c.codecConfig, c.upstreamTiDB) default: log.Panic("Protocol not supported", zap.Any("Protocol", c.codecConfig.Protocol)) } diff --git a/tests/integration_tests/kafka_simple_claim_check/conf/changefeed.toml b/tests/integration_tests/kafka_simple_claim_check/conf/changefeed.toml new file mode 100644 index 00000000000..296d4b7522c --- /dev/null +++ b/tests/integration_tests/kafka_simple_claim_check/conf/changefeed.toml @@ -0,0 +1,4 @@ +[sink.kafka-config.large-message-handle] +large-message-handle-compression = "snappy" +large-message-handle-option = "claim-check" +claim-check-storage-uri = "file:///tmp/kafka-simple-claim-check" diff --git a/tests/integration_tests/kafka_simple_claim_check/conf/diff_config.toml b/tests/integration_tests/kafka_simple_claim_check/conf/diff_config.toml new file mode 100644 index 00000000000..13480c80b3e --- /dev/null +++ b/tests/integration_tests/kafka_simple_claim_check/conf/diff_config.toml @@ -0,0 +1,29 @@ +# diff Configuration. 
+ +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + +[task] +output-dir = "/tmp/tidb_cdc_test/kafka_simple_claim_check/output" + +source-instances = ["mysql1"] + +target-instance = "tidb0" + +target-check-tables = ["test.?*"] + +[data-sources] +[data-sources.mysql1] +host = "127.0.0.1" +port = 4000 +user = "root" +password = "" + +[data-sources.tidb0] +host = "127.0.0.1" +port = 3306 +user = "root" +password = "" diff --git a/tests/integration_tests/kafka_simple_claim_check/data/data.sql b/tests/integration_tests/kafka_simple_claim_check/data/data.sql new file mode 100644 index 00000000000..2977b9aa123 --- /dev/null +++ b/tests/integration_tests/kafka_simple_claim_check/data/data.sql @@ -0,0 +1,100 @@ +drop database if exists test; +create database test; +use test; + +create table t ( + id int primary key auto_increment, + + c_tinyint tinyint null, + c_smallint smallint null, + c_mediumint mediumint null, + c_int int null, + c_bigint bigint null, + + c_unsigned_tinyint tinyint unsigned null, + c_unsigned_smallint smallint unsigned null, + c_unsigned_mediumint mediumint unsigned null, + c_unsigned_int int unsigned null, + c_unsigned_bigint bigint unsigned null, + + c_float float null, + c_double double null, + c_decimal decimal null, + c_decimal_2 decimal(10, 4) null, + + c_unsigned_float float unsigned null, + c_unsigned_double double unsigned null, + c_unsigned_decimal decimal unsigned null, + c_unsigned_decimal_2 decimal(10, 4) unsigned null, + + c_date date null, + c_datetime datetime null, + c_timestamp timestamp null, + c_time time null, + c_year year null, + + c_tinytext tinytext null, + c_text text null, + c_mediumtext mediumtext null, + c_longtext longtext null, + + c_tinyblob tinyblob null, + c_blob blob null, + c_mediumblob mediumblob null, + c_longblob longblob null, + + c_char char(16) null, + c_varchar varchar(16) null, + c_binary binary(16) null, + c_varbinary varbinary(16) null, + + c_enum enum ('a','b','c') null, + c_set set ('a','b','c') null, + c_bit bit(64) null, + c_json json null +); + +insert into t values ( + 1, + 1, 2, 3, 4, 5, + 1, 2, 3, 4, 5, + 2020.0202, 2020.0303, 2020.0404, 2021.1208, + 3.1415, 2.7182, 8000, 179394.233, + '2020-02-20', '2020-02-20 02:20:20', '2020-02-20 02:20:20', '02:20:20', '2020', + '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', + x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + '89504E470D0A1A0A', '89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + 'b', 'b,c', b'1000001', '{ + "key1": "value1", + "key2": "value2", + "key3": "123" + }' +); + +update t set c_float = 3.1415, c_double = 2.7182, c_decimal = 8000, c_decimal_2 = 179394.233 where id = 1; + +delete from t where id = 1; + +insert into t values ( + 2, + 1, 2, 3, 4, 5, + 1, 2, 3, 4, 5, + 2020.0202, 2020.0303, 2020.0404, 2021.1208, + 3.1415, 2.7182, 8000, 179394.233, + '2020-02-20', '2020-02-20 02:20:20', '2020-02-20 02:20:20', '02:20:20', '2020', + '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', + x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + '89504E470D0A1A0A', '89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + 'b', 'b,c', b'1000001', '{ + "key1": "value1", + "key2": "value2", + "key3": "123" + }' +); + +update t set c_float = 3.1415, c_double = 2.7182, c_decimal = 8000, c_decimal_2 = 179394.233 where id = 2; + +create table finish_mark +( + id int PRIMARY KEY +); diff --git 
a/tests/integration_tests/kafka_simple_claim_check/run.sh b/tests/integration_tests/kafka_simple_claim_check/run.sh
new file mode 100644
index 00000000000..038c4e147ea
--- /dev/null
+++ b/tests/integration_tests/kafka_simple_claim_check/run.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -e
+
+CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+source $CUR/../_utils/test_prepare
+WORK_DIR=$OUT_DIR/$TEST_NAME
+CDC_BINARY=cdc.test
+SINK_TYPE=$1
+
+# use kafka-consumer with simple protocol decoder to sync data from kafka to mysql
+function run() {
+	if [ "$SINK_TYPE" != "kafka" ]; then
+		return
+	fi
+
+	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
+
+	start_tidb_cluster --workdir $WORK_DIR
+
+	cd $WORK_DIR
+
+	TOPIC_NAME="kafka-simple-claim-check"
+
+	# record tso before we create tables to skip the system table DDLs
+	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
+
+	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
+
+	SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500"
+
+	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml"
+
+	cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 &
+
+	run_sql_file $CUR/data/data.sql ${UP_TIDB_HOST} ${UP_TIDB_PORT}
+
+	# sync_diff can't check non-exist table, so we check expected tables are created in downstream first
+	check_table_exists test.finish_mark ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 200
+	check_sync_diff $WORK_DIR $CUR/conf/diff_config.toml
+
+	cleanup_process $CDC_BINARY
+}
+
+trap stop_tidb_cluster EXIT
+run $*
+check_logs $WORK_DIR
+echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"
diff --git a/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml b/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml
new file mode 100644
index 00000000000..1a68fb078e4
--- /dev/null
+++ b/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml
@@ -0,0 +1,3 @@
+[sink.kafka-config.large-message-handle]
+large-message-handle-option = "handle-key-only"
+large-message-handle-compression = "snappy"
diff --git a/tests/integration_tests/kafka_simple_handle_key_only/conf/diff_config.toml b/tests/integration_tests/kafka_simple_handle_key_only/conf/diff_config.toml
new file mode 100644
index 00000000000..7b35b451c52
--- /dev/null
+++ b/tests/integration_tests/kafka_simple_handle_key_only/conf/diff_config.toml
@@ -0,0 +1,29 @@
+# diff Configuration.
+ +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + +[task] +output-dir = "/tmp/tidb_cdc_test/simple_handle_key_only/output" + +source-instances = ["mysql1"] + +target-instance = "tidb0" + +target-check-tables = ["test.?*"] + +[data-sources] +[data-sources.mysql1] +host = "127.0.0.1" +port = 4000 +user = "root" +password = "" + +[data-sources.tidb0] +host = "127.0.0.1" +port = 3306 +user = "root" +password = "" diff --git a/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql new file mode 100644 index 00000000000..2977b9aa123 --- /dev/null +++ b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql @@ -0,0 +1,100 @@ +drop database if exists test; +create database test; +use test; + +create table t ( + id int primary key auto_increment, + + c_tinyint tinyint null, + c_smallint smallint null, + c_mediumint mediumint null, + c_int int null, + c_bigint bigint null, + + c_unsigned_tinyint tinyint unsigned null, + c_unsigned_smallint smallint unsigned null, + c_unsigned_mediumint mediumint unsigned null, + c_unsigned_int int unsigned null, + c_unsigned_bigint bigint unsigned null, + + c_float float null, + c_double double null, + c_decimal decimal null, + c_decimal_2 decimal(10, 4) null, + + c_unsigned_float float unsigned null, + c_unsigned_double double unsigned null, + c_unsigned_decimal decimal unsigned null, + c_unsigned_decimal_2 decimal(10, 4) unsigned null, + + c_date date null, + c_datetime datetime null, + c_timestamp timestamp null, + c_time time null, + c_year year null, + + c_tinytext tinytext null, + c_text text null, + c_mediumtext mediumtext null, + c_longtext longtext null, + + c_tinyblob tinyblob null, + c_blob blob null, + c_mediumblob mediumblob null, + c_longblob longblob null, + + c_char char(16) null, + c_varchar varchar(16) null, + c_binary binary(16) null, + c_varbinary varbinary(16) null, + + c_enum enum ('a','b','c') null, + c_set set ('a','b','c') null, + c_bit bit(64) null, + c_json json null +); + +insert into t values ( + 1, + 1, 2, 3, 4, 5, + 1, 2, 3, 4, 5, + 2020.0202, 2020.0303, 2020.0404, 2021.1208, + 3.1415, 2.7182, 8000, 179394.233, + '2020-02-20', '2020-02-20 02:20:20', '2020-02-20 02:20:20', '02:20:20', '2020', + '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', + x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + '89504E470D0A1A0A', '89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + 'b', 'b,c', b'1000001', '{ + "key1": "value1", + "key2": "value2", + "key3": "123" + }' +); + +update t set c_float = 3.1415, c_double = 2.7182, c_decimal = 8000, c_decimal_2 = 179394.233 where id = 1; + +delete from t where id = 1; + +insert into t values ( + 2, + 1, 2, 3, 4, 5, + 1, 2, 3, 4, 5, + 2020.0202, 2020.0303, 2020.0404, 2021.1208, + 3.1415, 2.7182, 8000, 179394.233, + '2020-02-20', '2020-02-20 02:20:20', '2020-02-20 02:20:20', '02:20:20', '2020', + '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', '89504E470D0A1A0A', + x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + '89504E470D0A1A0A', '89504E470D0A1A0A', x'89504E470D0A1A0A', x'89504E470D0A1A0A', + 'b', 'b,c', b'1000001', '{ + "key1": "value1", + "key2": "value2", + "key3": "123" + }' +); + +update t set c_float = 3.1415, c_double = 2.7182, c_decimal = 8000, c_decimal_2 = 179394.233 where id = 2; + +create table finish_mark +( + id int PRIMARY KEY +); diff --git 
a/tests/integration_tests/kafka_simple_handle_key_only/run.sh b/tests/integration_tests/kafka_simple_handle_key_only/run.sh
new file mode 100644
index 00000000000..7208687275c
--- /dev/null
+++ b/tests/integration_tests/kafka_simple_handle_key_only/run.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -e
+
+CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+source $CUR/../_utils/test_prepare
+WORK_DIR=$OUT_DIR/$TEST_NAME
+CDC_BINARY=cdc.test
+SINK_TYPE=$1
+
+# use kafka-consumer with simple protocol decoder to sync data from kafka to mysql
+function run() {
+	if [ "$SINK_TYPE" != "kafka" ]; then
+		return
+	fi
+
+	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
+
+	start_tidb_cluster --workdir $WORK_DIR
+
+	cd $WORK_DIR
+
+	TOPIC_NAME="simple-handle-key-only"
+
+	# record tso before we create tables to skip the system table DDLs
+	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
+
+	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
+
+	SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500"
+
+	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml"
+
+	cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 &
+
+	run_sql_file $CUR/data/data.sql ${UP_TIDB_HOST} ${UP_TIDB_PORT}
+
+	# sync_diff can't check non-exist table, so we check expected tables are created in downstream first
+	check_table_exists test.finish_mark ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 200
+	check_sync_diff $WORK_DIR $CUR/conf/diff_config.toml
+
+	cleanup_process $CDC_BINARY
+}
+
+trap stop_tidb_cluster EXIT
+run $*
+check_logs $WORK_DIR
+echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"
diff --git a/tests/integration_tests/run_group.sh b/tests/integration_tests/run_group.sh
index ba16bf6ec70..fbb8175f790 100755
--- a/tests/integration_tests/run_group.sh
+++ b/tests/integration_tests/run_group.sh
@@ -15,7 +15,7 @@ mysql_only_http="http_api http_api_tls api_v2"
 mysql_only_consistent_replicate="consistent_replicate_ddl consistent_replicate_gbk consistent_replicate_nfs consistent_replicate_storage_file consistent_replicate_storage_file_large_value consistent_replicate_storage_s3 consistent_partition_table"
 
 kafka_only="kafka_big_messages kafka_compression kafka_messages kafka_sink_error_resume mq_sink_lost_callback mq_sink_dispatcher kafka_column_selector kafka_column_selector_avro"
-kafka_only_protocol="kafka_simple_basic canal_json_adapter_compatibility canal_json_basic canal_json_content_compatible multi_topics avro_basic canal_json_handle_key_only open_protocol_handle_key_only canal_json_claim_check open_protocol_claim_check"
+kafka_only_protocol="kafka_simple_basic kafka_simple_handle_key_only kafka_simple_claim_check canal_json_adapter_compatibility canal_json_basic canal_json_content_compatible multi_topics avro_basic canal_json_handle_key_only open_protocol_handle_key_only canal_json_claim_check open_protocol_claim_check"
 kafka_only_v2="kafka_big_txn_v2 kafka_big_messages_v2 multi_tables_ddl_v2 multi_topics_v2"
 
 storage_only="lossy_ddl storage_csv_update"

From 58a4a5b27251885bad1ad0d97772260d5e497f19 Mon Sep 17 00:00:00 2001
From: 3AceShowHand
Date: Fri, 8 Dec 2023 17:48:39 +0800
Subject: [PATCH 07/24] simple support encode large message handle .
--- pkg/config/large_message.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/config/large_message.go b/pkg/config/large_message.go index 83d6d255853..f0a7ae76695 100644 --- a/pkg/config/large_message.go +++ b/pkg/config/large_message.go @@ -62,7 +62,7 @@ func (c *LargeMessageHandleConfig) AdjustAndValidate(protocol Protocol, enableTi } switch protocol { - case ProtocolOpen: + case ProtocolOpen, ProtocolSimple: case ProtocolCanalJSON: if !enableTiDBExtension { return cerror.ErrInvalidReplicaConfig.GenWithStack( From 842b94199fb853e12595cbf638ea775c928247ba Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 11 Dec 2023 11:16:59 +0800 Subject: [PATCH 08/24] adjust log. --- pkg/sink/codec/simple/encoder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index f8ea8bdce76..52a8d870e56 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -154,7 +154,7 @@ func (e *encoder) EncodeDDLEvent(event *model.DDLEvent) (*common.Message, error) result := common.NewDDLMsg(config.ProtocolSimple, nil, value, event) if result.Length() > e.config.MaxMessageBytes { - if e.config.LargeMessageHandle.Disabled() { + if !e.config.LargeMessageHandle.EnableClaimCheck() { log.Error("DDL message is too large for simple", zap.Int("maxMessageBytes", e.config.MaxMessageBytes), zap.Int("length", result.Length()), From 031f5974f82f62f54cde01ab784e930438029a4a Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 11 Dec 2023 15:31:37 +0800 Subject: [PATCH 09/24] fix integration test. --- pkg/sink/codec/simple/encoder.go | 99 ++++++++++--------- pkg/sink/codec/simple/encoder_test.go | 17 ++-- pkg/sink/codec/simple/message.go | 1 + .../kafka_simple_claim_check/data/data.sql | 56 ----------- .../kafka_simple_claim_check/data/ddl.sql | 55 +++++++++++ .../kafka_simple_claim_check/run.sh | 13 ++- .../conf/changefeed.toml | 2 +- .../data/data.sql | 56 ----------- .../kafka_simple_handle_key_only/data/ddl.sql | 55 +++++++++++ .../kafka_simple_handle_key_only/run.sh | 13 ++- 10 files changed, 195 insertions(+), 172 deletions(-) create mode 100644 tests/integration_tests/kafka_simple_claim_check/data/ddl.sql create mode 100644 tests/integration_tests/kafka_simple_handle_key_only/data/ddl.sql diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index 52a8d870e56..6432086a7c0 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -63,54 +63,62 @@ func (e *encoder) AppendRowChangedEvent( Protocol: config.ProtocolSimple, Callback: callback, } + + log.Info("simple encode DML message", zap.Int("length", result.Length())) + result.IncRowsCount() - if result.Length() > e.config.MaxMessageBytes { - if e.config.LargeMessageHandle.Disabled() { - log.Error("Single message is too large for simple", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("length", result.Length()), - zap.Any("table", event.Table)) - return cerror.ErrMessageTooLarge.GenWithStackByArgs() - } + if result.Length() <= e.config.MaxMessageBytes { + e.messages = append(e.messages, result) + return nil + } - m, err = newDMLMessage(event, true) - if err != nil { - return err - } + if e.config.LargeMessageHandle.Disabled() { + log.Error("Single message is too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() + 
} - if e.config.LargeMessageHandle.EnableClaimCheck() { - fileName := claimcheck.NewFileName() - if err = e.claimCheck.WriteMessage(ctx, result.Key, result.Value, fileName); err != nil { - return errors.Trace(err) - } - m.ClaimCheckLocation = e.claimCheck.FileNameWithPrefix(fileName) - } - value, err = json.Marshal(m) - if err != nil { - return cerror.WrapError(cerror.ErrEncodeFailed, err) - } - value, err = common.Compress(e.config.ChangefeedID, - e.config.LargeMessageHandle.LargeMessageHandleCompression, value) - if err != nil { - return err - } - result.Value = value - if result.Length() > e.config.MaxMessageBytes { - log.Error("Single message is still too large for simple", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("length", result.Length()), - zap.Any("table", event.Table)) - return cerror.ErrMessageTooLarge.GenWithStackByArgs() + m, err = newDMLMessage(event, true) + if err != nil { + return err + } + + if e.config.LargeMessageHandle.EnableClaimCheck() { + fileName := claimcheck.NewFileName() + m.ClaimCheckLocation = e.claimCheck.FileNameWithPrefix(fileName) + if err = e.claimCheck.WriteMessage(ctx, result.Key, result.Value, fileName); err != nil { + return errors.Trace(err) } - log.Warn("Single message is too large for simple", + } + + value, err = json.Marshal(m) + if err != nil { + return cerror.WrapError(cerror.ErrEncodeFailed, err) + } + value, err = common.Compress(e.config.ChangefeedID, + e.config.LargeMessageHandle.LargeMessageHandleCompression, value) + if err != nil { + return err + } + result.Value = value + + if result.Length() <= e.config.MaxMessageBytes { + log.Warn("Single message is too large for simple, only encode handle key columns", zap.Int("maxMessageBytes", e.config.MaxMessageBytes), zap.Int("originLength", result.Length()), zap.Int("length", result.Length()), zap.Any("table", event.Table)) + e.messages = append(e.messages, result) + return nil } - e.messages = append(e.messages, result) - return nil + log.Error("Single message is still too large for simple after only encode handle key columns", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() } // Build implement the RowEventEncoder interface @@ -151,16 +159,15 @@ func (e *encoder) EncodeDDLEvent(event *model.DDLEvent) (*common.Message, error) if err != nil { return nil, err } - result := common.NewDDLMsg(config.ProtocolSimple, nil, value, event) + + log.Info("simple encode DDL message", zap.Int("length", result.Length())) if result.Length() > e.config.MaxMessageBytes { - if !e.config.LargeMessageHandle.EnableClaimCheck() { - log.Error("DDL message is too large for simple", - zap.Int("maxMessageBytes", e.config.MaxMessageBytes), - zap.Int("length", result.Length()), - zap.Any("table", event.TableInfo.TableName)) - return nil, cerror.ErrMessageTooLarge.GenWithStackByArgs() - } + log.Error("DDL message is too large for simple", + zap.Int("maxMessageBytes", e.config.MaxMessageBytes), + zap.Int("length", result.Length()), + zap.Any("table", event.TableInfo.TableName)) + return nil, cerror.ErrMessageTooLarge.GenWithStackByArgs() } return result, nil } diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 8efdfdd29e5..5ce387e35b9 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -87,6 +87,7 @@ func TestEncodeDDLEvent(t *testing.T) { gender enum('male', 'female'), email 
varchar(255) not null, key idx_name_email(name, email))` + ddlEvent := helper.DDL2Event(sql) m, err := enc.EncodeDDLEvent(ddlEvent) @@ -390,26 +391,24 @@ func TestLargerMessageHandleClaimCheck(t *testing.T) { ddlEvent, _, updateEvent, _ := utils.NewLargeEvent4Test(t) ctx := context.Background() - codecConfig := common.NewConfig(config.ProtocolSimple) - codecConfig.MaxMessageBytes = 500 - codecConfig.LargeMessageHandle.LargeMessageHandleOption = config.LargeMessageHandleOptionClaimCheck - codecConfig.LargeMessageHandle.ClaimCheckStorageURI = "file:///tmp/simple-claim-check" - for _, compressionType := range []string{ compression.None, compression.Snappy, compression.LZ4, } { + codecConfig := common.NewConfig(config.ProtocolSimple) + codecConfig.LargeMessageHandle.LargeMessageHandleOption = config.LargeMessageHandleOptionClaimCheck + codecConfig.LargeMessageHandle.ClaimCheckStorageURI = "file:///tmp/simple-claim-check" codecConfig.LargeMessageHandle.LargeMessageHandleCompression = compressionType builder, err := NewBuilder(ctx, codecConfig) require.NoError(t, err) enc := builder.Build() - dec, err := NewDecoder(ctx, codecConfig, nil) + m, err := enc.EncodeDDLEvent(ddlEvent) require.NoError(t, err) - m, err := enc.EncodeDDLEvent(ddlEvent) + dec, err := NewDecoder(ctx, codecConfig, nil) require.NoError(t, err) err = dec.AddKeyValue(m.Key, m.Value) @@ -423,11 +422,13 @@ func TestLargerMessageHandleClaimCheck(t *testing.T) { _, err = dec.NextDDLEvent() require.NoError(t, err) + enc.(*encoder).config.MaxMessageBytes = 500 err = enc.AppendRowChangedEvent(ctx, "", updateEvent, func() {}) require.NoError(t, err) claimCheckLocationM := enc.Build()[0] + dec.config.MaxMessageBytes = 500 err = dec.AddKeyValue(claimCheckLocationM.Key, claimCheckLocationM.Value) require.NoError(t, err) @@ -435,6 +436,7 @@ func TestLargerMessageHandleClaimCheck(t *testing.T) { require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeRow, messageType) + require.NotEqual(t, "", dec.msg.ClaimCheckLocation) decodedRow, err := dec.NextRowChangedEvent() require.NoError(t, err) @@ -504,6 +506,7 @@ func TestLargeMessageHandleKeyOnly(t *testing.T) { require.NoError(t, err) require.True(t, hasNext) require.Equal(t, model.MessageTypeRow, messageType) + require.True(t, dec.msg.HandleKeyOnly) for _, col := range updateEvent.Columns { if col.Flag.IsHandleKey() { diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index fbf0a5844d6..23985bd400a 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -382,6 +382,7 @@ func newDMLMessage( CommitTs: event.CommitTs, BuildTs: time.Now().UnixMilli(), SchemaVersion: event.TableInfo.UpdateTS, + HandleKeyOnly: onlyHandleKey, } var err error if event.IsInsert() { diff --git a/tests/integration_tests/kafka_simple_claim_check/data/data.sql b/tests/integration_tests/kafka_simple_claim_check/data/data.sql index 2977b9aa123..3a92a4a92b0 100644 --- a/tests/integration_tests/kafka_simple_claim_check/data/data.sql +++ b/tests/integration_tests/kafka_simple_claim_check/data/data.sql @@ -1,59 +1,3 @@ -drop database if exists test; -create database test; -use test; - -create table t ( - id int primary key auto_increment, - - c_tinyint tinyint null, - c_smallint smallint null, - c_mediumint mediumint null, - c_int int null, - c_bigint bigint null, - - c_unsigned_tinyint tinyint unsigned null, - c_unsigned_smallint smallint unsigned null, - c_unsigned_mediumint mediumint unsigned null, - c_unsigned_int int unsigned null, - 
c_unsigned_bigint bigint unsigned null, - - c_float float null, - c_double double null, - c_decimal decimal null, - c_decimal_2 decimal(10, 4) null, - - c_unsigned_float float unsigned null, - c_unsigned_double double unsigned null, - c_unsigned_decimal decimal unsigned null, - c_unsigned_decimal_2 decimal(10, 4) unsigned null, - - c_date date null, - c_datetime datetime null, - c_timestamp timestamp null, - c_time time null, - c_year year null, - - c_tinytext tinytext null, - c_text text null, - c_mediumtext mediumtext null, - c_longtext longtext null, - - c_tinyblob tinyblob null, - c_blob blob null, - c_mediumblob mediumblob null, - c_longblob longblob null, - - c_char char(16) null, - c_varchar varchar(16) null, - c_binary binary(16) null, - c_varbinary varbinary(16) null, - - c_enum enum ('a','b','c') null, - c_set set ('a','b','c') null, - c_bit bit(64) null, - c_json json null -); - insert into t values ( 1, 1, 2, 3, 4, 5, diff --git a/tests/integration_tests/kafka_simple_claim_check/data/ddl.sql b/tests/integration_tests/kafka_simple_claim_check/data/ddl.sql new file mode 100644 index 00000000000..cd062fe8ce4 --- /dev/null +++ b/tests/integration_tests/kafka_simple_claim_check/data/ddl.sql @@ -0,0 +1,55 @@ +drop database if exists test; +create database test; +use test; + +create table t ( + id int primary key auto_increment, + + c_tinyint tinyint null, + c_smallint smallint null, + c_mediumint mediumint null, + c_int int null, + c_bigint bigint null, + + c_unsigned_tinyint tinyint unsigned null, + c_unsigned_smallint smallint unsigned null, + c_unsigned_mediumint mediumint unsigned null, + c_unsigned_int int unsigned null, + c_unsigned_bigint bigint unsigned null, + + c_float float null, + c_double double null, + c_decimal decimal null, + c_decimal_2 decimal(10, 4) null, + + c_unsigned_float float unsigned null, + c_unsigned_double double unsigned null, + c_unsigned_decimal decimal unsigned null, + c_unsigned_decimal_2 decimal(10, 4) unsigned null, + + c_date date null, + c_datetime datetime null, + c_timestamp timestamp null, + c_time time null, + c_year year null, + + c_tinytext tinytext null, + c_text text null, + c_mediumtext mediumtext null, + c_longtext longtext null, + + c_tinyblob tinyblob null, + c_blob blob null, + c_mediumblob mediumblob null, + c_longblob longblob null, + + c_char char(16) null, + c_varchar varchar(16) null, + c_binary binary(16) null, + c_varbinary varbinary(16) null, + + c_enum enum ('a','b','c') null, + c_set set ('a','b','c') null, + c_bit bit(64) null, + c_json json null +); diff --git a/tests/integration_tests/kafka_simple_claim_check/run.sh b/tests/integration_tests/kafka_simple_claim_check/run.sh index 038c4e147ea..9b92cf754c4 100644 --- a/tests/integration_tests/kafka_simple_claim_check/run.sh +++ b/tests/integration_tests/kafka_simple_claim_check/run.sh @@ -19,17 +19,24 @@ function run() { start_tidb_cluster --workdir $WORK_DIR cd $WORK_DIR + run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY TOPIC_NAME="kafka-simple-claim-check" # record tso before we create tables to skip the system table DDLs start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) - run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY + changefeed_id="kafka-simple-claim-check" + SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple" + run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" -c ${changefeed_id} --config="$CUR/conf/changefeed.toml" + run_sql_file $CUR/data/ddl.sql ${UP_TIDB_HOST} ${UP_TIDB_PORT} - 
SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" + sleep 5 - run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" + run_cdc_cli changefeed pause -c ${changefeed_id} + + SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" + run_cdc_cli changefeed update -c ${changefeed_id} --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" --no-confirm cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 & diff --git a/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml b/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml index 1a68fb078e4..216463b8f56 100644 --- a/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml +++ b/tests/integration_tests/kafka_simple_handle_key_only/conf/changefeed.toml @@ -1,3 +1,3 @@ [sink.kafka-config.large-message-handle] large-message-handle-option = "handle-key-only" -large-message-handle-compression = "snappy" +large-message-handle-compression = "lz4" diff --git a/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql index 2977b9aa123..3a92a4a92b0 100644 --- a/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql +++ b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql @@ -1,59 +1,3 @@ -drop database if exists test; -create database test; -use test; - -create table t ( - id int primary key auto_increment, - - c_tinyint tinyint null, - c_smallint smallint null, - c_mediumint mediumint null, - c_int int null, - c_bigint bigint null, - - c_unsigned_tinyint tinyint unsigned null, - c_unsigned_smallint smallint unsigned null, - c_unsigned_mediumint mediumint unsigned null, - c_unsigned_int int unsigned null, - c_unsigned_bigint bigint unsigned null, - - c_float float null, - c_double double null, - c_decimal decimal null, - c_decimal_2 decimal(10, 4) null, - - c_unsigned_float float unsigned null, - c_unsigned_double double unsigned null, - c_unsigned_decimal decimal unsigned null, - c_unsigned_decimal_2 decimal(10, 4) unsigned null, - - c_date date null, - c_datetime datetime null, - c_timestamp timestamp null, - c_time time null, - c_year year null, - - c_tinytext tinytext null, - c_text text null, - c_mediumtext mediumtext null, - c_longtext longtext null, - - c_tinyblob tinyblob null, - c_blob blob null, - c_mediumblob mediumblob null, - c_longblob longblob null, - - c_char char(16) null, - c_varchar varchar(16) null, - c_binary binary(16) null, - c_varbinary varbinary(16) null, - - c_enum enum ('a','b','c') null, - c_set set ('a','b','c') null, - c_bit bit(64) null, - c_json json null -); - insert into t values ( 1, 1, 2, 3, 4, 5, diff --git a/tests/integration_tests/kafka_simple_handle_key_only/data/ddl.sql b/tests/integration_tests/kafka_simple_handle_key_only/data/ddl.sql new file mode 100644 index 00000000000..cd062fe8ce4 --- /dev/null +++ b/tests/integration_tests/kafka_simple_handle_key_only/data/ddl.sql @@ -0,0 +1,55 @@ +drop database if exists test; +create database test; +use test; + +create table t ( + id int primary key auto_increment, + + c_tinyint tinyint null, + c_smallint smallint null, + c_mediumint mediumint null, + c_int int null, + c_bigint bigint null, + + 
c_unsigned_tinyint tinyint unsigned null, + c_unsigned_smallint smallint unsigned null, + c_unsigned_mediumint mediumint unsigned null, + c_unsigned_int int unsigned null, + c_unsigned_bigint bigint unsigned null, + + c_float float null, + c_double double null, + c_decimal decimal null, + c_decimal_2 decimal(10, 4) null, + + c_unsigned_float float unsigned null, + c_unsigned_double double unsigned null, + c_unsigned_decimal decimal unsigned null, + c_unsigned_decimal_2 decimal(10, 4) unsigned null, + + c_date date null, + c_datetime datetime null, + c_timestamp timestamp null, + c_time time null, + c_year year null, + + c_tinytext tinytext null, + c_text text null, + c_mediumtext mediumtext null, + c_longtext longtext null, + + c_tinyblob tinyblob null, + c_blob blob null, + c_mediumblob mediumblob null, + c_longblob longblob null, + + c_char char(16) null, + c_varchar varchar(16) null, + c_binary binary(16) null, + c_varbinary varbinary(16) null, + + c_enum enum ('a','b','c') null, + c_set set ('a','b','c') null, + c_bit bit(64) null, + c_json json null +); diff --git a/tests/integration_tests/kafka_simple_handle_key_only/run.sh b/tests/integration_tests/kafka_simple_handle_key_only/run.sh index 7208687275c..0e5b4035b1d 100644 --- a/tests/integration_tests/kafka_simple_handle_key_only/run.sh +++ b/tests/integration_tests/kafka_simple_handle_key_only/run.sh @@ -19,17 +19,24 @@ function run() { start_tidb_cluster --workdir $WORK_DIR cd $WORK_DIR + run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY TOPIC_NAME="simple-handle-key-only" # record tso before we create tables to skip the system table DDLs start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) - run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY + changefeed_id="simple-handle-key-only" + SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple" + run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" -c ${changefeed_id} --config="$CUR/conf/changefeed.toml" + run_sql_file $CUR/data/ddl.sql ${UP_TIDB_HOST} ${UP_TIDB_PORT} - SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" + sleep 5 - run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" + run_cdc_cli changefeed pause -c ${changefeed_id} + + SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" + run_cdc_cli changefeed update -c ${changefeed_id} --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" --no-confirm cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 & From e018113246b31d155970f520e6e7d37d7ab4bb04 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 11 Dec 2023 17:04:58 +0800 Subject: [PATCH 10/24] fix tests. 
--- pkg/sink/codec/simple/encoder_test.go | 1 - tests/integration_tests/kafka_simple_claim_check/run.sh | 1 + tests/integration_tests/kafka_simple_handle_key_only/run.sh | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sink/codec/simple/encoder_test.go b/pkg/sink/codec/simple/encoder_test.go index 5ce387e35b9..c6b2ddbab89 100644 --- a/pkg/sink/codec/simple/encoder_test.go +++ b/pkg/sink/codec/simple/encoder_test.go @@ -87,7 +87,6 @@ func TestEncodeDDLEvent(t *testing.T) { gender enum('male', 'female'), email varchar(255) not null, key idx_name_email(name, email))` - ddlEvent := helper.DDL2Event(sql) m, err := enc.EncodeDDLEvent(ddlEvent) diff --git a/tests/integration_tests/kafka_simple_claim_check/run.sh b/tests/integration_tests/kafka_simple_claim_check/run.sh index 9b92cf754c4..b0a13351981 100644 --- a/tests/integration_tests/kafka_simple_claim_check/run.sh +++ b/tests/integration_tests/kafka_simple_claim_check/run.sh @@ -37,6 +37,7 @@ function run() { SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" run_cdc_cli changefeed update -c ${changefeed_id} --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" --no-confirm + run_cdc_cli changefeed resume -c ${changefeed_id} cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 & diff --git a/tests/integration_tests/kafka_simple_handle_key_only/run.sh b/tests/integration_tests/kafka_simple_handle_key_only/run.sh index 0e5b4035b1d..dbad2e0701c 100644 --- a/tests/integration_tests/kafka_simple_handle_key_only/run.sh +++ b/tests/integration_tests/kafka_simple_handle_key_only/run.sh @@ -37,7 +37,7 @@ function run() { SINK_URI="kafka://127.0.0.1:9092/$TOPIC_NAME?protocol=simple&max-message-bytes=500" run_cdc_cli changefeed update -c ${changefeed_id} --sink-uri="$SINK_URI" --config="$CUR/conf/changefeed.toml" --no-confirm - + run_cdc_cli changefeed resume -c ${changefeed_id} cdc_kafka_consumer --upstream-uri $SINK_URI --downstream-uri="mysql://root@127.0.0.1:3306/?safe-mode=true&batch-dml-enable=false" --upstream-tidb-dsn="root@tcp(${UP_TIDB_HOST}:${UP_TIDB_PORT})/?" --config="$CUR/conf/changefeed.toml" 2>&1 & run_sql_file $CUR/data/data.sql ${UP_TIDB_HOST} ${UP_TIDB_PORT} From c6da186813658ce001ea6f5168c461aafb164e93 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 11 Dec 2023 18:09:27 +0800 Subject: [PATCH 11/24] use test. 
--- tests/integration_tests/kafka_simple_claim_check/data/data.sql | 1 + .../integration_tests/kafka_simple_handle_key_only/data/data.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/integration_tests/kafka_simple_claim_check/data/data.sql b/tests/integration_tests/kafka_simple_claim_check/data/data.sql index 3a92a4a92b0..88cb3d0c997 100644 --- a/tests/integration_tests/kafka_simple_claim_check/data/data.sql +++ b/tests/integration_tests/kafka_simple_claim_check/data/data.sql @@ -1,3 +1,4 @@ +use test; insert into t values ( 1, 1, 2, 3, 4, 5, diff --git a/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql index 3a92a4a92b0..88cb3d0c997 100644 --- a/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql +++ b/tests/integration_tests/kafka_simple_handle_key_only/data/data.sql @@ -1,3 +1,4 @@ +use test; insert into t values ( 1, 1, 2, 3, 4, 5, From 00340c76f3ce8ed7ae65fb248844a70fbb1a59aa Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 12 Dec 2023 15:57:11 +0800 Subject: [PATCH 12/24] Revert "ddlManager (ticdc): add bootstrap sending function (#10045)" This reverts commit 5fc3d25f573b88e21686fe43638df38db106670b. --- cdc/model/changefeed.go | 18 ------- cdc/owner/changefeed.go | 9 +--- cdc/owner/changefeed_test.go | 4 -- cdc/owner/ddl_manager.go | 97 ++++------------------------------- cdc/owner/ddl_manager_test.go | 2 +- cdc/owner/ddl_sink.go | 17 ------ 6 files changed, 13 insertions(+), 134 deletions(-) diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index c8330db49b2..805d1a56729 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -589,24 +589,6 @@ func (info *ChangeFeedInfo) DownstreamType() (DownstreamType, error) { return Unknown, nil } -// NeedSendBootstrapEvent returns true if the changefeed need to send bootstrap event. 
-func (info *ChangeFeedInfo) NeedSendBootstrapEvent() (bool, error) { - downStreamType, err := info.DownstreamType() - if err != nil { - return false, errors.Trace(err) - } - if downStreamType != MQ { - return false, nil - } - if info.Config.Sink.Protocol == nil { - return false, nil - } - if *info.Config.Sink.Protocol == config.ProtocolSimple.String() { - return true, nil - } - return false, nil -} - func (info *ChangeFeedInfo) fixMemoryQuota() { info.Config.FixMemoryQuota() } diff --git a/cdc/owner/changefeed.go b/cdc/owner/changefeed.go index 863d1202385..8ef13a45395 100644 --- a/cdc/owner/changefeed.go +++ b/cdc/owner/changefeed.go @@ -639,11 +639,6 @@ LOOP2: return errors.Trace(err) } - needSendBootstrapEvent, err := c.latestInfo.NeedSendBootstrapEvent() - if err != nil { - return errors.Trace(err) - } - c.ddlManager = newDDLManager( c.id, ddlStartTs, @@ -654,9 +649,7 @@ LOOP2: c.redoDDLMgr, c.redoMetaMgr, downstreamType, - util.GetOrZero(c.latestInfo.Config.BDRMode), - needSendBootstrapEvent, - ) + util.GetOrZero(c.latestInfo.Config.BDRMode)) // create scheduler cfg := *c.cfg diff --git a/cdc/owner/changefeed_test.go b/cdc/owner/changefeed_test.go index 7eac7ca303e..d505efa2437 100644 --- a/cdc/owner/changefeed_test.go +++ b/cdc/owner/changefeed_test.go @@ -140,10 +140,6 @@ func (m *mockDDLSink) emitCheckpointTs(ts uint64, tables []*model.TableInfo) { m.mu.currentTables = tables } -func (m *mockDDLSink) emitBootstrapEvent(ctx context.Context, ddl *model.DDLEvent) error { - return nil -} - func (m *mockDDLSink) getCheckpointTsAndTableNames() (uint64, []*model.TableInfo) { m.mu.Lock() defer m.mu.Unlock() diff --git a/cdc/owner/ddl_manager.go b/cdc/owner/ddl_manager.go index 44cc63e7e43..6a0ec488c7e 100644 --- a/cdc/owner/ddl_manager.go +++ b/cdc/owner/ddl_manager.go @@ -17,7 +17,6 @@ import ( "context" "math/rand" "sort" - "sync/atomic" "time" "github.com/pingcap/errors" @@ -94,14 +93,6 @@ var redoBarrierDDLs = map[timodel.ActionType]struct{}{ timodel.ActionRemovePartitioning: {}, } -type bootstrapState int32 - -const ( - bootstrapStateNone bootstrapState = iota - bootstrapStateRunning - bootstrapStateCompleted -) - // ddlManager holds the pending DDL events of all tables and responsible for // executing them to downstream. // It also provides the ability to calculate the barrier of a changefeed. @@ -136,12 +127,6 @@ type ddlManager struct { BDRMode bool ddlResolvedTs model.Ts - - // needBootstrap is true when the downstream is kafka - // and the protocol is simple protocol. 
- needSendBootstrapEvent bool - errCh chan error - bootstrapState int32 } func newDDLManager( @@ -155,7 +140,6 @@ func newDDLManager( redoMetaManager redo.MetaManager, sinkType model.DownstreamType, bdrMode bool, - needSendBootstrapEvent bool, ) *ddlManager { log.Info("owner create ddl manager", zap.String("namespace", changefeedID.Namespace), @@ -166,19 +150,17 @@ func newDDLManager( zap.Stringer("sinkType", sinkType)) return &ddlManager{ - changfeedID: changefeedID, - ddlSink: ddlSink, - ddlPuller: ddlPuller, - schema: schema, - redoDDLManager: redoManager, - redoMetaManager: redoMetaManager, - startTs: startTs, - checkpointTs: checkpointTs, - ddlResolvedTs: startTs, - BDRMode: bdrMode, - pendingDDLs: make(map[model.TableName][]*model.DDLEvent), - errCh: make(chan error, 1), - needSendBootstrapEvent: needSendBootstrapEvent, + changfeedID: changefeedID, + ddlSink: ddlSink, + ddlPuller: ddlPuller, + schema: schema, + redoDDLManager: redoManager, + redoMetaManager: redoMetaManager, + startTs: startTs, + checkpointTs: checkpointTs, + ddlResolvedTs: startTs, + BDRMode: bdrMode, + pendingDDLs: make(map[model.TableName][]*model.DDLEvent), } } @@ -195,16 +177,6 @@ func (m *ddlManager) tick( ctx context.Context, checkpointTs model.Ts, ) ([]model.TableID, *schedulepb.BarrierWithMinTs, error) { - if m.needSendBootstrapEvent { - finished, err := m.checkAndBootstrap(ctx) - if err != nil { - return nil, nil, err - } - if !finished { - return nil, schedulepb.NewBarrierWithMinTs(checkpointTs), nil - } - } - m.justSentDDL = nil m.checkpointTs = checkpointTs @@ -594,53 +566,6 @@ func (m *ddlManager) cleanCache() { m.physicalTablesCache = nil } -func (m *ddlManager) checkAndBootstrap(ctx context.Context) (bool, error) { - if atomic.LoadInt32(&m.bootstrapState) == int32(bootstrapStateCompleted) { - return true, nil - } - - select { - case err := <-m.errCh: - return false, err - default: - } - - if atomic.LoadInt32(&m.bootstrapState) == int32(bootstrapStateRunning) { - return false, nil - } - // begin bootstrap - atomic.StoreInt32(&m.bootstrapState, int32(bootstrapStateRunning)) - tables, err := m.allTables(ctx) - if err != nil { - return false, err - } - bootstrapEvents := make([]*model.DDLEvent, 0, len(tables)) - for _, table := range tables { - ddlEvent := &model.DDLEvent{ - StartTs: m.startTs, - CommitTs: m.startTs, - TableInfo: table, - IsBootstrap: true, - } - bootstrapEvents = append(bootstrapEvents, ddlEvent) - } - // send bootstrap events - go func() { - for _, event := range bootstrapEvents { - err := m.ddlSink.emitBootstrapEvent(ctx, event) - if err != nil { - log.Error("emit bootstrap event failed", - zap.Any("bootstrapEvent", event), zap.Error(err)) - atomic.StoreInt32(&m.bootstrapState, int32(bootstrapStateNone)) - m.errCh <- err - return - } - } - atomic.StoreInt32(&m.bootstrapState, int32(bootstrapStateCompleted)) - }() - return false, nil -} - // getRelatedPhysicalTableIDs get all related physical table ids of a ddl event. // It is a helper function to calculate tableBarrier. 
func getRelatedPhysicalTableIDs(ddl *model.DDLEvent) []model.TableID { diff --git a/cdc/owner/ddl_manager_test.go b/cdc/owner/ddl_manager_test.go index 0d7eb543ab2..3255608be32 100644 --- a/cdc/owner/ddl_manager_test.go +++ b/cdc/owner/ddl_manager_test.go @@ -48,7 +48,7 @@ func createDDLManagerForTest(t *testing.T) *ddlManager { schema, redo.NewDisabledDDLManager(), redo.NewDisabledMetaManager(), - model.DB, false, false) + model.DB, false) return res } diff --git a/cdc/owner/ddl_sink.go b/cdc/owner/ddl_sink.go index 1c67c0ffad6..f06d01e6f3a 100644 --- a/cdc/owner/ddl_sink.go +++ b/cdc/owner/ddl_sink.go @@ -54,7 +54,6 @@ type DDLSink interface { // the DDL event will be sent to another goroutine and execute to downstream // the caller of this function can call again and again until a true returned emitDDLEvent(ctx context.Context, ddl *model.DDLEvent) (bool, error) - emitBootstrapEvent(ctx context.Context, ddl *model.DDLEvent) error emitSyncPoint(ctx context.Context, checkpointTs uint64) error // close the ddlsink, cancel running goroutine. close(ctx context.Context) error @@ -385,22 +384,6 @@ func (s *ddlSinkImpl) emitDDLEvent(ctx context.Context, ddl *model.DDLEvent) (bo return false, nil } -// emitBootstrapEvent sent bootstrap event to downstream. -// It is a synchronous operation. -func (s *ddlSinkImpl) emitBootstrapEvent(ctx context.Context, ddl *model.DDLEvent) error { - if !ddl.IsBootstrap { - return nil - } - err := s.sink.WriteDDLEvent(ctx, ddl) - if err != nil { - return errors.Trace(err) - } - // TODO: change this log to debug level after testing complete. - log.Info("emit bootstrap event", zap.String("namespace", s.changefeedID.Namespace), - zap.String("changefeed", s.changefeedID.ID), zap.Any("bootstrapEvent", ddl)) - return nil -} - func (s *ddlSinkImpl) emitSyncPoint(ctx context.Context, checkpointTs uint64) (err error) { if checkpointTs == s.lastSyncPoint { return nil From 658a3985bb42787bf7497740da54ad43e11c4452 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 12 Dec 2023 16:16:59 +0800 Subject: [PATCH 13/24] remove useless sink type. --- cdc/model/changefeed.go | 45 ----------------------------------- cdc/owner/changefeed.go | 6 ----- cdc/owner/ddl_manager.go | 4 +--- cdc/owner/ddl_manager_test.go | 2 +- 4 files changed, 2 insertions(+), 55 deletions(-) diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index 805d1a56729..441aaf0348a 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -571,24 +571,6 @@ func (info *ChangeFeedInfo) updateSinkURIAndConfigProtocol(uri *url.URL, newProt info.Config.Sink.Protocol = util.AddressOf(newProtocol) } -// DownstreamType returns the type of the downstream. -func (info *ChangeFeedInfo) DownstreamType() (DownstreamType, error) { - uri, err := url.Parse(info.SinkURI) - if err != nil { - return Unknown, errors.Trace(err) - } - if sink.IsMySQLCompatibleScheme(uri.Scheme) { - return DB, nil - } - if sink.IsMQScheme(uri.Scheme) { - return MQ, nil - } - if sink.IsStorageScheme(uri.Scheme) { - return Storage, nil - } - return Unknown, nil -} - func (info *ChangeFeedInfo) fixMemoryQuota() { info.Config.FixMemoryQuota() } @@ -597,33 +579,6 @@ func (info *ChangeFeedInfo) fixScheduler(inheritV66 bool) { info.Config.FixScheduler(inheritV66) } -// DownstreamType is the type of downstream. -type DownstreamType int - -const ( - // DB is the type of Database. - DB DownstreamType = iota - // MQ is the type of MQ or Cloud Storage. - MQ - // Storage is the type of Cloud Storage. 
- Storage - // Unknown is the type of Unknown. - Unknown -) - -// String implements fmt.Stringer interface. -func (t DownstreamType) String() string { - switch t { - case DB: - return "DB" - case MQ: - return "MQ" - case Storage: - return "Storage" - } - return "Unknown" -} - // ChangeFeedStatusForAPI uses to transfer the status of changefeed for API. type ChangeFeedStatusForAPI struct { ResolvedTs uint64 `json:"resolved-ts"` diff --git a/cdc/owner/changefeed.go b/cdc/owner/changefeed.go index 8ef13a45395..c2efd841445 100644 --- a/cdc/owner/changefeed.go +++ b/cdc/owner/changefeed.go @@ -634,11 +634,6 @@ LOOP2: zap.String("namespace", c.id.Namespace), zap.String("changefeed", c.id.ID)) - downstreamType, err := c.latestInfo.DownstreamType() - if err != nil { - return errors.Trace(err) - } - c.ddlManager = newDDLManager( c.id, ddlStartTs, @@ -648,7 +643,6 @@ LOOP2: c.schema, c.redoDDLMgr, c.redoMetaMgr, - downstreamType, util.GetOrZero(c.latestInfo.Config.BDRMode)) // create scheduler diff --git a/cdc/owner/ddl_manager.go b/cdc/owner/ddl_manager.go index 6a0ec488c7e..3b6e0f7ee37 100644 --- a/cdc/owner/ddl_manager.go +++ b/cdc/owner/ddl_manager.go @@ -138,7 +138,6 @@ func newDDLManager( schema *schemaWrap4Owner, redoManager redo.DDLManager, redoMetaManager redo.MetaManager, - sinkType model.DownstreamType, bdrMode bool, ) *ddlManager { log.Info("owner create ddl manager", @@ -146,8 +145,7 @@ func newDDLManager( zap.String("changefeed", changefeedID.ID), zap.Uint64("startTs", startTs), zap.Uint64("checkpointTs", checkpointTs), - zap.Bool("bdrMode", bdrMode), - zap.Stringer("sinkType", sinkType)) + zap.Bool("bdrMode", bdrMode)) return &ddlManager{ changfeedID: changefeedID, diff --git a/cdc/owner/ddl_manager_test.go b/cdc/owner/ddl_manager_test.go index 3255608be32..bfc03c72686 100644 --- a/cdc/owner/ddl_manager_test.go +++ b/cdc/owner/ddl_manager_test.go @@ -48,7 +48,7 @@ func createDDLManagerForTest(t *testing.T) *ddlManager { schema, redo.NewDisabledDDLManager(), redo.NewDisabledMetaManager(), - model.DB, false) + false) return res } From 02034aaf80d5a74eff2fa6b0e57c403a49fe43d4 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 12 Dec 2023 17:15:54 +0800 Subject: [PATCH 14/24] tiny adjust --- pkg/sink/codec/simple/decoder.go | 8 +++++--- pkg/sink/codec/simple/encoder.go | 3 --- pkg/sink/codec/utils/mysql_types.go | 2 +- pkg/upstream/manager_test.go | 15 +++++++++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pkg/sink/codec/simple/decoder.go b/pkg/sink/codec/simple/decoder.go index 93847764eb3..c09cfb6d5b5 100644 --- a/pkg/sink/codec/simple/decoder.go +++ b/pkg/sink/codec/simple/decoder.go @@ -60,9 +60,11 @@ func NewDecoder(ctx context.Context, config *common.Config, db *sql.DB) (*decode } return &decoder{ - config: config, - storage: externalStorage, - memo: newMemoryTableInfoProvider(), + config: config, + storage: externalStorage, + upstreamTiDB: db, + + memo: newMemoryTableInfoProvider(), }, nil } diff --git a/pkg/sink/codec/simple/encoder.go b/pkg/sink/codec/simple/encoder.go index 6432086a7c0..7e5614b068d 100644 --- a/pkg/sink/codec/simple/encoder.go +++ b/pkg/sink/codec/simple/encoder.go @@ -64,8 +64,6 @@ func (e *encoder) AppendRowChangedEvent( Callback: callback, } - log.Info("simple encode DML message", zap.Int("length", result.Length())) - result.IncRowsCount() if result.Length() <= e.config.MaxMessageBytes { e.messages = append(e.messages, result) @@ -161,7 +159,6 @@ func (e *encoder) EncodeDDLEvent(event *model.DDLEvent) (*common.Message, error) } 
result := common.NewDDLMsg(config.ProtocolSimple, nil, value, event) - log.Info("simple encode DDL message", zap.Int("length", result.Length())) if result.Length() > e.config.MaxMessageBytes { log.Error("DDL message is too large for simple", zap.Int("maxMessageBytes", e.config.MaxMessageBytes), diff --git a/pkg/sink/codec/utils/mysql_types.go b/pkg/sink/codec/utils/mysql_types.go index 006799ee37f..d2eac54fbfa 100644 --- a/pkg/sink/codec/utils/mysql_types.go +++ b/pkg/sink/codec/utils/mysql_types.go @@ -40,7 +40,7 @@ func WithZerofill4MySQLType(mysqlType string, zerofill bool) string { return mysqlType } -// GetMySQLType get the mysql type from Column info +// GetMySQLType get the mysql type from column info func GetMySQLType(columnInfo *timodel.ColumnInfo, fullType bool) string { if !fullType { result := types.TypeToStr(columnInfo.GetType(), columnInfo.GetCharset()) diff --git a/pkg/upstream/manager_test.go b/pkg/upstream/manager_test.go index 4bc544d9868..04ebc7efa80 100644 --- a/pkg/upstream/manager_test.go +++ b/pkg/upstream/manager_test.go @@ -55,16 +55,23 @@ func TestUpstream(t *testing.T) { require.NotNil(t, up) // test Tick - _ = manager.Tick(context.Background(), &orchestrator.GlobalReactorState{}) + globalState := &orchestrator.GlobalReactorState{ + Changefeeds: make(map[model.ChangeFeedID]*orchestrator.ChangefeedReactorState), + } + // add one changefeed state whose info is nil to make sure it won't be checked + globalState.Changefeeds[model.DefaultChangeFeedID("1")] = &orchestrator.ChangefeedReactorState{ + Info: nil, + } + _ = manager.Tick(context.Background(), globalState) mockClock.Add(maxIdleDuration * 2) manager.lastTickTime = atomic.Time{} - _ = manager.Tick(context.Background(), &orchestrator.GlobalReactorState{}) + _ = manager.Tick(context.Background(), globalState) // wait until up2 is closed for !up2.IsClosed() { } manager.lastTickTime = atomic.Time{} - _ = manager.Tick(context.Background(), &orchestrator.GlobalReactorState{}) - _ = manager.Tick(context.Background(), &orchestrator.GlobalReactorState{}) + _ = manager.Tick(context.Background(), globalState) + _ = manager.Tick(context.Background(), globalState) up, ok = manager.Get(testID) require.False(t, ok) require.Nil(t, up) From cf2db54cccade452989e956cab45cddfb25d0459 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 12 Dec 2023 18:38:36 +0800 Subject: [PATCH 15/24] fix read data from tidb. 
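
The complete row is now read back from the upstream TiDB instance over the
SQL interface when a message has to be re-encoded, and ENUM columns fetched
that way arrive as raw []uint8 rather than the uint64 index the mounter
produces, so encodeValue has to accept both shapes. A minimal sketch of that
normalization, assuming the decimal-string readback handled by this commit;
the function name, error text and the tidb import path (pkg/types on current
master, types on older branches) are illustrative, not taken from this diff:

    package main

    import (
        "fmt"
        "strconv"

        tiTypes "github.com/pingcap/tidb/pkg/types"
    )

    // enumName accepts either the mounter's uint64 index or the []uint8
    // decimal string read back from TiDB and resolves it to the ENUM
    // member name.
    func enumName(value interface{}, elems []string) (string, error) {
        var number uint64
        switch v := value.(type) {
        case uint64:
            number = v
        case []uint8:
            n, err := strconv.ParseUint(string(v), 10, 64)
            if err != nil {
                return "", err
            }
            number = n
        default:
            return "", fmt.Errorf("unexpected enum value type %T", value)
        }
        enumVar, err := tiTypes.ParseEnumValue(elems, number)
        if err != nil {
            return "", err
        }
        return enumVar.Name, nil
    }

    func main() {
        name, _ := enumName([]uint8("2"), []string{"male", "female"})
        fmt.Println(name) // female
    }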
--- pkg/sink/codec/simple/message.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 8002a38ade0..7c7ad80e2c9 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tiflow/cdc/entry" "github.com/pingcap/tiflow/cdc/model" cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/sink/codec/common" "github.com/pingcap/tiflow/pkg/sink/codec/utils" "go.uber.org/zap" ) @@ -467,13 +468,26 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { return nil, nil } + var err error switch ft.GetType() { case mysql.TypeEnum: if v, ok := value.(string); ok { return v, nil } element := ft.GetElems() - number := value.(uint64) + + var number uint64 + switch v := value.(type) { + case uint64: + number = v + case []uint8: + number, err = strconv.ParseUint(string(v), 10, 64) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } + default: + log.Panic("unexpected type for enum value", zap.Any("value", value)) + } enumVar, err := tiTypes.ParseEnumValue(element, number) if err != nil { return "", cerror.WrapError(cerror.ErrEncodeFailed, err) @@ -484,7 +498,16 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { return v, nil } elements := ft.GetElems() - number := value.(uint64) + + var number uint64 + switch v := value.(type) { + case uint64: + number = v + case []uint8: + number, err = common.BinaryLiteralToInt(v) + default: + log.Panic("unexpected type for set value", zap.Any("value", value)) + } setVar, err := tiTypes.ParseSetValue(elements, number) if err != nil { return "", cerror.WrapError(cerror.ErrEncodeFailed, err) From d36b45349fe292f1483d80b575ddce8b46fa5435 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Tue, 12 Dec 2023 20:17:56 +0800 Subject: [PATCH 16/24] fix read data from tidb. --- pkg/sink/codec/simple/message.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 7c7ad80e2c9..ac851cb0703 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -505,6 +505,9 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { number = v case []uint8: number, err = common.BinaryLiteralToInt(v) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } default: log.Panic("unexpected type for set value", zap.Any("value", value)) } From f3fb23b1cb3d95a8cbc2671eb93d71ff91cb9086 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 10:28:51 +0800 Subject: [PATCH 17/24] add more logs to debug the scheduler. 
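
No behavioural change: when parsing the ENUM/SET number read back from TiDB
fails, the raw value is now logged next to the error before it is wrapped, so
a bad readback shows up in the changefeed log instead of surfacing only as a
generic encode failure. Rough sketch of the convention; the function and
field names below are illustrative, not the exact code in this diff:

    package main

    import (
        "strconv"

        "github.com/pingcap/log"
        "go.uber.org/zap"
    )

    // parseEnumNumber logs the offending input together with the error
    // before handing it back to the caller, mirroring the log.Error calls
    // added by this commit.
    func parseEnumNumber(raw []byte) (uint64, error) {
        n, err := strconv.ParseUint(string(raw), 10, 64)
        if err != nil {
            log.Error("parse number for enum failed",
                zap.String("value", string(raw)), zap.Error(err))
            return 0, err
        }
        return n, nil
    }

    func main() {
        _, _ = parseEnumNumber([]byte("not-a-number"))
    }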
--- pkg/sink/codec/simple/message.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index ac851cb0703..db2055fa1cc 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -483,6 +483,7 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { case []uint8: number, err = strconv.ParseUint(string(v), 10, 64) if err != nil { + log.Error("parse number for enum failed", zap.Any("number", number), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } default: @@ -506,6 +507,7 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { case []uint8: number, err = common.BinaryLiteralToInt(v) if err != nil { + log.Error("parse number for set failed", zap.Any("number", number), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } default: From 335ab0a158d74813922fcb574f357b780f9f132e Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 11:06:53 +0800 Subject: [PATCH 18/24] debug read data from tidb. --- pkg/sink/codec/simple/message.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index db2055fa1cc..bf0ab93effe 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -483,7 +483,7 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { case []uint8: number, err = strconv.ParseUint(string(v), 10, 64) if err != nil { - log.Error("parse number for enum failed", zap.Any("number", number), zap.Error(err)) + log.Error("parse number for enum failed", zap.Any("number", string(v)), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } default: From 76a7adc9c8a80d217b1ae917d3a3ef704f515308 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 11:43:49 +0800 Subject: [PATCH 19/24] debug read data from tidb. 
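
This round reshapes encodeValue into per-representation type switches: the
[]uint8 readback of an ENUM is kept as its literal text, while SET readbacks
still arrive as binary literals that go through the package's
common.BinaryLiteralToInt helper before the member names can be resolved.
The stand-in below shows what that helper is assumed to do, namely interpret
the raw bytes as a big-endian unsigned integer; the function is illustrative,
not the real implementation:

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    // binaryLiteralToUint64 is an illustrative stand-in for
    // common.BinaryLiteralToInt: pad the literal to 8 bytes and read it
    // as a big-endian uint64.
    func binaryLiteralToUint64(b []byte) (uint64, error) {
        if len(b) > 8 {
            return 0, fmt.Errorf("binary literal too long: %d bytes", len(b))
        }
        padded := make([]byte, 8)
        copy(padded[8-len(b):], b)
        return binary.BigEndian.Uint64(padded), nil
    }

    func main() {
        // b'101' stored as a single byte 0x05
        n, _ := binaryLiteralToUint64([]byte{0x05})
        fmt.Println(n) // 5
    }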
--- pkg/sink/codec/simple/message.go | 34 +++++++++++--------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index bf0ab93effe..8fa7c4d60b4 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -468,56 +468,46 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { return nil, nil } - var err error switch ft.GetType() { case mysql.TypeEnum: if v, ok := value.(string); ok { return v, nil } element := ft.GetElems() - - var number uint64 switch v := value.(type) { case uint64: - number = v - case []uint8: - number, err = strconv.ParseUint(string(v), 10, 64) + enumVar, err := tiTypes.ParseEnumValue(element, v) if err != nil { - log.Error("parse number for enum failed", zap.Any("number", string(v)), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } + return enumVar.Name, nil + case []uint8: + return string(v), nil default: log.Panic("unexpected type for enum value", zap.Any("value", value)) } - enumVar, err := tiTypes.ParseEnumValue(element, number) - if err != nil { - return "", cerror.WrapError(cerror.ErrEncodeFailed, err) - } - return enumVar.Name, nil case mysql.TypeSet: if v, ok := value.(string); ok { return v, nil } elements := ft.GetElems() - - var number uint64 switch v := value.(type) { case uint64: - number = v + setVar, err := tiTypes.ParseSetValue(elements, v) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } + return setVar.Name, nil case []uint8: - number, err = common.BinaryLiteralToInt(v) + number, err := common.BinaryLiteralToInt(v) if err != nil { - log.Error("parse number for set failed", zap.Any("number", number), zap.Error(err)) + log.Error("parse number for set failed", zap.Any("value", v), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } + return number, nil default: log.Panic("unexpected type for set value", zap.Any("value", value)) } - setVar, err := tiTypes.ParseSetValue(elements, number) - if err != nil { - return "", cerror.WrapError(cerror.ErrEncodeFailed, err) - } - return setVar.Name, nil default: } From 8e786700c8d8c3a114382598cc00e4aa55fa38ed Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 13:24:42 +0800 Subject: [PATCH 20/24] debug read data from tidb. 
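
Besides routing BIT readbacks through the same binary-literal conversion on
the encode side, this changes the decoder so that a malformed BIT value in
the message payload is reported as a decode error instead of bringing the
consumer down with log.Panic. A simplified sketch of that decode-side
behaviour; the function name and error wording are illustrative:

    package main

    import (
        "fmt"
        "strconv"
    )

    // decodeBitColumn parses the decimal string carried in the message and
    // returns the failure to the caller rather than panicking.
    func decodeBitColumn(name, data string) (uint64, error) {
        v, err := strconv.ParseUint(data, 10, 64)
        if err != nil {
            return 0, fmt.Errorf("invalid bit value for column %s: %q: %w", name, data, err)
        }
        return v, nil
    }

    func main() {
        if _, err := decodeBitColumn("flags", "oops"); err != nil {
            fmt.Println(err)
        }
    }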
--- pkg/sink/codec/simple/message.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 8fa7c4d60b4..b6286901e98 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -504,10 +504,17 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { log.Error("parse number for set failed", zap.Any("value", v), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } - return number, nil + value = number default: log.Panic("unexpected type for set value", zap.Any("value", value)) } + case mysql.TypeBit: + rawValue := value.([]uint8) + bitValue, err := common.BinaryLiteralToInt(rawValue) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } + value = bitValue default: } @@ -567,9 +574,10 @@ func decodeColumn(name string, value interface{}, fieldType *types.FieldType) (* case mysql.TypeBit: value, err = strconv.ParseUint(data, 10, 64) if err != nil { - log.Panic("invalid column value for bit or set", + log.Error("invalid column value for bit", zap.String("name", name), zap.Any("data", data), zap.Any("type", fieldType.GetType()), zap.Error(err)) + return nil, cerror.WrapError(cerror.ErrDecodeFailed, err) } case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeInt24, mysql.TypeYear: value, err = strconv.ParseInt(data, 10, 64) From ce12346c05d45138a7e90eb01fa1ed731abb2771 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 13:48:41 +0800 Subject: [PATCH 21/24] debug read data from tidb. --- pkg/sink/codec/simple/message.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index b6286901e98..367f4dbff8d 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -509,12 +509,15 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { log.Panic("unexpected type for set value", zap.Any("value", value)) } case mysql.TypeBit: - rawValue := value.([]uint8) - bitValue, err := common.BinaryLiteralToInt(rawValue) - if err != nil { - return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + switch v := value.(type) { + case []uint8: + bitValue, err := common.BinaryLiteralToInt(v) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } + value = bitValue + default: } - value = bitValue default: } From fdea735d5320e86fa72cd878aa85a84e679dd404 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 14:37:55 +0800 Subject: [PATCH 22/24] debug read data from tidb. 
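
SET values are normalized the same way as BIT here: whether the value is the
mounter's uint64 bitmap or the []uint8 binary literal read back from TiDB, it
is first reduced to a number and then resolved to the member list with
tiTypes.ParseSetValue. A small worked example of that mapping, assuming the
usual one-bit-per-member encoding; the snippet is illustrative, and the tidb
import path may be types instead of pkg/types on older branches:

    package main

    import (
        "fmt"

        tiTypes "github.com/pingcap/tidb/pkg/types"
    )

    func main() {
        elems := []string{"a", "b", "c"}
        // 5 = 0b101: bit 0 selects "a", bit 2 selects "c".
        set, err := tiTypes.ParseSetValue(elems, 5)
        if err != nil {
            panic(err)
        }
        fmt.Println(set.Name) // a,c
    }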
--- pkg/sink/codec/simple/message.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index 367f4dbff8d..bd6d48969aa 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -487,27 +487,26 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { log.Panic("unexpected type for enum value", zap.Any("value", value)) } case mysql.TypeSet: - if v, ok := value.(string); ok { - return v, nil - } - elements := ft.GetElems() + var ( + number uint64 + err error + ) switch v := value.(type) { case uint64: - setVar, err := tiTypes.ParseSetValue(elements, v) - if err != nil { - return "", cerror.WrapError(cerror.ErrEncodeFailed, err) - } - return setVar.Name, nil + number = v case []uint8: - number, err := common.BinaryLiteralToInt(v) + number, err = common.BinaryLiteralToInt(v) if err != nil { - log.Error("parse number for set failed", zap.Any("value", v), zap.Error(err)) return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } - value = number default: log.Panic("unexpected type for set value", zap.Any("value", value)) } + setValue, err := tiTypes.ParseSetValue(ft.GetElems(), number) + if err != nil { + return "", cerror.WrapError(cerror.ErrEncodeFailed, err) + } + return setValue.Name, nil case mysql.TypeBit: switch v := value.(type) { case []uint8: From 4ad0774b44deb93b35f6766d202f63f3fd4e6f9a Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Wed, 13 Dec 2023 15:25:49 +0800 Subject: [PATCH 23/24] fix read data from tidb. --- pkg/sink/codec/simple/message.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pkg/sink/codec/simple/message.go b/pkg/sink/codec/simple/message.go index bd6d48969aa..d5e20f7ad13 100644 --- a/pkg/sink/codec/simple/message.go +++ b/pkg/sink/codec/simple/message.go @@ -487,26 +487,18 @@ func encodeValue(value interface{}, ft *types.FieldType) (interface{}, error) { log.Panic("unexpected type for enum value", zap.Any("value", value)) } case mysql.TypeSet: - var ( - number uint64 - err error - ) switch v := value.(type) { case uint64: - number = v - case []uint8: - number, err = common.BinaryLiteralToInt(v) + setValue, err := tiTypes.ParseSetValue(ft.GetElems(), v) if err != nil { return "", cerror.WrapError(cerror.ErrEncodeFailed, err) } + return setValue.Name, nil + case []uint8: + return string(v), nil default: log.Panic("unexpected type for set value", zap.Any("value", value)) } - setValue, err := tiTypes.ParseSetValue(ft.GetElems(), number) - if err != nil { - return "", cerror.WrapError(cerror.ErrEncodeFailed, err) - } - return setValue.Name, nil case mysql.TypeBit: switch v := value.(type) { case []uint8: From 519f8b137168d24b3c360a8ce22bb456dd435cca Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Fri, 15 Dec 2023 14:20:12 +0800 Subject: [PATCH 24/24] fix build --- pkg/sink/codec/simple/decoder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sink/codec/simple/decoder.go b/pkg/sink/codec/simple/decoder.go index c09cfb6d5b5..5ff71e8bda0 100644 --- a/pkg/sink/codec/simple/decoder.go +++ b/pkg/sink/codec/simple/decoder.go @@ -21,7 +21,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tiflow/cdc/model" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/sink/codec/common"