From 92cccfd7a015d3cc277b28b7334fd6bb81ee4309 Mon Sep 17 00:00:00 2001 From: CharlesCheung Date: Sat, 23 Dec 2023 16:48:33 +0800 Subject: [PATCH] use pd clock in storage sink --- cdc/api/v1/validator.go | 5 +- cdc/api/v2/api_helpers.go | 5 +- cdc/processor/sinkmanager/manager.go | 2 +- .../cloudstorage/cloud_storage_dml_sink.go | 6 +- .../cloud_storage_dml_sink_test.go | 21 ++++--- cdc/sink/dmlsink/cloudstorage/dml_worker.go | 6 +- .../dmlsink/cloudstorage/dml_worker_test.go | 4 +- cdc/sink/dmlsink/factory/factory.go | 4 +- cdc/sink/validator/validator.go | 4 +- cdc/sink/validator/validator_test.go | 8 +-- cmd/kafka-consumer/main.go | 2 +- cmd/pulsar-consumer/main.go | 2 +- cmd/storage-consumer/main.go | 1 + pkg/applier/redo.go | 2 +- pkg/errors/cdc_errors.go | 4 ++ pkg/pdutil/clock.go | 22 +++++++ pkg/sink/cloudstorage/path.go | 59 ++++++++++++------- pkg/sink/cloudstorage/path_test.go | 14 +++-- 18 files changed, 117 insertions(+), 54 deletions(-) diff --git a/cdc/api/v1/validator.go b/cdc/api/v1/validator.go index b75428d5ca9..a10060bff6f 100644 --- a/cdc/api/v1/validator.go +++ b/cdc/api/v1/validator.go @@ -173,7 +173,8 @@ func verifyCreateChangefeedConfig( } if err := validator.Validate(ctx, model.ChangeFeedID{Namespace: changefeedConfig.Namespace, ID: changefeedConfig.ID}, - info.SinkURI, info.Config); err != nil { + info.SinkURI, info.Config, up.PDClock, + ); err != nil { return nil, err } @@ -233,7 +234,7 @@ func VerifyUpdateChangefeedConfig(ctx context.Context, if err := validator.Validate(ctx, model.ChangeFeedID{Namespace: changefeedConfig.Namespace, ID: changefeedConfig.ID}, - newInfo.SinkURI, newInfo.Config); err != nil { + newInfo.SinkURI, newInfo.Config, nil); err != nil { return nil, cerror.ErrChangefeedUpdateRefused.GenWithStackByCause(err) } } diff --git a/cdc/api/v2/api_helpers.go b/cdc/api/v2/api_helpers.go index 6ac56d1f713..4f0bf88783b 100644 --- a/cdc/api/v2/api_helpers.go +++ b/cdc/api/v2/api_helpers.go @@ -235,7 +235,8 @@ func (APIV2HelpersImpl) verifyCreateChangefeedConfig( // verify sink if err := validator.Validate(ctx, model.ChangeFeedID{Namespace: cfg.Namespace, ID: cfg.ID}, - cfg.SinkURI, replicaCfg); err != nil { + cfg.SinkURI, replicaCfg, nil, + ); err != nil { return nil, err } @@ -361,7 +362,7 @@ func (APIV2HelpersImpl) verifyUpdateChangefeedConfig( if err := validator.Validate(ctx, model.ChangeFeedID{Namespace: cfg.Namespace, ID: cfg.ID}, - newInfo.SinkURI, newInfo.Config); err != nil { + newInfo.SinkURI, newInfo.Config, nil); err != nil { return nil, nil, cerror.ErrChangefeedUpdateRefused.GenWithStackByCause(err) } } diff --git a/cdc/processor/sinkmanager/manager.go b/cdc/processor/sinkmanager/manager.go index 56e2c76f755..06dce03e10c 100644 --- a/cdc/processor/sinkmanager/manager.go +++ b/cdc/processor/sinkmanager/manager.go @@ -352,7 +352,7 @@ func (m *SinkManager) initSinkFactory() (chan error, uint64) { return m.sinkFactory.errors, m.sinkFactory.version } - m.sinkFactory.f, err = factory.New(m.managerCtx, m.changefeedID, uri, cfg, m.sinkFactory.errors) + m.sinkFactory.f, err = factory.New(m.managerCtx, m.changefeedID, uri, cfg, m.sinkFactory.errors, m.up.PDClock) if err != nil { emitError(err) return m.sinkFactory.errors, m.sinkFactory.version diff --git a/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink.go b/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink.go index 0d14e1598f4..77b6c1a273a 100644 --- a/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink.go +++ b/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink.go @@ -28,10 +28,10 @@ 
import ( "github.com/pingcap/tiflow/cdc/sink/metrics" "github.com/pingcap/tiflow/cdc/sink/tablesink/state" "github.com/pingcap/tiflow/cdc/sink/util" - "github.com/pingcap/tiflow/engine/pkg/clock" "github.com/pingcap/tiflow/pkg/chann" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/pdutil" "github.com/pingcap/tiflow/pkg/sink" "github.com/pingcap/tiflow/pkg/sink/cloudstorage" "github.com/pingcap/tiflow/pkg/sink/codec/builder" @@ -103,6 +103,7 @@ type DMLSink struct { // NewDMLSink creates a cloud storage sink. func NewDMLSink(ctx context.Context, changefeedID model.ChangeFeedID, + pdClock pdutil.Clock, sinkURI *url.URL, replicaConfig *config.ReplicaConfig, errCh chan error, @@ -163,11 +164,10 @@ func NewDMLSink(ctx context.Context, } // create a group of dml workers. - clock := clock.New() for i := 0; i < cfg.WorkerCount; i++ { inputCh := chann.NewAutoDrainChann[eventFragment]() s.workers[i] = newDMLWorker(i, s.changefeedID, storage, cfg, ext, - inputCh, clock, s.statistics) + inputCh, pdClock, s.statistics) workerChannels[i] = inputCh } diff --git a/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink_test.go b/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink_test.go index b2c555346f1..3d6887006f5 100644 --- a/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink_test.go +++ b/cdc/sink/dmlsink/cloudstorage/cloud_storage_dml_sink_test.go @@ -31,13 +31,14 @@ import ( "github.com/pingcap/tiflow/cdc/sink/tablesink/state" "github.com/pingcap/tiflow/engine/pkg/clock" "github.com/pingcap/tiflow/pkg/config" + "github.com/pingcap/tiflow/pkg/pdutil" "github.com/pingcap/tiflow/pkg/util" "github.com/stretchr/testify/require" ) func setClock(s *DMLSink, clock clock.Clock) { for _, w := range s.workers { - w.filePathGenerator.SetClock(clock) + w.filePathGenerator.SetClock(pdutil.NewMonotonicClock(clock)) } } @@ -129,6 +130,7 @@ func TestCloudStorageWriteEventsWithoutDateSeparator(t *testing.T) { errCh := make(chan error, 5) s, err := NewDMLSink(ctx, model.DefaultChangeFeedID("test"), + pdutil.NewMonotonicClock(clock.New()), sinkURI, replicaConfig, errCh) require.Nil(t, err) var cnt uint64 = 0 @@ -197,11 +199,12 @@ func TestCloudStorageWriteEventsWithDateSeparator(t *testing.T) { replicaConfig.Sink.FileIndexWidth = util.AddressOf(6) errCh := make(chan error, 5) + mockClock := clock.NewMock() s, err := NewDMLSink(ctx, - model.DefaultChangeFeedID("test"), sinkURI, replicaConfig, errCh) + model.DefaultChangeFeedID("test"), + pdutil.NewMonotonicClock(mockClock), + sinkURI, replicaConfig, errCh) require.Nil(t, err) - mockClock := clock.NewMock() - setClock(s, mockClock) var cnt uint64 = 0 batch := 100 @@ -272,12 +275,14 @@ func TestCloudStorageWriteEventsWithDateSeparator(t *testing.T) { // test table is scheduled from one node to another cnt = 0 ctx, cancel = context.WithCancel(context.Background()) - s, err = NewDMLSink(ctx, - model.DefaultChangeFeedID("test"), sinkURI, replicaConfig, errCh) - require.Nil(t, err) + mockClock = clock.NewMock() mockClock.Set(time.Date(2023, 3, 9, 0, 1, 10, 0, time.UTC)) - setClock(s, mockClock) + s, err = NewDMLSink(ctx, + model.DefaultChangeFeedID("test"), + pdutil.NewMonotonicClock(mockClock), + sinkURI, replicaConfig, errCh) + require.Nil(t, err) err = s.WriteEvents(txns...) 
require.Nil(t, err)
diff --git a/cdc/sink/dmlsink/cloudstorage/dml_worker.go b/cdc/sink/dmlsink/cloudstorage/dml_worker.go
index f7040f7bdcf..60d9d64b6db 100644
--- a/cdc/sink/dmlsink/cloudstorage/dml_worker.go
+++ b/cdc/sink/dmlsink/cloudstorage/dml_worker.go
@@ -25,9 +25,9 @@ import (
 	"github.com/pingcap/tiflow/cdc/model"
 	"github.com/pingcap/tiflow/cdc/sink/metrics"
 	mcloudstorage "github.com/pingcap/tiflow/cdc/sink/metrics/cloudstorage"
-	"github.com/pingcap/tiflow/engine/pkg/clock"
 	"github.com/pingcap/tiflow/pkg/chann"
 	"github.com/pingcap/tiflow/pkg/errors"
+	"github.com/pingcap/tiflow/pkg/pdutil"
 	"github.com/pingcap/tiflow/pkg/sink/cloudstorage"
 	"github.com/pingcap/tiflow/pkg/sink/codec/common"
 	"github.com/prometheus/client_golang/prometheus"
@@ -109,7 +109,7 @@ func newDMLWorker(
 	config *cloudstorage.Config,
 	extension string,
 	inputCh *chann.DrainableChann[eventFragment],
-	clock clock.Clock,
+	pdClock pdutil.Clock,
 	statistics *metrics.Statistics,
 ) *dmlWorker {
 	d := &dmlWorker{
@@ -120,7 +120,7 @@ func newDMLWorker(
 		inputCh:           inputCh,
 		toBeFlushedCh:     make(chan batchedTask, 64),
 		statistics:        statistics,
-		filePathGenerator: cloudstorage.NewFilePathGenerator(config, storage, extension, clock),
+		filePathGenerator: cloudstorage.NewFilePathGenerator(changefeedID, config, storage, extension, pdClock),
 		metricWriteBytes: mcloudstorage.CloudStorageWriteBytesGauge.
 			WithLabelValues(changefeedID.Namespace, changefeedID.ID),
 		metricFileCount: mcloudstorage.CloudStorageFileCountGauge.
diff --git a/cdc/sink/dmlsink/cloudstorage/dml_worker_test.go b/cdc/sink/dmlsink/cloudstorage/dml_worker_test.go
index c0e8ee47600..b202a20f255 100644
--- a/cdc/sink/dmlsink/cloudstorage/dml_worker_test.go
+++ b/cdc/sink/dmlsink/cloudstorage/dml_worker_test.go
@@ -30,6 +30,7 @@ import (
 	"github.com/pingcap/tiflow/engine/pkg/clock"
 	"github.com/pingcap/tiflow/pkg/chann"
 	"github.com/pingcap/tiflow/pkg/config"
+	"github.com/pingcap/tiflow/pkg/pdutil"
 	"github.com/pingcap/tiflow/pkg/sink"
 	"github.com/pingcap/tiflow/pkg/sink/cloudstorage"
 	"github.com/pingcap/tiflow/pkg/sink/codec/common"
@@ -52,8 +53,9 @@ func testDMLWorker(ctx context.Context, t *testing.T, dir string) *dmlWorker {
 	statistics := metrics.NewStatistics(ctx, model.DefaultChangeFeedID("dml-worker-test"),
 		sink.TxnSink)
+	pdClock := pdutil.NewMonotonicClock(clock.New())
 	d := newDMLWorker(1, model.DefaultChangeFeedID("dml-worker-test"), storage,
-		cfg, ".json", chann.NewAutoDrainChann[eventFragment](), clock.New(), statistics)
+		cfg, ".json", chann.NewAutoDrainChann[eventFragment](), pdClock, statistics)
 	return d
 }
diff --git a/cdc/sink/dmlsink/factory/factory.go b/cdc/sink/dmlsink/factory/factory.go
index 5d520630fe9..530e078ee58 100644
--- a/cdc/sink/dmlsink/factory/factory.go
+++ b/cdc/sink/dmlsink/factory/factory.go
@@ -30,6 +30,7 @@ import (
 	"github.com/pingcap/tiflow/cdc/sink/tablesink"
 	"github.com/pingcap/tiflow/pkg/config"
 	cerror "github.com/pingcap/tiflow/pkg/errors"
+	"github.com/pingcap/tiflow/pkg/pdutil"
 	"github.com/pingcap/tiflow/pkg/sink"
 	"github.com/pingcap/tiflow/pkg/sink/kafka"
 	v2 "github.com/pingcap/tiflow/pkg/sink/kafka/v2"
@@ -70,6 +71,7 @@ func New(
 	sinkURIStr string,
 	cfg *config.ReplicaConfig,
 	errCh chan error,
+	pdClock pdutil.Clock,
 ) (*SinkFactory, error) {
 	sinkURI, err := url.Parse(sinkURIStr)
 	if err != nil {
@@ -100,7 +102,7 @@ func New(
 		s.txnSink = mqs
 		s.category = CategoryMQ
 	case sink.S3Scheme, sink.FileScheme, sink.GCSScheme, sink.GSScheme, sink.AzblobScheme, sink.AzureScheme, sink.CloudStorageNoopScheme:
-		storageSink, err :=
cloudstorage.NewDMLSink(ctx, changefeedID, sinkURI, cfg, errCh) + storageSink, err := cloudstorage.NewDMLSink(ctx, changefeedID, pdClock, sinkURI, cfg, errCh) if err != nil { return nil, err } diff --git a/cdc/sink/validator/validator.go b/cdc/sink/validator/validator.go index e411ca5e2f0..161002e3381 100644 --- a/cdc/sink/validator/validator.go +++ b/cdc/sink/validator/validator.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/tiflow/cdc/sink/dmlsink/factory" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/pdutil" "github.com/pingcap/tiflow/pkg/sink" pmysql "github.com/pingcap/tiflow/pkg/sink/mysql" "github.com/pingcap/tiflow/pkg/util" @@ -32,6 +33,7 @@ import ( func Validate(ctx context.Context, changefeedID model.ChangeFeedID, sinkURI string, cfg *config.ReplicaConfig, + pdClock pdutil.Clock, ) error { uri, err := preCheckSinkURI(sinkURI) if err != nil { @@ -50,7 +52,7 @@ func Validate(ctx context.Context, } ctx, cancel := context.WithCancel(ctx) - s, err := factory.New(ctx, changefeedID, sinkURI, cfg, make(chan error)) + s, err := factory.New(ctx, changefeedID, sinkURI, cfg, make(chan error), pdClock) if err != nil { cancel() return err diff --git a/cdc/sink/validator/validator_test.go b/cdc/sink/validator/validator_test.go index 0394fac809d..ccfd8c125ff 100644 --- a/cdc/sink/validator/validator_test.go +++ b/cdc/sink/validator/validator_test.go @@ -101,26 +101,26 @@ func TestValidateSink(t *testing.T) { // test sink uri error sinkURI := "mysql://root:111@127.0.0.1:3306/" - err := Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig) + err := Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig, nil) require.NotNil(t, err) require.Contains(t, err.Error(), "fail to open MySQL connection") // test sink uri right sinkURI = "blackhole://" - err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig) + err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig, nil) require.Nil(t, err) // test bdr mode error replicateConfig.BDRMode = util.AddressOf(true) sinkURI = "blackhole://" - err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig) + err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig, nil) require.NotNil(t, err) require.Contains(t, err.Error(), "sink uri scheme is not supported in BDR mode") // test sink-scheme/syncpoint error replicateConfig.EnableSyncPoint = util.AddressOf(true) sinkURI = "kafka://" - err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig) + err = Validate(ctx, model.DefaultChangeFeedID("test"), sinkURI, replicateConfig, nil) require.NotNil(t, err) require.Contains( t, err.Error(), diff --git a/cmd/kafka-consumer/main.go b/cmd/kafka-consumer/main.go index bb953e948f0..493c2d74af7 100644 --- a/cmd/kafka-consumer/main.go +++ b/cmd/kafka-consumer/main.go @@ -494,7 +494,7 @@ func NewConsumer(ctx context.Context, o *consumerOption) (*Consumer, error) { zap.Int("quota", memoryQuotaPerPartition)) changefeedID := model.DefaultChangeFeedID("kafka-consumer") - f, err := eventsinkfactory.New(ctx, changefeedID, o.downstreamURI, config.GetDefaultReplicaConfig(), errChan) + f, err := eventsinkfactory.New(ctx, changefeedID, o.downstreamURI, config.GetDefaultReplicaConfig(), errChan, nil) if err != nil { cancel() return nil, cerror.Trace(err) diff --git a/cmd/pulsar-consumer/main.go b/cmd/pulsar-consumer/main.go index e8aa548795e..475507d301f 100644 --- 
a/cmd/pulsar-consumer/main.go
+++ b/cmd/pulsar-consumer/main.go
@@ -328,7 +328,7 @@ func NewConsumer(ctx context.Context, o *ConsumerOption) (*Consumer, error) {
 	}
 
 	changefeedID := model.DefaultChangeFeedID("pulsar-consumer")
-	f, err := eventsinkfactory.New(ctx, changefeedID, o.downstreamURI, config.GetDefaultReplicaConfig(), errChan)
+	f, err := eventsinkfactory.New(ctx, changefeedID, o.downstreamURI, config.GetDefaultReplicaConfig(), errChan, nil)
 	if err != nil {
 		cancel()
 		return nil, errors.Trace(err)
diff --git a/cmd/storage-consumer/main.go b/cmd/storage-consumer/main.go
index 6b9c29bcfa9..d9d20f41a44 100644
--- a/cmd/storage-consumer/main.go
+++ b/cmd/storage-consumer/main.go
@@ -194,6 +194,7 @@ func newConsumer(ctx context.Context) (*consumer, error) {
 		downstreamURIStr,
 		config.GetDefaultReplicaConfig(),
 		errCh,
+		nil,
 	)
 	if err != nil {
 		log.Error("failed to create event sink factory", zap.Error(err))
diff --git a/pkg/applier/redo.go b/pkg/applier/redo.go
index 85ab309e716..708e5e44fb0 100644
--- a/pkg/applier/redo.go
+++ b/pkg/applier/redo.go
@@ -124,7 +124,7 @@ func (ra *RedoApplier) catchError(ctx context.Context) error {
 
 func (ra *RedoApplier) initSink(ctx context.Context) (err error) {
 	replicaConfig := config.GetDefaultReplicaConfig()
-	ra.sinkFactory, err = dmlfactory.New(ctx, ra.changefeedID, ra.cfg.SinkURI, replicaConfig, ra.errCh)
+	ra.sinkFactory, err = dmlfactory.New(ctx, ra.changefeedID, ra.cfg.SinkURI, replicaConfig, ra.errCh, nil)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/errors/cdc_errors.go b/pkg/errors/cdc_errors.go
index bc4f1142b7b..c54f90d112a 100644
--- a/pkg/errors/cdc_errors.go
+++ b/pkg/errors/cdc_errors.go
@@ -933,6 +933,10 @@ var (
 		"invalid replica config, %s",
 		errors.RFCCodeText("CDC:ErrInvalidReplicaConfig"),
 	)
+	ErrInternalCheckFailed = errors.Normalize(
+		"internal check failed, %s",
+		errors.RFCCodeText("CDC:ErrInternalCheckFailed"),
+	)
 	ErrHandleDDLFailed = errors.Normalize(
 		"handle ddl failed, query: %s, startTs: %d. "+
diff --git a/pkg/pdutil/clock.go b/pkg/pdutil/clock.go
index fa79ec4074b..58ef6cc415e 100644
--- a/pkg/pdutil/clock.go
+++ b/pkg/pdutil/clock.go
@@ -20,6 +20,7 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/log"
+	pclock "github.com/pingcap/tiflow/engine/pkg/clock"
 	"github.com/pingcap/tiflow/pkg/retry"
 	"github.com/tikv/client-go/v2/oracle"
 	pd "github.com/tikv/pd/client"
@@ -136,3 +137,24 @@ func (c *clock4Test) Run(ctx context.Context) {
 
 func (c *clock4Test) Stop() {
 }
+
+type monotonicClock struct {
+	clock pclock.Clock
+}
+
+// NewMonotonicClock returns a new monotonic clock.
+func NewMonotonicClock(pClock pclock.Clock) Clock {
+	return &monotonicClock{
+		clock: pClock,
+	}
+}
+
+func (m *monotonicClock) CurrentTime() time.Time {
+	return m.clock.Now()
+}
+
+func (m *monotonicClock) Run(ctx context.Context) {
+}
+
+func (m *monotonicClock) Stop() {
+}
diff --git a/pkg/sink/cloudstorage/path.go b/pkg/sink/cloudstorage/path.go
index d4e492d5be5..2217ff83079 100644
--- a/pkg/sink/cloudstorage/path.go
+++ b/pkg/sink/cloudstorage/path.go
@@ -33,6 +33,7 @@ import (
 	"github.com/pingcap/tiflow/pkg/config"
 	"github.com/pingcap/tiflow/pkg/errors"
 	"github.com/pingcap/tiflow/pkg/hash"
+	"github.com/pingcap/tiflow/pkg/pdutil"
 	"github.com/pingcap/tiflow/pkg/util"
 	"github.com/tikv/client-go/v2/oracle"
 	"go.uber.org/zap"
@@ -133,11 +134,12 @@ type VersionedTableName struct {
 
 // FilePathGenerator is used to generate data file path and index file path.
 type FilePathGenerator struct {
-	extension string
-	config    *Config
-	clock     clock.Clock
-	storage   storage.ExternalStorage
-	fileIndex map[VersionedTableName]*indexWithDate
+	changefeedID model.ChangeFeedID
+	extension    string
+	config       *Config
+	pdClock      pdutil.Clock
+	storage      storage.ExternalStorage
+	fileIndex    map[VersionedTableName]*indexWithDate
 
 	hasher     *hash.PositionInertia
 	versionMap map[VersionedTableName]uint64
@@ -145,19 +147,27 @@ type FilePathGenerator struct {
 
 // NewFilePathGenerator creates a FilePathGenerator.
 func NewFilePathGenerator(
+	changefeedID model.ChangeFeedID,
 	config *Config,
 	storage storage.ExternalStorage,
 	extension string,
-	clock clock.Clock,
+	pdClock pdutil.Clock,
 ) *FilePathGenerator {
+	if pdClock == nil {
+		pdClock = pdutil.NewMonotonicClock(clock.New())
+		log.Warn("pd clock is not set in storage sink, using local clock instead",
+			zap.String("namespace", changefeedID.Namespace),
+			zap.String("changefeedID", changefeedID.ID))
+	}
 	return &FilePathGenerator{
-		config:     config,
-		extension:  extension,
-		storage:    storage,
-		clock:      clock,
-		fileIndex:  make(map[VersionedTableName]*indexWithDate),
-		hasher:     hash.NewPositionInertia(),
-		versionMap: make(map[VersionedTableName]uint64),
+		changefeedID: changefeedID,
+		config:       config,
+		extension:    extension,
+		storage:      storage,
+		pdClock:      pdClock,
+		fileIndex:    make(map[VersionedTableName]*indexWithDate),
+		hasher:       hash.NewPositionInertia(),
+		versionMap:   make(map[VersionedTableName]uint64),
 	}
 }
@@ -176,8 +186,12 @@ func (f *FilePathGenerator) CheckOrWriteSchema(
 	def.FromTableInfo(tableInfo, table.TableInfoVersion, f.config.OutputColumnID)
 	if !def.IsTableSchema() {
 		// only check schema for table
-		log.Panic("invalid table schema", zap.Any("versionedTableName", table),
+		log.Error("invalid table schema",
+			zap.String("namespace", f.changefeedID.Namespace),
+			zap.String("changefeedID", f.changefeedID.ID),
+			zap.Any("versionedTableName", table),
 			zap.Any("tableInfo", tableInfo))
+		return errors.ErrInternalCheckFailed.GenWithStackByArgs("invalid table schema in FilePathGenerator")
 	}
 
 	// Case 1: point check if the schema file exists.
@@ -210,10 +224,13 @@ func (f *FilePathGenerator) CheckOrWriteSchema(
 		}
 		version, parsedChecksum := mustParseSchemaName(path)
 		if parsedChecksum != checksum {
-			// TODO: parsedChecksum should be ignored, remove this panic
-			// after the new path protocol is verified.
-			log.Panic("invalid schema file name",
+			log.Error("invalid schema file name",
+				zap.String("namespace", f.changefeedID.Namespace),
+				zap.String("changefeedID", f.changefeedID.ID),
 				zap.String("path", path), zap.Any("checksum", checksum))
+			errMsg := fmt.Sprintf("invalid schema filename in storage sink, "+
+				"expected checksum: %d, actual checksum: %d", checksum, parsedChecksum)
+			return errors.ErrInternalCheckFailed.GenWithStackByArgs(errMsg)
 		}
 		if version > lastVersion {
 			lastVersion = version
@@ -235,6 +252,8 @@ func (f *FilePathGenerator) CheckOrWriteSchema(
 	// b. the schema file is deleted by the consumer. We write the schema file to external storage too.
 	if schemaFileCnt != 0 && lastVersion == 0 {
 		log.Warn("no table schema file found in a non-empty meta path",
+			zap.String("namespace", f.changefeedID.Namespace),
+			zap.String("changefeedID", f.changefeedID.ID),
 			zap.Any("versionedTableName", table),
 			zap.Uint32("checksum", checksum))
 	}
@@ -247,8 +266,8 @@ func (f *FilePathGenerator) CheckOrWriteSchema(
 }
 
 // SetClock is used for unit test
-func (f *FilePathGenerator) SetClock(clock clock.Clock) {
-	f.clock = clock
+func (f *FilePathGenerator) SetClock(pdClock pdutil.Clock) {
+	f.pdClock = pdClock
 }
 
 // GenerateDateStr generates a date string based on current time
@@ -256,7 +275,7 @@ func (f *FilePathGenerator) SetClock(clock clock.Clock) {
 func (f *FilePathGenerator) GenerateDateStr() string {
 	var dateStr string
-	currTime := f.clock.Now()
+	currTime := f.pdClock.CurrentTime()
 	switch f.config.DateSeparator {
 	case config.DateSeparatorYear.String():
 		dateStr = currTime.Format("2006")
diff --git a/pkg/sink/cloudstorage/path_test.go b/pkg/sink/cloudstorage/path_test.go
index 973840dba95..78980d27a48 100644
--- a/pkg/sink/cloudstorage/path_test.go
+++ b/pkg/sink/cloudstorage/path_test.go
@@ -29,6 +29,7 @@ import (
 	"github.com/pingcap/tiflow/cdc/model"
 	"github.com/pingcap/tiflow/engine/pkg/clock"
 	"github.com/pingcap/tiflow/pkg/config"
+	"github.com/pingcap/tiflow/pkg/pdutil"
 	"github.com/pingcap/tiflow/pkg/util"
 	"github.com/stretchr/testify/require"
 	"github.com/tikv/client-go/v2/oracle"
@@ -49,7 +50,7 @@ func testFilePathGenerator(ctx context.Context, t *testing.T, dir string) *FilePathGenerator {
 	err = cfg.Apply(ctx, sinkURI, replicaConfig)
 	require.NoError(t, err)
 
-	f := NewFilePathGenerator(cfg, storage, ".json", clock.New())
+	f := NewFilePathGenerator(model.ChangeFeedID{}, cfg, storage, ".json", pdutil.NewMonotonicClock(clock.New()))
 	return f
 }
@@ -84,7 +85,7 @@ func TestGenerateDataFilePath(t *testing.T) {
 	f = testFilePathGenerator(ctx, t, dir)
 	f.versionMap[table] = table.TableInfoVersion
 	f.config.DateSeparator = config.DateSeparatorYear.String()
-	f.clock = mockClock
+	f.SetClock(pdutil.NewMonotonicClock(mockClock))
 	mockClock.Set(time.Date(2022, 12, 31, 23, 59, 59, 0, time.UTC))
 	date = f.GenerateDateStr()
 	path, err = f.GenerateDataFilePath(ctx, table, date)
@@ -108,7 +109,8 @@ func TestGenerateDataFilePath(t *testing.T) {
 	f = testFilePathGenerator(ctx, t, dir)
 	f.versionMap[table] = table.TableInfoVersion
 	f.config.DateSeparator = config.DateSeparatorMonth.String()
-	f.clock = mockClock
+	f.SetClock(pdutil.NewMonotonicClock(mockClock))
+
 	mockClock.Set(time.Date(2022, 12, 31, 23, 59, 59, 0, time.UTC))
 	date = f.GenerateDateStr()
 	path, err = f.GenerateDataFilePath(ctx, table, date)
@@ -132,7 +134,8 @@ func TestGenerateDataFilePath(t *testing.T) {
 	f = testFilePathGenerator(ctx, t, dir)
 	f.versionMap[table] = table.TableInfoVersion
 	f.config.DateSeparator = config.DateSeparatorDay.String()
-	f.clock = mockClock
+	f.SetClock(pdutil.NewMonotonicClock(mockClock))
+
 	mockClock.Set(time.Date(2022, 12, 31, 23, 59, 59, 0, time.UTC))
 	date = f.GenerateDateStr()
 	path, err = f.GenerateDataFilePath(ctx, table, date)
@@ -210,7 +213,8 @@ func TestGenerateDataFilePathWithIndexFile(t *testing.T) {
 	f := testFilePathGenerator(ctx, t, dir)
 	mockClock := clock.NewMock()
 	f.config.DateSeparator = config.DateSeparatorDay.String()
-	f.clock = mockClock
+	f.SetClock(pdutil.NewMonotonicClock(mockClock))
+
 	mockClock.Set(time.Date(2023, 3, 9, 23, 59, 59, 0, time.UTC))
 	table := VersionedTableName{
 		TableNameWithPhysicTableID: model.TableName{
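
A note on the wiring introduced by this patch: call sites that own an upstream PD handle thread it through (sinkmanager passes m.up.PDClock, api/v1 create passes up.PDClock), while standalone tools (kafka-consumer, pulsar-consumer, storage-consumer, the redo applier) and several validation paths pass nil and rely on the fallback in NewFilePathGenerator, which wraps a process-local clock and logs a warning. Below is a minimal, self-contained sketch of that fallback seam, using only the APIs introduced above; the main wrapper and the date format are illustrative, not part of the change:

package main

import (
	"fmt"

	pclock "github.com/pingcap/tiflow/engine/pkg/clock"
	"github.com/pingcap/tiflow/pkg/pdutil"
)

func main() {
	// In a live changefeed the pdutil.Clock comes from the upstream
	// manager (up.PDClock) and tracks PD's notion of time. When a
	// caller passes nil, NewFilePathGenerator builds this same wrapper
	// around a process-local clock instead:
	var c pdutil.Clock = pdutil.NewMonotonicClock(pclock.New())

	// GenerateDateStr formats this time for date-separator paths,
	// e.g. "2006-01-02" when the separator is "day".
	fmt.Println(c.CurrentTime().Format("2006-01-02"))
}

Injecting the clock at construction time, rather than calling clock.New() inside the DML worker as before, is also what lets the tests above substitute clock.NewMock() through pdutil.NewMonotonicClock without reaching into FilePathGenerator internals.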