From 0f9a985f2ffcf6c58b81d97e8b488bd8c64f4c05 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 22 Oct 2024 17:08:48 -0400 Subject: [PATCH] VDiff: Comment the VDiffCreate proto msg and set reasonable server side defaults (#17026) Signed-off-by: Matt Lord --- .../command/vreplication/vdiff/vdiff.go | 16 +-- go/vt/proto/vtctldata/vtctldata.pb.go | 102 ++++++++++++++---- go/vt/vtctl/workflow/server.go | 8 +- proto/vtctldata.proto | 58 ++++++++++ 4 files changed, 154 insertions(+), 30 deletions(-) diff --git a/go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go b/go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go index 2a4143172a7..54b2eec0840 100644 --- a/go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go +++ b/go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go @@ -417,12 +417,14 @@ type summary struct { RowsCompared int64 HasMismatch bool Shards string - StartedAt string `json:"StartedAt,omitempty"` - CompletedAt string `json:"CompletedAt,omitempty"` - TableSummaryMap map[string]tableSummary `json:"TableSummary,omitempty"` - Reports map[string]map[string]vdiff.DiffReport `json:"Reports,omitempty"` - Errors map[string]string `json:"Errors,omitempty"` - Progress *vdiff.ProgressReport `json:"Progress,omitempty"` + StartedAt string `json:"StartedAt,omitempty"` + CompletedAt string `json:"CompletedAt,omitempty"` + TableSummaryMap map[string]tableSummary `json:"TableSummary,omitempty"` + // This is keyed by table name and then by shard name. + Reports map[string]map[string]vdiff.DiffReport `json:"Reports,omitempty"` + // This is keyed by shard name. 
+ Errors map[string]string `json:"Errors,omitempty"` + Progress *vdiff.ProgressReport `json:"Progress,omitempty"` } const summaryTextTemplate = ` @@ -892,7 +894,7 @@ func registerCommands(root *cobra.Command) { create.Flags().Int64Var(&createOptions.Limit, "limit", math.MaxInt64, "Max rows to stop comparing after.") create.Flags().BoolVar(&createOptions.DebugQuery, "debug-query", false, "Adds a mysql query to the report that can be used for further debugging.") create.Flags().Int64Var(&createOptions.MaxReportSampleRows, "max-report-sample-rows", 10, "Maximum number of row differences to report (0 for all differences). NOTE: when increasing this value it is highly recommended to also specify --only-pks") - create.Flags().BoolVar(&createOptions.OnlyPKs, "only-pks", false, "When reporting missing rows, only show primary keys in the report.") + create.Flags().BoolVar(&createOptions.OnlyPKs, "only-pks", false, "When reporting row differences, only show primary keys in the report.") create.Flags().StringSliceVar(&createOptions.Tables, "tables", nil, "Only run vdiff for these tables in the workflow.") create.Flags().Int64Var(&createOptions.MaxExtraRowsToCompare, "max-extra-rows-to-compare", 1000, "If there are collation differences between the source and target, you can have rows that are identical but simply returned in a different order from MySQL. 
We will do a second pass to compare the rows for any actual differences in this case and this flag allows you to control the resources used for this operation.") create.Flags().BoolVar(&createOptions.Wait, "wait", false, "When creating or resuming a vdiff, wait for it to finish before exiting.") diff --git a/go/vt/proto/vtctldata/vtctldata.pb.go b/go/vt/proto/vtctldata/vtctldata.pb.go index 585fb93ce35..d9697e29a21 100644 --- a/go/vt/proto/vtctldata/vtctldata.pb.go +++ b/go/vt/proto/vtctldata/vtctldata.pb.go @@ -14580,28 +14580,86 @@ type VDiffCreateRequest struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Workflow string `protobuf:"bytes,1,opt,name=workflow,proto3" json:"workflow,omitempty"` - TargetKeyspace string `protobuf:"bytes,2,opt,name=target_keyspace,json=targetKeyspace,proto3" json:"target_keyspace,omitempty"` - Uuid string `protobuf:"bytes,3,opt,name=uuid,proto3" json:"uuid,omitempty"` - SourceCells []string `protobuf:"bytes,4,rep,name=source_cells,json=sourceCells,proto3" json:"source_cells,omitempty"` - TargetCells []string `protobuf:"bytes,5,rep,name=target_cells,json=targetCells,proto3" json:"target_cells,omitempty"` - TabletTypes []topodata.TabletType `protobuf:"varint,6,rep,packed,name=tablet_types,json=tabletTypes,proto3,enum=topodata.TabletType" json:"tablet_types,omitempty"` - TabletSelectionPreference tabletmanagerdata.TabletSelectionPreference `protobuf:"varint,7,opt,name=tablet_selection_preference,json=tabletSelectionPreference,proto3,enum=tabletmanagerdata.TabletSelectionPreference" json:"tablet_selection_preference,omitempty"` - Tables []string `protobuf:"bytes,8,rep,name=tables,proto3" json:"tables,omitempty"` - Limit int64 `protobuf:"varint,9,opt,name=limit,proto3" json:"limit,omitempty"` - FilteredReplicationWaitTime *vttime.Duration `protobuf:"bytes,10,opt,name=filtered_replication_wait_time,json=filteredReplicationWaitTime,proto3" json:"filtered_replication_wait_time,omitempty"` - DebugQuery bool 
`protobuf:"varint,11,opt,name=debug_query,json=debugQuery,proto3" json:"debug_query,omitempty"` - OnlyPKs bool `protobuf:"varint,12,opt,name=only_p_ks,json=onlyPKs,proto3" json:"only_p_ks,omitempty"` - UpdateTableStats bool `protobuf:"varint,13,opt,name=update_table_stats,json=updateTableStats,proto3" json:"update_table_stats,omitempty"` - MaxExtraRowsToCompare int64 `protobuf:"varint,14,opt,name=max_extra_rows_to_compare,json=maxExtraRowsToCompare,proto3" json:"max_extra_rows_to_compare,omitempty"` - Wait bool `protobuf:"varint,15,opt,name=wait,proto3" json:"wait,omitempty"` - WaitUpdateInterval *vttime.Duration `protobuf:"bytes,16,opt,name=wait_update_interval,json=waitUpdateInterval,proto3" json:"wait_update_interval,omitempty"` - AutoRetry bool `protobuf:"varint,17,opt,name=auto_retry,json=autoRetry,proto3" json:"auto_retry,omitempty"` - Verbose bool `protobuf:"varint,18,opt,name=verbose,proto3" json:"verbose,omitempty"` - MaxReportSampleRows int64 `protobuf:"varint,19,opt,name=max_report_sample_rows,json=maxReportSampleRows,proto3" json:"max_report_sample_rows,omitempty"` - MaxDiffDuration *vttime.Duration `protobuf:"bytes,20,opt,name=max_diff_duration,json=maxDiffDuration,proto3" json:"max_diff_duration,omitempty"` - RowDiffColumnTruncateAt int64 `protobuf:"varint,21,opt,name=row_diff_column_truncate_at,json=rowDiffColumnTruncateAt,proto3" json:"row_diff_column_truncate_at,omitempty"` - AutoStart *bool `protobuf:"varint,22,opt,name=auto_start,json=autoStart,proto3,oneof" json:"auto_start,omitempty"` + // The name of the workflow that we're diffing tables for. + Workflow string `protobuf:"bytes,1,opt,name=workflow,proto3" json:"workflow,omitempty"` + // The keyspace where the vreplication workflow is running. + TargetKeyspace string `protobuf:"bytes,2,opt,name=target_keyspace,json=targetKeyspace,proto3" json:"target_keyspace,omitempty"` + // A unique identifier for the vdiff. + // If empty, a new UUID will be generated. 
+ Uuid string `protobuf:"bytes,3,opt,name=uuid,proto3" json:"uuid,omitempty"` + // The cells to look for source tablets in. + // If empty, all cells are used. + SourceCells []string `protobuf:"bytes,4,rep,name=source_cells,json=sourceCells,proto3" json:"source_cells,omitempty"` + // The cells to look for target tablets in. + // If empty, all cells are used. + TargetCells []string `protobuf:"bytes,5,rep,name=target_cells,json=targetCells,proto3" json:"target_cells,omitempty"` + // The tablet types to use when searching for tablets to use when streaming + // results. + // A default value of "replica,rdonly,primary" is used by the tablet picker. + TabletTypes []topodata.TabletType `protobuf:"varint,6,rep,packed,name=tablet_types,json=tabletTypes,proto3,enum=topodata.TabletType" json:"tablet_types,omitempty"` + // When performing source tablet selection, look for candidates in the type + // order as they are listed in the tablet_types value (or the default of + // "replica,rdonly,primary" that the tablet picker uses). + // The default is ANY (0) and you can use INORDER (1) to e.g. ensure that a + // primary tablet is only used if there are no available replica or rdonly + // tablets. + TabletSelectionPreference tabletmanagerdata.TabletSelectionPreference `protobuf:"varint,7,opt,name=tablet_selection_preference,json=tabletSelectionPreference,proto3,enum=tabletmanagerdata.TabletSelectionPreference" json:"tablet_selection_preference,omitempty"` + // The tables to compare. If empty, all tables in the workflow are compared. + Tables []string `protobuf:"bytes,8,rep,name=tables,proto3" json:"tables,omitempty"` + // The maximum number of rows to compare for each table on each shard. + // The default is a max int64 value: 2^63 - 1 or 9,223,372,036,854,775,807. 
+ Limit int64 `protobuf:"varint,9,opt,name=limit,proto3" json:"limit,omitempty"` + // How long to wait for the relevant vreplication stream(s) to catch up when + // attempting to set up the table snapshots on the source and target to use for + // the diff on each shard. + // The default is 30s. + FilteredReplicationWaitTime *vttime.Duration `protobuf:"bytes,10,opt,name=filtered_replication_wait_time,json=filteredReplicationWaitTime,proto3" json:"filtered_replication_wait_time,omitempty"` + // Include the MySQL query used for the diff in the report that is stored on + // each shard primary tablet in the _vt.vdiff_table records. + DebugQuery bool `protobuf:"varint,11,opt,name=debug_query,json=debugQuery,proto3" json:"debug_query,omitempty"` + // Only show the Primary Key columns in any row diff output. You would + // typically want to use this if you set the max_report_sample_rows very high. + OnlyPKs bool `protobuf:"varint,12,opt,name=only_p_ks,json=onlyPKs,proto3" json:"only_p_ks,omitempty"` + // Update the table statistics, using ANALYZE TABLE, on each table involved + // in the vdiff during initialization on each target shard. This will ensure + // that progress estimates are as accurate as possible -- but it does involve + // locks and can potentially impact query processing on the target keyspace. + UpdateTableStats bool `protobuf:"varint,13,opt,name=update_table_stats,json=updateTableStats,proto3" json:"update_table_stats,omitempty"` + // If there are collation differences between the source and target, you can + // have rows that are identical but simply returned in a different order from + // MySQL. We will do a second pass to compare the rows for any actual + // differences in this case and this flag allows you to control the resources + // used for this operation. + // The default is 0, comparing no extra rows. 
+ MaxExtraRowsToCompare int64 `protobuf:"varint,14,opt,name=max_extra_rows_to_compare,json=maxExtraRowsToCompare,proto3" json:"max_extra_rows_to_compare,omitempty"` + // Wait for the vdiff to complete before returning (making the call synchronous + // vs asynchronous by default). + Wait bool `protobuf:"varint,15,opt,name=wait,proto3" json:"wait,omitempty"` + // When wait is true, this is how frequently the vdiff progress will be shown. + WaitUpdateInterval *vttime.Duration `protobuf:"bytes,16,opt,name=wait_update_interval,json=waitUpdateInterval,proto3" json:"wait_update_interval,omitempty"` + // Automatically retry the vdiff if we encounter an error. This should almost + // always be set to true (default is false). + AutoRetry bool `protobuf:"varint,17,opt,name=auto_retry,json=autoRetry,proto3" json:"auto_retry,omitempty"` + // Include additional information in the vdiff report that is produced and + // stored on each target shard primary's _vt sidecar database. + Verbose bool `protobuf:"varint,18,opt,name=verbose,proto3" json:"verbose,omitempty"` + // The maximum number of rows to include in the row diff report (when + // differences are found) for each table on each shard. + // The default is 0, which will include no sample rows that differed. + MaxReportSampleRows int64 `protobuf:"varint,19,opt,name=max_report_sample_rows,json=maxReportSampleRows,proto3" json:"max_report_sample_rows,omitempty"` + // The maximum time that a diff of a single table can run on each target shard + // before it is stopped and then later resumed from where we left off. This + // can be helpful in limiting the impact of holding open that large transaction + // where we scan up to every row in the table. + // The default is 0 or no limit. 
+ MaxDiffDuration *vttime.Duration `protobuf:"bytes,20,opt,name=max_diff_duration,json=maxDiffDuration,proto3" json:"max_diff_duration,omitempty"` + // At what length should we truncate the column values in the row diff report + // generated for each table on each shard when differences are detected. + // The default is 0, meaning do not truncate. + RowDiffColumnTruncateAt int64 `protobuf:"varint,21,opt,name=row_diff_column_truncate_at,json=rowDiffColumnTruncateAt,proto3" json:"row_diff_column_truncate_at,omitempty"` + // Auto start the vdiff after creating it. + // The default is true if no value is specified. + AutoStart *bool `protobuf:"varint,22,opt,name=auto_start,json=autoStart,proto3,oneof" json:"auto_start,omitempty"` } func (x *VDiffCreateRequest) Reset() { diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index a9242016f40..4a7d5ed7725 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -1850,6 +1850,9 @@ func (s *Server) VDiffCreate(ctx context.Context, req *vtctldatapb.VDiffCreateRe tabletTypesStr := discovery.BuildTabletTypesString(req.TabletTypes, req.TabletSelectionPreference) + if req.Limit == 0 { // This would produce no useful results + req.Limit = math.MaxInt64 + } // This is a pointer so there's no ZeroValue in the message // and an older v18 client will not provide it. if req.MaxDiffDuration == nil { @@ -1859,7 +1862,10 @@ func (s *Server) VDiffCreate(ctx context.Context, req *vtctldatapb.VDiffCreateRe // client should always provide them, but we check anyway to // be safe. if req.FilteredReplicationWaitTime == nil { - req.FilteredReplicationWaitTime = &vttimepb.Duration{} + // A value of 0 is not valid as the vdiff will never succeed. 
+ req.FilteredReplicationWaitTime = &vttimepb.Duration{ + Seconds: int64(DefaultTimeout.Seconds()), + } } if req.WaitUpdateInterval == nil { req.WaitUpdateInterval = &vttimepb.Duration{} diff --git a/proto/vtctldata.proto b/proto/vtctldata.proto index 406503a4be3..a77c28c1e99 100644 --- a/proto/vtctldata.proto +++ b/proto/vtctldata.proto @@ -1896,27 +1896,85 @@ message ValidateVSchemaResponse { } message VDiffCreateRequest { + // The name of the workflow that we're diffing tables for. string workflow = 1; + // The keyspace where the vreplication workflow is running. string target_keyspace = 2; + // A unique identifier for the vdiff. + // If empty, a new UUID will be generated. string uuid = 3; + // The cells to look for source tablets in. + // If empty, all cells are used. repeated string source_cells = 4; + // The cells to look for target tablets in. + // If empty, all cells are used. repeated string target_cells = 5; + // The tablet types to use when searching for tablets to use when streaming + // results. + // A default value of "replica,rdonly,primary" is used by the tablet picker. repeated topodata.TabletType tablet_types = 6; + // When performing source tablet selection, look for candidates in the type + // order as they are listed in the tablet_types value (or the default of + // "replica,rdonly,primary" that the tablet picker uses). + // The default is ANY (0) and you can use INORDER (1) to e.g. ensure that a + // primary tablet is only used if there are no available replica or rdonly + // tablets. tabletmanagerdata.TabletSelectionPreference tablet_selection_preference = 7; + // The tables to compare. If empty, all tables in the workflow are compared. repeated string tables = 8; + // The maximum number of rows to compare for each table on each shard. + // The default is a max int64 value: 2^63 - 1 or 9,223,372,036,854,775,807. 
int64 limit = 9; + // How long to wait for the relevant vreplication stream(s) to catch up when + // attempting to set up the table snapshots on the source and target to use for + // the diff on each shard. + // The default is 30s. vttime.Duration filtered_replication_wait_time = 10; + // Include the MySQL query used for the diff in the report that is stored on + // each shard primary tablet in the _vt.vdiff_table records. bool debug_query = 11; + // Only show the Primary Key columns in any row diff output. You would + // typically want to use this if you set the max_report_sample_rows very high. bool only_p_ks = 12; + // Update the table statistics, using ANALYZE TABLE, on each table involved + // in the vdiff during initialization on each target shard. This will ensure + // that progress estimates are as accurate as possible -- but it does involve + // locks and can potentially impact query processing on the target keyspace. bool update_table_stats = 13; + // If there are collation differences between the source and target, you can + // have rows that are identical but simply returned in a different order from + // MySQL. We will do a second pass to compare the rows for any actual + // differences in this case and this flag allows you to control the resources + // used for this operation. + // The default is 0, comparing no extra rows. int64 max_extra_rows_to_compare = 14; + // Wait for the vdiff to complete before returning (making the call synchronous + // vs asynchronous by default). bool wait = 15; + // When wait is true, this is how frequently the vdiff progress will be shown. vttime.Duration wait_update_interval = 16; + // Automatically retry the vdiff if we encounter an error. This should almost + // always be set to true (default is false). bool auto_retry = 17; + // Include additional information in the vdiff report that is produced and + // stored on each target shard primary's _vt sidecar database. 
bool verbose = 18; + // The maximum number of rows to include in the row diff report (when + // differences are found) for each table on each shard. + // The default is 0, which will include no sample rows that differed. int64 max_report_sample_rows = 19; + // The maximum time that a diff of a single table can run on each target shard + // before it is stopped and then later resumed from where we left off. This + // can be helpful in limiting the impact of holding open that large transaction + // where we scan up to every row in the table. + // The default is 0 or no limit. vttime.Duration max_diff_duration = 20; + // At what length should we truncate the column values in the row diff report + // generated for each table on each shard when differences are detected. + // The default is 0, meaning do not truncate. int64 row_diff_column_truncate_at = 21; + // Auto start the vdiff after creating it. + // The default is true if no value is specified. optional bool auto_start = 22; }