Skip to content

Commit

Permalink
VDiff: Comment the VDiffCreate proto msg and set reasonable server si…
Browse files Browse the repository at this point in the history
…de defaults (#17026)

Signed-off-by: Matt Lord <[email protected]>
  • Loading branch information
mattlord authored Oct 22, 2024
1 parent 2203cfb commit 0f9a985
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 30 deletions.
16 changes: 9 additions & 7 deletions go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,12 +417,14 @@ type summary struct {
RowsCompared int64
HasMismatch bool
Shards string
StartedAt string `json:"StartedAt,omitempty"`
CompletedAt string `json:"CompletedAt,omitempty"`
TableSummaryMap map[string]tableSummary `json:"TableSummary,omitempty"`
Reports map[string]map[string]vdiff.DiffReport `json:"Reports,omitempty"`
Errors map[string]string `json:"Errors,omitempty"`
Progress *vdiff.ProgressReport `json:"Progress,omitempty"`
StartedAt string `json:"StartedAt,omitempty"`
CompletedAt string `json:"CompletedAt,omitempty"`
TableSummaryMap map[string]tableSummary `json:"TableSummary,omitempty"`
// This is keyed by table name and then by shard name.
Reports map[string]map[string]vdiff.DiffReport `json:"Reports,omitempty"`
// This is keyed by shard name.
Errors map[string]string `json:"Errors,omitempty"`
Progress *vdiff.ProgressReport `json:"Progress,omitempty"`
}

const summaryTextTemplate = `
Expand Down Expand Up @@ -892,7 +894,7 @@ func registerCommands(root *cobra.Command) {
create.Flags().Int64Var(&createOptions.Limit, "limit", math.MaxInt64, "Max rows to stop comparing after.")
create.Flags().BoolVar(&createOptions.DebugQuery, "debug-query", false, "Adds a mysql query to the report that can be used for further debugging.")
create.Flags().Int64Var(&createOptions.MaxReportSampleRows, "max-report-sample-rows", 10, "Maximum number of row differences to report (0 for all differences). NOTE: when increasing this value it is highly recommended to also specify --only-pks")
create.Flags().BoolVar(&createOptions.OnlyPKs, "only-pks", false, "When reporting missing rows, only show primary keys in the report.")
create.Flags().BoolVar(&createOptions.OnlyPKs, "only-pks", false, "When reporting row differences, only show primary keys in the report.")
create.Flags().StringSliceVar(&createOptions.Tables, "tables", nil, "Only run vdiff for these tables in the workflow.")
create.Flags().Int64Var(&createOptions.MaxExtraRowsToCompare, "max-extra-rows-to-compare", 1000, "If there are collation differences between the source and target, you can have rows that are identical but simply returned in a different order from MySQL. We will do a second pass to compare the rows for any actual differences in this case and this flag allows you to control the resources used for this operation.")
create.Flags().BoolVar(&createOptions.Wait, "wait", false, "When creating or resuming a vdiff, wait for it to finish before exiting.")
Expand Down
102 changes: 80 additions & 22 deletions go/vt/proto/vtctldata/vtctldata.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion go/vt/vtctl/workflow/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,9 @@ func (s *Server) VDiffCreate(ctx context.Context, req *vtctldatapb.VDiffCreateRe

tabletTypesStr := discovery.BuildTabletTypesString(req.TabletTypes, req.TabletSelectionPreference)

if req.Limit == 0 { // This would produce no useful results
req.Limit = math.MaxInt64
}
// This is a pointer so there's no ZeroValue in the message
// and an older v18 client will not provide it.
if req.MaxDiffDuration == nil {
Expand All @@ -1859,7 +1862,10 @@ func (s *Server) VDiffCreate(ctx context.Context, req *vtctldatapb.VDiffCreateRe
// client should always provide them, but we check anyway to
// be safe.
if req.FilteredReplicationWaitTime == nil {
req.FilteredReplicationWaitTime = &vttimepb.Duration{}
// A value of 0 is not valid as the vdiff will never succeed.
req.FilteredReplicationWaitTime = &vttimepb.Duration{
Seconds: int64(DefaultTimeout.Seconds()),
}
}
if req.WaitUpdateInterval == nil {
req.WaitUpdateInterval = &vttimepb.Duration{}
Expand Down
58 changes: 58 additions & 0 deletions proto/vtctldata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1896,27 +1896,85 @@ message ValidateVSchemaResponse {
}

message VDiffCreateRequest {
// The name of the workflow that we're diffing tables for.
string workflow = 1;
// The keyspace where the vreplication workflow is running.
string target_keyspace = 2;
// A unique identifier for the vdiff.
// If empty, a new UUID will be generated.
string uuid = 3;
// The cells to look for source tablets in.
// If empty, all cells are used.
repeated string source_cells = 4;
// The cells to look for target tablets in.
// If empty, all cells are used.
repeated string target_cells = 5;
// The tablet types to use when searching for tablets to use when streaming
// results.
// A default value of "replica,rdonly,primary" is used by the tablet picker.
repeated topodata.TabletType tablet_types = 6;
// When performing source tablet selection, look for candidates in the type
// order as they are listed in the tablet_types value (or the default of
// "replica,rdonly,primary" that the tablet picker uses).
// The default is ANY (0) and you can use INORDER (1) to e.g. ensure that a
// primary tablet is only used if there are no available replica or rdonly
// tablets.
tabletmanagerdata.TabletSelectionPreference tablet_selection_preference = 7;
// The tables to compare. If empty, all tables in the workflow are compared.
repeated string tables = 8;
// The maximum number of rows to compare for each table on each shard.
// The default is a max int64 value: 2^63 - 1 or 9,223,372,036,854,775,807.
int64 limit = 9;
// How long to wait for the relevant vreplication stream(s) to catch up when
// attempting to setup the table snapshots on the source and target to use for
// the diff on each shard.
// The default is 30s.
vttime.Duration filtered_replication_wait_time = 10;
// Include the MySQL query used for the diff in the report that is stored on
// each shard primary tablet in the _vt.vdiff_table records.
bool debug_query = 11;
// Only show the Primary Key columns in any row diff output. You would
// typically want to use this if you set the max_sample_rows very high.
bool only_p_ks = 12;
// Update the table statistics, using ANALYZE TABLE, on each table involved
// in the vdiff during initialization on each target shard. This will ensure
// that progress estimates are as accurate as possible -- but it does involve
// locks and can potentially impact query processing on the target keyspace.
bool update_table_stats = 13;
// If there are collation differences between the source and target, you can
// have rows that are identical but simply returned in a different order from
// MySQL. We will do a second pass to compare the rows for any actual
// differences in this case and this flag allows you to control the resources
// used for this operation.
// The default is 0, comparing no extra rows.
int64 max_extra_rows_to_compare = 14;
// Wait for the vdiff to complete before returning (making the call synchronous
// vs asynchronous by default).
bool wait = 15;
// When wait is true, this is how frequently the vdiff progress will be shown.
vttime.Duration wait_update_interval = 16;
// Automatically retry the vdiff if we encounter an error. This should almost
// always be set to true (default is false).
bool auto_retry = 17;
// Include additional information in the vdiff report that is produced and
// stored on each target shard primary's _vt sidecar database.
bool verbose = 18;
// The maximum number of rows to include in the row diff report (when
// differences are found) for each table on each shard.
// The default is 0, which will include no sample rows that differed.
int64 max_report_sample_rows = 19;
// The maximum time that a diff of a single table can run on each target shard
// before it is stopped and then later resumed from where we left off. This
// can be helpful in limiting the impact of holding open that large transaction
// where we scan up to every row in the table.
// The default is 0 or no limit.
vttime.Duration max_diff_duration = 20;
// At what length should we truncate the column values in the row diff report
// generated for each table on each shard when differences are detected.
// The default is 0, meaning do not truncate.
int64 row_diff_column_truncate_at = 21;
// Auto start the vdiff after creating it.
// The default is true if no value is specified.
optional bool auto_start = 22;
}

Expand Down

0 comments on commit 0f9a985

Please sign in to comment.