From 8da8a9104c6c5c6e92e7347f39313416a82b0412 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 28 Aug 2024 15:29:28 -0400 Subject: [PATCH 1/2] roachtest: remove COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER from failover Fixes #125259. This commit removes the use of COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER from the `failover/partial/lease-leader` tests. Instead of using the cluster setting to disable leader follows leaseholder, the tests now creates the kind of situation where leader and leaseholder could be split indefinitely when using epoch-based leases. This is more realistic and allows us to test lease+leader failover with leader leases when the leaseholder gets partitioned from its followers but remains connected to the liveness range. I confirmed that by default, both the epoch-based and expiration-based lease variants of this test still fail at 60s due to stuck requests. This is what we see in roachperf on master as well: - https://roachperf.crdb.dev/?filter=leader&view=failover%2Fpartial%2Flease-leader&tab=gce - https://roachperf.crdb.dev/?filter=leader&view=failover%2Fpartial%2Flease-leader%2Flease%3Dexpiration&tab=gce However, notice that the expiration-based variant temporarily improved when we enabled DistSender circuit breakers, which add a timeout to all KV requests. This is because the expiration-based lease setup can actually recover range availability, we just don't currently unwedge requests that were stuck before the recovery. This is of arguable importance, as most real-world users run with application side timeouts. Regardless, I see the same improvement if I re-enable DistSender circuit breakers. With them, `failover/partial/lease-leader` stays at 60s (infinite) recovery time while `failover/partial/lease-leader/lease=expiration` drops to recovering in ~20s. Release note: None --- pkg/cmd/roachtest/tests/failover.go | 59 ++++++++++++----------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index 1c98978a7b1c..2ff00e3d0c9a 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -526,10 +526,11 @@ func runFailoverPartialLeaseGateway(ctx context.Context, t test.Test, c cluster. // n1-n3: system and liveness ranges, SQL gateway // n4-n6: user ranges // -// The cluster runs with COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER, which -// will place Raft leaders and leases independently of each other. We can then -// assume that some number of user ranges will randomly have split leader/lease, -// and simply create partial partitions between each of n4-n6 in sequence. +// We then create partial partitions where one of n4-n6 is unable to reach the +// other two nodes but is still able to reach the liveness range. This will +// cause split leader/leaseholder scenarios if raft leadership fails over from a +// partitioned node but the lease does not. We expect this to be a problem for +// epoch-based leases, but not for other types of leases. // // We run a kv50 workload on SQL gateways and collect pMax latency for graphing. func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.Cluster) { @@ -537,11 +538,9 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C rng, _ := randutil.NewTestRand() - // Create cluster, disabling leader/leaseholder colocation. We only start - // n1-n3, to precisely place system ranges, since we'll have to disable the - // replicate queue shortly. + // Create cluster. We only start n1-n3, to precisely place system ranges, + // since we'll have to disable the replicate queue shortly. settings := install.MakeClusterSettings() - settings.Env = append(settings.Env, "COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER=true") settings.Env = append(settings.Env, "COCKROACH_SCAN_MAX_IDLE_TIME=100ms") // speed up replication m := c.NewMonitor(ctx, c.CRDBNodes()) @@ -578,22 +577,6 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3}, []int{4, 5, 6}) relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{4, 5, 6}, []int{1, 2, 3}) - // Check that we have a few split leaders/leaseholders on n4-n6. We give - // it a few seconds, since metrics are updated every 10 seconds. - for i := 0; ; i++ { - var count float64 - for _, node := range []int{4, 5, 6} { - count += nodeMetric(ctx, t, c, node, "replicas.leaders_not_leaseholders") - } - t.L().Printf("%.0f split leaders/leaseholders", count) - if count >= 3 { - break - } else if i >= 10 { - t.Fatalf("timed out waiting for 3 split leaders/leaseholders") - } - time.Sleep(time.Second) - } - // Run workload on n7 via n1-n3 gateways until test ends (context cancels). t.L().Printf("running workload") cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { @@ -606,13 +589,21 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C return err }) - // Start a worker to fail and recover partial partitions between each pair of - // n4-n6 for 3 cycles (9 failures total). + // Start a worker to fail and recover partial partitions between each of n4-n6 + // and the other two nodes for 3 cycles (9 failures total). m.Go(func(ctx context.Context) error { defer cancelWorkload() + nodes := []int{4, 5, 6} for i := 0; i < 3; i++ { - for _, node := range []int{4, 5, 6} { + for _, node := range nodes { + var peers []int + for _, peer := range nodes { + if peer != node { + peers = append(peers, peer) + } + } + sleepFor(ctx, t, time.Minute) // Ranges may occasionally escape their constraints. Move them to where @@ -626,17 +617,17 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C failer.Ready(ctx, node) - peer := node + 1 - if peer > 6 { - peer = 4 + for _, peer := range peers { + t.L().Printf("failing n%d to n%d (%s lease/leader)", node, peer, failer) + failer.FailPartial(ctx, node, []int{peer}) } - t.L().Printf("failing n%d to n%d (%s lease/leader)", node, peer, failer) - failer.FailPartial(ctx, node, []int{peer}) sleepFor(ctx, t, time.Minute) - t.L().Printf("recovering n%d to n%d (%s lease/leader)", node, peer, failer) - failer.Recover(ctx, node) + for _, peer := range peers { + t.L().Printf("recovering n%d to n%d (%s lease/leader)", node, peer, failer) + failer.Recover(ctx, node) + } } } From 84f5cec54c808fec4234c7189171341a143f2c94 Mon Sep 17 00:00:00 2001 From: Naveen Setlur Date: Tue, 27 Aug 2024 16:00:40 -0400 Subject: [PATCH 2/2] logical: Add the EXCLUDE COLUMNS option to SHOW EXPERIMENTAL_FINGERPRINT Currently tables used for logical data replication can't be fingerprinted as they contain a column crdb_replication_origin_timestamp which will vary based on the internal MVCC timestamp. Adding an option to SHOW EXPERIMENTAL FINGERPRINT will allow LDR tables to be fingerprinted by excluding the replication column. A follow up will use this to verify tables in roachtests Release note: none Fixes: #129497 --- .../generated/sql/bnf/fingerprint_options.bnf | 1 + .../testdata/logic_test/show_fingerprints | 22 ++++++++ pkg/sql/parser/sql.y | 9 +++- pkg/sql/parser/testdata/show | 8 +++ pkg/sql/sem/tree/show.go | 24 ++++++++- pkg/sql/show_fingerprints.go | 53 +++++++++++++++---- 6 files changed, 102 insertions(+), 15 deletions(-) diff --git a/docs/generated/sql/bnf/fingerprint_options.bnf b/docs/generated/sql/bnf/fingerprint_options.bnf index 3f4acb04db4e..78b6983806d8 100644 --- a/docs/generated/sql/bnf/fingerprint_options.bnf +++ b/docs/generated/sql/bnf/fingerprint_options.bnf @@ -1,2 +1,3 @@ fingerprint_options ::= 'START' 'TIMESTAMP' '=' d_expr + | 'EXCLUDE' 'COLUMNS' '=' string_or_placeholder_opt_list diff --git a/pkg/sql/logictest/testdata/logic_test/show_fingerprints b/pkg/sql/logictest/testdata/logic_test/show_fingerprints index 0886643118e0..a8448f694f97 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_fingerprints +++ b/pkg/sql/logictest/testdata/logic_test/show_fingerprints @@ -18,6 +18,28 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t t_pkey -7903300865687235210 t_b_idx -5073888452016928166 +# Test excluded columns +query TT rowsort +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('c') +---- +t_pkey -2938394162542358272 +t_b_idx -5073888452016928166 + +# Test multiple excluded columns +query TT rowsort +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('a', 'b') +---- +t_pkey -3539648437866042702 +t_b_idx 590700560494856532 + +# START TIMESTAMP is only for VIRTUAL CLUSTERS +query error pgcode 22023 cannot use the START TIMESTAMP option when fingerprinting a table. +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH START TIMESTAMP = '132412341234.000000' + +# EXCLUDE COLUMNS is only for tables +query error pgcode 22023 cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant. +SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t WITH EXCLUDE COLUMNS = ('a', 'b') + # Test a partial index. We expect this index to have the same fingerprint # as t_b_idx since the predicate covers all values. statement ok diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index 13d1865d575c..5e634107e8e0 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -9634,10 +9634,10 @@ show_locality_stmt: } show_fingerprints_stmt: - SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name + SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name opt_with_show_fingerprints_options { /* SKIP DOC */ - $$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName()} + $$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName(), Options: *$6.showFingerprintOptions()} } | SHOW EXPERIMENTAL_FINGERPRINTS FROM virtual_cluster virtual_cluster_spec opt_with_show_fingerprints_options { @@ -9678,6 +9678,11 @@ fingerprint_options: { $$.val = &tree.ShowFingerprintOptions{StartTimestamp: $4.expr()} } +| EXCLUDE COLUMNS '=' string_or_placeholder_opt_list + { + $$.val = &tree.ShowFingerprintOptions{ExcludedUserColumns: $4.stringOrPlaceholderOptList()} + } + show_full_scans_stmt: diff --git a/pkg/sql/parser/testdata/show b/pkg/sql/parser/testdata/show index ea47c96d75ac..2a5d40e5369b 100644 --- a/pkg/sql/parser/testdata/show +++ b/pkg/sql/parser/testdata/show @@ -1323,6 +1323,14 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- fully parenthesized SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- literals removed SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ -- identifiers removed +parse +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column') +---- +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- normalized! +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = ('132412341234.000000'), EXCLUDE COLUMNS = (('crdb_original_replication_timestamp'), ('other_column'))) -- fully parenthesized +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '_', EXCLUDE COLUMNS = ('_', '_')) -- literals removed +SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- identifiers removed + parse SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t ---- diff --git a/pkg/sql/sem/tree/show.go b/pkg/sql/sem/tree/show.go index 6cadeedeb769..d70114e00131 100644 --- a/pkg/sql/sem/tree/show.go +++ b/pkg/sql/sem/tree/show.go @@ -1160,11 +1160,21 @@ func (node *ShowFingerprints) Format(ctx *FmtCtx) { // ShowFingerprintOptions describes options for the SHOW EXPERIMENTAL_FINGERPINT // execution. type ShowFingerprintOptions struct { - StartTimestamp Expr + StartTimestamp Expr + ExcludedUserColumns StringOrPlaceholderOptList } func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) { + var addSep bool + maybeAddSep := func() { + if addSep { + ctx.WriteString(", ") + } + addSep = true + } + if s.StartTimestamp != nil { + maybeAddSep() ctx.WriteString("START TIMESTAMP = ") _, canOmitParentheses := s.StartTimestamp.(alreadyDelimitedAsSyntacticDExpr) if !canOmitParentheses { @@ -1175,6 +1185,11 @@ func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) { ctx.WriteByte(')') } } + if s.ExcludedUserColumns != nil { + maybeAddSep() + ctx.WriteString("EXCLUDE COLUMNS = ") + s.ExcludedUserColumns.Format(ctx) + } } // CombineWith merges other TenantReplicationOptions into this struct. @@ -1188,13 +1203,18 @@ func (s *ShowFingerprintOptions) CombineWith(other *ShowFingerprintOptions) erro s.StartTimestamp = other.StartTimestamp } + var err error + s.ExcludedUserColumns, err = combineStringOrPlaceholderOptList(s.ExcludedUserColumns, other.ExcludedUserColumns, "excluded_user_columns") + if err != nil { + return err + } return nil } // IsDefault returns true if this backup options struct has default value. func (s ShowFingerprintOptions) IsDefault() bool { options := ShowFingerprintOptions{} - return s.StartTimestamp == options.StartTimestamp + return s.StartTimestamp == options.StartTimestamp && cmp.Equal(s.ExcludedUserColumns, options.ExcludedUserColumns) } var _ NodeFormatter = &ShowFingerprintOptions{} diff --git a/pkg/sql/show_fingerprints.go b/pkg/sql/show_fingerprints.go index d6516ac8a05d..225c3c927c49 100644 --- a/pkg/sql/show_fingerprints.go +++ b/pkg/sql/show_fingerprints.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo" "github.com/cockroachdb/cockroach/pkg/sql/clusterunique" + "github.com/cockroachdb/cockroach/pkg/sql/exprutil" "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -71,8 +72,27 @@ type showFingerprintsNode struct { func (p *planner) ShowFingerprints( ctx context.Context, n *tree.ShowFingerprints, ) (planNode, error) { + + op := "SHOW EXPERIMENTAL_FINGERPRINTS" + evalOptions, err := evalShowFingerprintOptions(ctx, n.Options, p.EvalContext(), p.SemaCtx(), + op, p.ExprEvaluator(op)) + if err != nil { + return nil, err + } + if n.TenantSpec != nil { - return p.planShowTenantFingerprint(ctx, n.TenantSpec, n.Options) + // Tenant fingerprints use the KV fingerprint method and can't exclude columns this way + if evalOptions.excludedUserColumns != nil { + err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant.") + return nil, err + } + return p.planShowTenantFingerprint(ctx, n.TenantSpec, evalOptions) + } + + // Only allow this for virtual clusters as it uses the KV fingerprint method instead of SQL + if !evalOptions.startTimestamp.IsEmpty() { + err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the START TIMESTAMP option when fingerprinting a table.") + return nil, err } // We avoid the cache so that we can observe the fingerprints without @@ -91,19 +111,22 @@ func (p *planner) ShowFingerprints( columns: colinfo.ShowFingerprintsColumns, tableDesc: tableDesc, indexes: tableDesc.ActiveIndexes(), + options: evalOptions, }, nil } type resolvedShowTenantFingerprintOptions struct { - startTimestamp hlc.Timestamp + startTimestamp hlc.Timestamp + excludedUserColumns []string } -func evalShowTenantFingerprintOptions( +func evalShowFingerprintOptions( ctx context.Context, options tree.ShowFingerprintOptions, evalCtx *eval.Context, semaCtx *tree.SemaContext, op string, + eval exprutil.Evaluator, ) (*resolvedShowTenantFingerprintOptions, error) { r := &resolvedShowTenantFingerprintOptions{} if options.StartTimestamp != nil { @@ -114,11 +137,21 @@ func evalShowTenantFingerprintOptions( r.startTimestamp = ts } + if options.ExcludedUserColumns != nil { + cols, err := eval.StringArray( + ctx, tree.Exprs(options.ExcludedUserColumns)) + + if err != nil { + return nil, err + } + r.excludedUserColumns = cols + } + return r, nil } func (p *planner) planShowTenantFingerprint( - ctx context.Context, ts *tree.TenantSpec, options tree.ShowFingerprintOptions, + ctx context.Context, ts *tree.TenantSpec, evalOptions *resolvedShowTenantFingerprintOptions, ) (planNode, error) { if err := CanManageTenant(ctx, p); err != nil { return nil, err @@ -133,12 +166,6 @@ func (p *planner) planShowTenantFingerprint( return nil, err } - evalOptions, err := evalShowTenantFingerprintOptions(ctx, options, p.EvalContext(), p.SemaCtx(), - "SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER") - if err != nil { - return nil, err - } - return &showFingerprintsNode{ columns: colinfo.ShowTenantFingerprintsColumns, tenantSpec: tspec, @@ -290,7 +317,11 @@ func (n *showFingerprintsNode) Next(params runParams) (bool, error) { return false, nil } index := n.indexes[n.run.rowIdx] - sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, []string{}) + excludedColumns := []string{} + if n.options != nil && len(n.options.excludedUserColumns) > 0 { + excludedColumns = append(excludedColumns, n.options.excludedUserColumns...) + } + sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, excludedColumns) if err != nil { return false, err }