Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
129766: logical: Add the EXCLUDE COLUMNS option to SHOW EXPERIMENTAL_FI… r=msbutler a=navsetlur

…NGERPRINT

Currently tables used for logical data replication can't be fingerprinted
as they contain a column crdb_replication_origin_timestamp which will vary
based on the internal MVCC timestamp. Adding an option to SHOW
EXPERIMENTAL FINGERPRINT will allow LDR tables to be fingerprinted by excluding
the replication column. A follow up will use this to verify tables in LDR roachtests

Release note: none
Fixes: cockroachdb#129497

129844: roachtest: remove COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER from `failover` tests r=nvanbenschoten a=nvanbenschoten

Fixes cockroachdb#125259.

This commit removes the use of COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER from the `failover/partial/lease-leader` tests. Instead of using the cluster setting to disable leader follows leaseholder, the tests now creates the kind of situation where leader and leaseholder could be split indefinitely when using epoch-based leases.

This is more realistic and allows us to test lease+leader failover with leader leases when the leaseholder gets partitioned from its followers but remains connected to the liveness range.

I confirmed that by default, both the epoch-based and expiration-based lease variants of this test still fail at 60s due to stuck requests. This is what we see in roachperf on master as well:
- https://roachperf.crdb.dev/?filter=leader&view=failover%2Fpartial%2Flease-leader&tab=gce
- https://roachperf.crdb.dev/?filter=leader&view=failover%2Fpartial%2Flease-leader%2Flease%3Dexpiration&tab=gce

However, notice that the expiration-based variant temporarily improved when we enabled DistSender circuit breakers, which add a timeout to all KV requests. This is because the expiration-based lease setup can actually recover range availability, we just don't currently unwedge requests that were stuck before the recovery. This is of arguable importance, as most real-world users run with application side timeouts. Regardless, I see the same improvement if I re-enable DistSender circuit breakers. With them, `failover/partial/lease-leader` stays at 60s (infinite) recovery time while `failover/partial/lease-leader/lease=expiration` drops to recovering in ~20s.

Release note: None

Co-authored-by: Naveen Setlur <[email protected]>
Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
3 people committed Aug 29, 2024
3 parents 91faa66 + 84f5cec + 8da8a91 commit 175b0f6
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 49 deletions.
1 change: 1 addition & 0 deletions docs/generated/sql/bnf/fingerprint_options.bnf
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
fingerprint_options ::=
'START' 'TIMESTAMP' '=' d_expr
| 'EXCLUDE' 'COLUMNS' '=' string_or_placeholder_opt_list
59 changes: 25 additions & 34 deletions pkg/cmd/roachtest/tests/failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,22 +526,21 @@ func runFailoverPartialLeaseGateway(ctx context.Context, t test.Test, c cluster.
// n1-n3: system and liveness ranges, SQL gateway
// n4-n6: user ranges
//
// The cluster runs with COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER, which
// will place Raft leaders and leases independently of each other. We can then
// assume that some number of user ranges will randomly have split leader/lease,
// and simply create partial partitions between each of n4-n6 in sequence.
// We then create partial partitions where one of n4-n6 is unable to reach the
// other two nodes but is still able to reach the liveness range. This will
// cause split leader/leaseholder scenarios if raft leadership fails over from a
// partitioned node but the lease does not. We expect this to be a problem for
// epoch-based leases, but not for other types of leases.
//
// We run a kv50 workload on SQL gateways and collect pMax latency for graphing.
func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.Cluster) {
require.Equal(t, 7, c.Spec().NodeCount)

rng, _ := randutil.NewTestRand()

// Create cluster, disabling leader/leaseholder colocation. We only start
// n1-n3, to precisely place system ranges, since we'll have to disable the
// replicate queue shortly.
// Create cluster. We only start n1-n3, to precisely place system ranges,
// since we'll have to disable the replicate queue shortly.
settings := install.MakeClusterSettings()
settings.Env = append(settings.Env, "COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER=true")
settings.Env = append(settings.Env, "COCKROACH_SCAN_MAX_IDLE_TIME=100ms") // speed up replication

m := c.NewMonitor(ctx, c.CRDBNodes())
Expand Down Expand Up @@ -578,22 +577,6 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C
relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3}, []int{4, 5, 6})
relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{4, 5, 6}, []int{1, 2, 3})

// Check that we have a few split leaders/leaseholders on n4-n6. We give
// it a few seconds, since metrics are updated every 10 seconds.
for i := 0; ; i++ {
var count float64
for _, node := range []int{4, 5, 6} {
count += nodeMetric(ctx, t, c, node, "replicas.leaders_not_leaseholders")
}
t.L().Printf("%.0f split leaders/leaseholders", count)
if count >= 3 {
break
} else if i >= 10 {
t.Fatalf("timed out waiting for 3 split leaders/leaseholders")
}
time.Sleep(time.Second)
}

// Run workload on n7 via n1-n3 gateways until test ends (context cancels).
t.L().Printf("running workload")
cancelWorkload := m.GoWithCancel(func(ctx context.Context) error {
Expand All @@ -606,13 +589,21 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C
return err
})

// Start a worker to fail and recover partial partitions between each pair of
// n4-n6 for 3 cycles (9 failures total).
// Start a worker to fail and recover partial partitions between each of n4-n6
// and the other two nodes for 3 cycles (9 failures total).
m.Go(func(ctx context.Context) error {
defer cancelWorkload()

nodes := []int{4, 5, 6}
for i := 0; i < 3; i++ {
for _, node := range []int{4, 5, 6} {
for _, node := range nodes {
var peers []int
for _, peer := range nodes {
if peer != node {
peers = append(peers, peer)
}
}

sleepFor(ctx, t, time.Minute)

// Ranges may occasionally escape their constraints. Move them to where
Expand All @@ -626,17 +617,17 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C

failer.Ready(ctx, node)

peer := node + 1
if peer > 6 {
peer = 4
for _, peer := range peers {
t.L().Printf("failing n%d to n%d (%s lease/leader)", node, peer, failer)
failer.FailPartial(ctx, node, []int{peer})
}
t.L().Printf("failing n%d to n%d (%s lease/leader)", node, peer, failer)
failer.FailPartial(ctx, node, []int{peer})

sleepFor(ctx, t, time.Minute)

t.L().Printf("recovering n%d to n%d (%s lease/leader)", node, peer, failer)
failer.Recover(ctx, node)
for _, peer := range peers {
t.L().Printf("recovering n%d to n%d (%s lease/leader)", node, peer, failer)
failer.Recover(ctx, node)
}
}
}

Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/show_fingerprints
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,28 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t
t_pkey -7903300865687235210
t_b_idx -5073888452016928166

# Test excluded columns
query TT rowsort
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('c')
----
t_pkey -2938394162542358272
t_b_idx -5073888452016928166

# Test multiple excluded columns
query TT rowsort
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('a', 'b')
----
t_pkey -3539648437866042702
t_b_idx 590700560494856532

# START TIMESTAMP is only for VIRTUAL CLUSTERS
query error pgcode 22023 cannot use the START TIMESTAMP option when fingerprinting a table.
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH START TIMESTAMP = '132412341234.000000'

# EXCLUDE COLUMNS is only for tables
query error pgcode 22023 cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant.
SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t WITH EXCLUDE COLUMNS = ('a', 'b')

# Test a partial index. We expect this index to have the same fingerprint
# as t_b_idx since the predicate covers all values.
statement ok
Expand Down
9 changes: 7 additions & 2 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -9634,10 +9634,10 @@ show_locality_stmt:
}

show_fingerprints_stmt:
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name opt_with_show_fingerprints_options
{
/* SKIP DOC */
$$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName()}
$$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName(), Options: *$6.showFingerprintOptions()}
}
| SHOW EXPERIMENTAL_FINGERPRINTS FROM virtual_cluster virtual_cluster_spec opt_with_show_fingerprints_options
{
Expand Down Expand Up @@ -9678,6 +9678,11 @@ fingerprint_options:
{
$$.val = &tree.ShowFingerprintOptions{StartTimestamp: $4.expr()}
}
| EXCLUDE COLUMNS '=' string_or_placeholder_opt_list
{
$$.val = &tree.ShowFingerprintOptions{ExcludedUserColumns: $4.stringOrPlaceholderOptList()}
}



show_full_scans_stmt:
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/parser/testdata/show
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,14 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- fully parenthesized
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- literals removed
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ -- identifiers removed

parse
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')
----
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- normalized!
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = ('132412341234.000000'), EXCLUDE COLUMNS = (('crdb_original_replication_timestamp'), ('other_column'))) -- fully parenthesized
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '_', EXCLUDE COLUMNS = ('_', '_')) -- literals removed
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- identifiers removed

parse
SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t
----
Expand Down
24 changes: 22 additions & 2 deletions pkg/sql/sem/tree/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,11 +1160,21 @@ func (node *ShowFingerprints) Format(ctx *FmtCtx) {
// ShowFingerprintOptions describes options for the SHOW EXPERIMENTAL_FINGERPINT
// execution.
type ShowFingerprintOptions struct {
StartTimestamp Expr
StartTimestamp Expr
ExcludedUserColumns StringOrPlaceholderOptList
}

func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) {
var addSep bool
maybeAddSep := func() {
if addSep {
ctx.WriteString(", ")
}
addSep = true
}

if s.StartTimestamp != nil {
maybeAddSep()
ctx.WriteString("START TIMESTAMP = ")
_, canOmitParentheses := s.StartTimestamp.(alreadyDelimitedAsSyntacticDExpr)
if !canOmitParentheses {
Expand All @@ -1175,6 +1185,11 @@ func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) {
ctx.WriteByte(')')
}
}
if s.ExcludedUserColumns != nil {
maybeAddSep()
ctx.WriteString("EXCLUDE COLUMNS = ")
s.ExcludedUserColumns.Format(ctx)
}
}

// CombineWith merges other TenantReplicationOptions into this struct.
Expand All @@ -1188,13 +1203,18 @@ func (s *ShowFingerprintOptions) CombineWith(other *ShowFingerprintOptions) erro
s.StartTimestamp = other.StartTimestamp
}

var err error
s.ExcludedUserColumns, err = combineStringOrPlaceholderOptList(s.ExcludedUserColumns, other.ExcludedUserColumns, "excluded_user_columns")
if err != nil {
return err
}
return nil
}

// IsDefault returns true if this backup options struct has default value.
func (s ShowFingerprintOptions) IsDefault() bool {
options := ShowFingerprintOptions{}
return s.StartTimestamp == options.StartTimestamp
return s.StartTimestamp == options.StartTimestamp && cmp.Equal(s.ExcludedUserColumns, options.ExcludedUserColumns)
}

var _ NodeFormatter = &ShowFingerprintOptions{}
Expand Down
53 changes: 42 additions & 11 deletions pkg/sql/show_fingerprints.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/clusterunique"
"github.com/cockroachdb/cockroach/pkg/sql/exprutil"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
Expand Down Expand Up @@ -71,8 +72,27 @@ type showFingerprintsNode struct {
func (p *planner) ShowFingerprints(
ctx context.Context, n *tree.ShowFingerprints,
) (planNode, error) {

op := "SHOW EXPERIMENTAL_FINGERPRINTS"
evalOptions, err := evalShowFingerprintOptions(ctx, n.Options, p.EvalContext(), p.SemaCtx(),
op, p.ExprEvaluator(op))
if err != nil {
return nil, err
}

if n.TenantSpec != nil {
return p.planShowTenantFingerprint(ctx, n.TenantSpec, n.Options)
// Tenant fingerprints use the KV fingerprint method and can't exclude columns this way
if evalOptions.excludedUserColumns != nil {
err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant.")
return nil, err
}
return p.planShowTenantFingerprint(ctx, n.TenantSpec, evalOptions)
}

// Only allow this for virtual clusters as it uses the KV fingerprint method instead of SQL
if !evalOptions.startTimestamp.IsEmpty() {
err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the START TIMESTAMP option when fingerprinting a table.")
return nil, err
}

// We avoid the cache so that we can observe the fingerprints without
Expand All @@ -91,19 +111,22 @@ func (p *planner) ShowFingerprints(
columns: colinfo.ShowFingerprintsColumns,
tableDesc: tableDesc,
indexes: tableDesc.ActiveIndexes(),
options: evalOptions,
}, nil
}

type resolvedShowTenantFingerprintOptions struct {
startTimestamp hlc.Timestamp
startTimestamp hlc.Timestamp
excludedUserColumns []string
}

func evalShowTenantFingerprintOptions(
func evalShowFingerprintOptions(
ctx context.Context,
options tree.ShowFingerprintOptions,
evalCtx *eval.Context,
semaCtx *tree.SemaContext,
op string,
eval exprutil.Evaluator,
) (*resolvedShowTenantFingerprintOptions, error) {
r := &resolvedShowTenantFingerprintOptions{}
if options.StartTimestamp != nil {
Expand All @@ -114,11 +137,21 @@ func evalShowTenantFingerprintOptions(
r.startTimestamp = ts
}

if options.ExcludedUserColumns != nil {
cols, err := eval.StringArray(
ctx, tree.Exprs(options.ExcludedUserColumns))

if err != nil {
return nil, err
}
r.excludedUserColumns = cols
}

return r, nil
}

func (p *planner) planShowTenantFingerprint(
ctx context.Context, ts *tree.TenantSpec, options tree.ShowFingerprintOptions,
ctx context.Context, ts *tree.TenantSpec, evalOptions *resolvedShowTenantFingerprintOptions,
) (planNode, error) {
if err := CanManageTenant(ctx, p); err != nil {
return nil, err
Expand All @@ -133,12 +166,6 @@ func (p *planner) planShowTenantFingerprint(
return nil, err
}

evalOptions, err := evalShowTenantFingerprintOptions(ctx, options, p.EvalContext(), p.SemaCtx(),
"SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER")
if err != nil {
return nil, err
}

return &showFingerprintsNode{
columns: colinfo.ShowTenantFingerprintsColumns,
tenantSpec: tspec,
Expand Down Expand Up @@ -290,7 +317,11 @@ func (n *showFingerprintsNode) Next(params runParams) (bool, error) {
return false, nil
}
index := n.indexes[n.run.rowIdx]
sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, []string{})
excludedColumns := []string{}
if n.options != nil && len(n.options.excludedUserColumns) > 0 {
excludedColumns = append(excludedColumns, n.options.excludedUserColumns...)
}
sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, excludedColumns)
if err != nil {
return false, err
}
Expand Down

0 comments on commit 175b0f6

Please sign in to comment.