Skip to content

Commit

Permalink
logical: Add the EXCLUDE COLUMNS option to SHOW EXPERIMENTAL_FINGERPRINT
Browse files Browse the repository at this point in the history
Currently, tables used for logical data replication (LDR) can't be fingerprinted
because they contain a column, crdb_replication_origin_timestamp, whose value varies
based on the internal MVCC timestamp. Adding an EXCLUDE COLUMNS option to SHOW
EXPERIMENTAL_FINGERPRINTS allows LDR tables to be fingerprinted by excluding
the replication column. A follow-up will use this to verify tables in roachtests.

Release note: none
Fixes: cockroachdb#129497
  • Loading branch information
navsetlur committed Aug 29, 2024
1 parent 74429f0 commit 84f5cec
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 15 deletions.
1 change: 1 addition & 0 deletions docs/generated/sql/bnf/fingerprint_options.bnf
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
fingerprint_options ::=
'START' 'TIMESTAMP' '=' d_expr
| 'EXCLUDE' 'COLUMNS' '=' string_or_placeholder_opt_list
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/show_fingerprints
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,28 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t
t_pkey -7903300865687235210
t_b_idx -5073888452016928166

# Test excluded columns
query TT rowsort
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('c')
----
t_pkey -2938394162542358272
t_b_idx -5073888452016928166

# Test multiple excluded columns
query TT rowsort
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH EXCLUDE COLUMNS = ('a', 'b')
----
t_pkey -3539648437866042702
t_b_idx 590700560494856532

# START TIMESTAMP is only for VIRTUAL CLUSTERS
query error pgcode 22023 cannot use the START TIMESTAMP option when fingerprinting a table.
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t WITH START TIMESTAMP = '132412341234.000000'

# EXCLUDE COLUMNS is only for tables
query error pgcode 22023 cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant.
SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t WITH EXCLUDE COLUMNS = ('a', 'b')

# Test a partial index. We expect this index to have the same fingerprint
# as t_b_idx since the predicate covers all values.
statement ok
Expand Down
9 changes: 7 additions & 2 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -9634,10 +9634,10 @@ show_locality_stmt:
}

show_fingerprints_stmt:
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE table_name opt_with_show_fingerprints_options
{
/* SKIP DOC */
$$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName()}
$$.val = &tree.ShowFingerprints{Table: $5.unresolvedObjectName(), Options: *$6.showFingerprintOptions()}
}
| SHOW EXPERIMENTAL_FINGERPRINTS FROM virtual_cluster virtual_cluster_spec opt_with_show_fingerprints_options
{
Expand Down Expand Up @@ -9678,6 +9678,11 @@ fingerprint_options:
{
$$.val = &tree.ShowFingerprintOptions{StartTimestamp: $4.expr()}
}
| EXCLUDE COLUMNS '=' string_or_placeholder_opt_list
{
$$.val = &tree.ShowFingerprintOptions{ExcludedUserColumns: $4.stringOrPlaceholderOptList()}
}



show_full_scans_stmt:
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/parser/testdata/show
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,14 @@ SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- fully parenthesized
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t -- literals removed
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ -- identifiers removed

parse
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')
----
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- normalized!
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = ('132412341234.000000'), EXCLUDE COLUMNS = (('crdb_original_replication_timestamp'), ('other_column'))) -- fully parenthesized
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE d.t WITH OPTIONS (START TIMESTAMP = '_', EXCLUDE COLUMNS = ('_', '_')) -- literals removed
SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE _._ WITH OPTIONS (START TIMESTAMP = '132412341234.000000', EXCLUDE COLUMNS = ('crdb_original_replication_timestamp', 'other_column')) -- identifiers removed

parse
SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER t
----
Expand Down
24 changes: 22 additions & 2 deletions pkg/sql/sem/tree/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,11 +1160,21 @@ func (node *ShowFingerprints) Format(ctx *FmtCtx) {
// ShowFingerprintOptions describes options for the SHOW EXPERIMENTAL_FINGERPRINTS
// execution.
type ShowFingerprintOptions struct {
StartTimestamp Expr
StartTimestamp Expr
ExcludedUserColumns StringOrPlaceholderOptList
}

func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) {
var addSep bool
maybeAddSep := func() {
if addSep {
ctx.WriteString(", ")
}
addSep = true
}

if s.StartTimestamp != nil {
maybeAddSep()
ctx.WriteString("START TIMESTAMP = ")
_, canOmitParentheses := s.StartTimestamp.(alreadyDelimitedAsSyntacticDExpr)
if !canOmitParentheses {
Expand All @@ -1175,6 +1185,11 @@ func (s *ShowFingerprintOptions) Format(ctx *FmtCtx) {
ctx.WriteByte(')')
}
}
if s.ExcludedUserColumns != nil {
maybeAddSep()
ctx.WriteString("EXCLUDE COLUMNS = ")
s.ExcludedUserColumns.Format(ctx)
}
}

// CombineWith merges other ShowFingerprintOptions into this struct.
Expand All @@ -1188,13 +1203,18 @@ func (s *ShowFingerprintOptions) CombineWith(other *ShowFingerprintOptions) erro
s.StartTimestamp = other.StartTimestamp
}

var err error
s.ExcludedUserColumns, err = combineStringOrPlaceholderOptList(s.ExcludedUserColumns, other.ExcludedUserColumns, "excluded_user_columns")
if err != nil {
return err
}
return nil
}

// IsDefault returns true if this fingerprint options struct has its default values.
func (s ShowFingerprintOptions) IsDefault() bool {
options := ShowFingerprintOptions{}
return s.StartTimestamp == options.StartTimestamp
return s.StartTimestamp == options.StartTimestamp && cmp.Equal(s.ExcludedUserColumns, options.ExcludedUserColumns)
}

var _ NodeFormatter = &ShowFingerprintOptions{}
Expand Down
53 changes: 42 additions & 11 deletions pkg/sql/show_fingerprints.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/clusterunique"
"github.com/cockroachdb/cockroach/pkg/sql/exprutil"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
Expand Down Expand Up @@ -71,8 +72,27 @@ type showFingerprintsNode struct {
func (p *planner) ShowFingerprints(
ctx context.Context, n *tree.ShowFingerprints,
) (planNode, error) {

op := "SHOW EXPERIMENTAL_FINGERPRINTS"
evalOptions, err := evalShowFingerprintOptions(ctx, n.Options, p.EvalContext(), p.SemaCtx(),
op, p.ExprEvaluator(op))
if err != nil {
return nil, err
}

if n.TenantSpec != nil {
return p.planShowTenantFingerprint(ctx, n.TenantSpec, n.Options)
// Tenant fingerprints use the KV fingerprint method, which cannot exclude columns.
if evalOptions.excludedUserColumns != nil {
err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the EXCLUDE COLUMNS option when fingerprinting a tenant.")
return nil, err
}
return p.planShowTenantFingerprint(ctx, n.TenantSpec, evalOptions)
}

// Only allow START TIMESTAMP for virtual clusters, which use the KV fingerprint method instead of SQL.
if !evalOptions.startTimestamp.IsEmpty() {
err = pgerror.New(pgcode.InvalidParameterValue, "cannot use the START TIMESTAMP option when fingerprinting a table.")
return nil, err
}

// We avoid the cache so that we can observe the fingerprints without
Expand All @@ -91,19 +111,22 @@ func (p *planner) ShowFingerprints(
columns: colinfo.ShowFingerprintsColumns,
tableDesc: tableDesc,
indexes: tableDesc.ActiveIndexes(),
options: evalOptions,
}, nil
}

type resolvedShowTenantFingerprintOptions struct {
startTimestamp hlc.Timestamp
startTimestamp hlc.Timestamp
excludedUserColumns []string
}

func evalShowTenantFingerprintOptions(
func evalShowFingerprintOptions(
ctx context.Context,
options tree.ShowFingerprintOptions,
evalCtx *eval.Context,
semaCtx *tree.SemaContext,
op string,
eval exprutil.Evaluator,
) (*resolvedShowTenantFingerprintOptions, error) {
r := &resolvedShowTenantFingerprintOptions{}
if options.StartTimestamp != nil {
Expand All @@ -114,11 +137,21 @@ func evalShowTenantFingerprintOptions(
r.startTimestamp = ts
}

if options.ExcludedUserColumns != nil {
cols, err := eval.StringArray(
ctx, tree.Exprs(options.ExcludedUserColumns))

if err != nil {
return nil, err
}
r.excludedUserColumns = cols
}

return r, nil
}

func (p *planner) planShowTenantFingerprint(
ctx context.Context, ts *tree.TenantSpec, options tree.ShowFingerprintOptions,
ctx context.Context, ts *tree.TenantSpec, evalOptions *resolvedShowTenantFingerprintOptions,
) (planNode, error) {
if err := CanManageTenant(ctx, p); err != nil {
return nil, err
Expand All @@ -133,12 +166,6 @@ func (p *planner) planShowTenantFingerprint(
return nil, err
}

evalOptions, err := evalShowTenantFingerprintOptions(ctx, options, p.EvalContext(), p.SemaCtx(),
"SHOW EXPERIMENTAL_FINGERPRINTS FROM VIRTUAL CLUSTER")
if err != nil {
return nil, err
}

return &showFingerprintsNode{
columns: colinfo.ShowTenantFingerprintsColumns,
tenantSpec: tspec,
Expand Down Expand Up @@ -290,7 +317,11 @@ func (n *showFingerprintsNode) Next(params runParams) (bool, error) {
return false, nil
}
index := n.indexes[n.run.rowIdx]
sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, []string{})
excludedColumns := []string{}
if n.options != nil && len(n.options.excludedUserColumns) > 0 {
excludedColumns = append(excludedColumns, n.options.excludedUserColumns...)
}
sql, err := BuildFingerprintQueryForIndex(n.tableDesc, index, excludedColumns)
if err != nil {
return false, err
}
Expand Down

0 comments on commit 84f5cec

Please sign in to comment.