Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
123331: internal/sqlsmith: make it more deterministic r=yuzefovich a=yuzefovich

This commit removes one source of non-determinism (iteration order over a map) in sqlsmith. The following parts are affected:
- list of regions for MR setup
- file names for IMPORT stmt
- table-index pairs for merge join expr
- DROP FUNCTION
- picking a random table or random index
- UDTs
- builtin functions and binary operators (here we needed to make construction of global "registries" deterministic).

Informs: cockroachdb#122766
Epic: None

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed May 1, 2024
2 parents e572349 + d30a455 commit 186b9b9
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 46 deletions.
6 changes: 5 additions & 1 deletion pkg/internal/sqlsmith/alter.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package sqlsmith

import (
gosql "database/sql"
"sort"

"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/randgen"
Expand Down Expand Up @@ -425,7 +426,7 @@ func makeDropType(s *Smither) (tree.Statement, bool) {
var typNames []*tree.UnresolvedObjectName
for len(typNames) < 1 || s.coin() {
// It's ok if the same type is chosen multiple times.
typName, ok := s.getRandUserDefinedType()
_, typName, ok := s.getRandUserDefinedType()
if !ok {
if len(typNames) == 0 {
return nil, false
Expand Down Expand Up @@ -458,6 +459,9 @@ func rowsToRegionList(rows *gosql.Rows) ([]string, error) {
for region := range regionsSet {
regions = append(regions, region)
}
// Make deterministic. Note that we don't need to shuffle the regions since
// the caller will be picking random ones from the slice.
sort.Strings(regions)
return regions, nil
}

Expand Down
11 changes: 9 additions & 2 deletions pkg/internal/sqlsmith/bulkio.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"net/http"
"net/http/httptest"
"regexp"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -125,6 +126,7 @@ func makeRestore(s *Smither) (tree.Statement, bool) {
func() {
s.lock.Lock()
defer s.lock.Unlock()
// TODO(yuzefovich): picking a backup target here is non-deterministic.
for name, targets = range s.bulkBackups {
break
}
Expand Down Expand Up @@ -219,12 +221,17 @@ func makeImport(s *Smither) (tree.Statement, bool) {
}
expr := s.bulkExports[0]
s.bulkExports = s.bulkExports[1:]
var f tree.Exprs
var fileNames []string
for name := range s.bulkFiles {
if strings.Contains(name, expr+"/") && !strings.HasSuffix(name, exportSchema) {
f = append(f, tree.NewStrVal(s.bulkSrv.URL+name))
fileNames = append(fileNames, name)
}
}
sort.Strings(fileNames)
var f tree.Exprs
for _, name := range fileNames {
f = append(f, tree.NewStrVal(s.bulkSrv.URL+name))
}
return f, expr
}()
// An empty table will produce an EXPORT with zero files.
Expand Down
30 changes: 25 additions & 5 deletions pkg/internal/sqlsmith/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package sqlsmith

import (
"fmt"
"sort"
"strings"

"github.com/cockroachdb/cockroach/pkg/sql/randgen"
Expand Down Expand Up @@ -356,16 +357,31 @@ func makeMergeJoinExpr(s *Smither, _ colRefs, forJoin bool) (tree.TableExpr, col
rightAliasName := tree.NewUnqualifiedTableName(rightAlias)

// Now look for one that satisfies our constraints (some shared prefix
// of type + direction), might end up being the same one. We rely on
// Go's non-deterministic map iteration ordering for randomness.
// of type + direction), might end up being the same one.
rightTableName, cols, ok := func() (*tree.TableIndexName, [][2]colRef, bool) {
s.lock.RLock()
defer s.lock.RUnlock()
for tbl, idxs := range s.indexes {
for idxName, idx := range idxs {
if len(s.tables) == 0 {
return nil, nil, false
}
// Iterate in deterministic but random order over all tables.
tableNames := make([]tree.TableName, 0, len(s.tables))
for t := range s.indexes {
tableNames = append(tableNames, t)
}
sort.Slice(tableNames, func(i, j int) bool {
return strings.Compare(tableNames[i].String(), tableNames[j].String()) < 0
})
s.rnd.Shuffle(len(tableNames), func(i, j int) {
tableNames[i], tableNames[j] = tableNames[j], tableNames[i]
})
for _, tbl := range tableNames {
// Iterate in deterministic but random order over all indexes.
idxs := s.getAllIndexesForTableRLocked(tbl)
for _, idx := range idxs {
rightTableName := &tree.TableIndexName{
Table: tbl,
Index: tree.UnrestrictedName(idxName),
Index: tree.UnrestrictedName(idx.Name),
}
// cols keeps track of matching column pairs.
var cols [][2]colRef
Expand Down Expand Up @@ -919,6 +935,10 @@ func makeDropFunc(s *Smither) (tree.Statement, bool) {
for oid := range fns {
retOIDs = append(retOIDs, oid)
}
// Sort the slice since iterating over fns is non-deterministic.
sort.Slice(retOIDs, func(i, j int) bool {
return retOIDs[i] < retOIDs[j]
})
// Pick a random starting point within the list of OIDs.
oidShift := s.rnd.Intn(len(retOIDs))
for i := 0; i < len(retOIDs); i++ {
Expand Down
113 changes: 81 additions & 32 deletions pkg/internal/sqlsmith/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package sqlsmith

import (
"fmt"
"sort"
"strings"

"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
Expand Down Expand Up @@ -87,16 +88,56 @@ func (s *Smither) ReloadSchemas() error {
return err
}

// indexesWithNames is a helper struct to sort CreateIndex nodes based on the
// names.
type indexesWithNames struct {
names []string
nodes []*tree.CreateIndex
}

func (t *indexesWithNames) Len() int {
return len(t.names)
}

func (t *indexesWithNames) Less(i, j int) bool {
return t.names[i] < t.names[j]
}

func (t *indexesWithNames) Swap(i, j int) {
t.names[i], t.names[j] = t.names[j], t.names[i]
t.nodes[i], t.nodes[j] = t.nodes[j], t.nodes[i]
}

var _ sort.Interface = &indexesWithNames{}

// getAllIndexesForTableRLocked returns information about all indexes of the
// given table in the deterministic order. s.lock is assumed to be read-locked.
func (s *Smither) getAllIndexesForTableRLocked(tableName tree.TableName) []*tree.CreateIndex {
s.lock.AssertRHeld()
indexes, ok := s.indexes[tableName]
if !ok {
return nil
}
names := make([]string, 0, len(indexes))
nodes := make([]*tree.CreateIndex, 0, len(indexes))
for _, index := range indexes {
names = append(names, string(index.Name))
nodes = append(nodes, index)
}
sort.Sort(&indexesWithNames{names: names, nodes: nodes})
return nodes
}

func (s *Smither) getRandTable() (*aliasedTableRef, bool) {
s.lock.RLock()
defer s.lock.RUnlock()
if len(s.tables) == 0 {
return nil, false
}
table := s.tables[s.rnd.Intn(len(s.tables))]
indexes := s.indexes[*table.TableName]
var indexFlags tree.IndexFlags
if !s.disableIndexHints && s.coin() {
indexes := s.getAllIndexesForTableRLocked(*table.TableName)
indexNames := make([]tree.Name, 0, len(indexes))
for _, index := range indexes {
if !index.Inverted {
Expand All @@ -117,17 +158,16 @@ func (s *Smither) getRandTable() (*aliasedTableRef, bool) {
func (s *Smither) getRandTableIndex(
table, alias tree.TableName,
) (*tree.TableIndexName, *tree.CreateIndex, colRefs, bool) {
s.lock.RLock()
indexes := s.indexes[table]
s.lock.RUnlock()
var indexes []*tree.CreateIndex
func() {
s.lock.RLock()
defer s.lock.RUnlock()
indexes = s.getAllIndexesForTableRLocked(table)
}()
if len(indexes) == 0 {
return nil, nil, nil, false
}
names := make([]tree.Name, 0, len(indexes))
for n := range indexes {
names = append(names, n)
}
idx := indexes[names[s.rnd.Intn(len(names))]]
idx := indexes[s.rnd.Intn(len(indexes))]
var refs colRefs
s.lock.RLock()
defer s.lock.RUnlock()
Expand Down Expand Up @@ -159,13 +199,12 @@ func (s *Smither) getRandIndex() (*tree.TableIndexName, *tree.CreateIndex, colRe
}

func (s *Smither) getRandUserDefinedTypeLabel() (*tree.EnumValue, *tree.TypeName, bool) {
typName, ok := s.getRandUserDefinedType()
udt, typName, ok := s.getRandUserDefinedType()
if !ok {
return nil, nil, false
}
s.lock.RLock()
defer s.lock.RUnlock()
udt := s.types.udts[*typName]
logicalRepresentations := udt.TypeMeta.EnumData.LogicalRepresentations
// There are no values in this enum.
if len(logicalRepresentations) == 0 {
Expand All @@ -175,21 +214,14 @@ func (s *Smither) getRandUserDefinedTypeLabel() (*tree.EnumValue, *tree.TypeName
return &enumVal, typName, true
}

func (s *Smither) getRandUserDefinedType() (*tree.TypeName, bool) {
func (s *Smither) getRandUserDefinedType() (*types.T, *tree.TypeName, bool) {
s.lock.RLock()
defer s.lock.RUnlock()
if s.types == nil || len(s.types.udts) == 0 {
return nil, false
return nil, nil, false
}
idx := s.rnd.Intn(len(s.types.udts))
count := 0
for typName := range s.types.udts {
if count == idx {
return &typName, true
}
count++
}
return nil, false
return s.types.udts[idx], &s.types.udtNames[idx], true
}

func (s *Smither) extractTypes() (*typeInfo, error) {
Expand All @@ -205,7 +237,8 @@ FROM
defer rows.Close()

evalCtx := eval.Context{}
udtMapping := make(map[tree.TypeName]*types.T)
var udts []*types.T
var udtNames []tree.TypeName

for rows.Next() {
// For each row, collect columns.
Expand Down Expand Up @@ -243,18 +276,15 @@ FROM
IsMemberReadOnly: make([]bool, len(members)),
},
}
key := tree.MakeSchemaQualifiedTypeName(scName, name)
udtMapping[key] = typ
}
var udts []*types.T
for _, t := range udtMapping {
udts = append(udts, t)
udts = append(udts, typ)
udtNames = append(udtNames, tree.MakeSchemaQualifiedTypeName(scName, name))
}
// Make sure that future appends to udts force a copy.
udts = udts[:len(udts):len(udts)]

return &typeInfo{
udts: udtMapping,
udts: udts,
udtNames: udtNames,
scalarTypes: append(udts, types.Scalar...),
seedTypes: append(udts, randgen.SeedTypes...),
}, nil
Expand Down Expand Up @@ -454,6 +484,9 @@ func (s *Smither) extractIndexes(
// Remove indexes with empty Columns. This is the case for rowid indexes
// where the only index column, rowid, is ignored in the SQL statement
// above, but the stored columns are not.
//
// Note that here non-deterministic iteration order over 'indexes' map
// doesn't matter.
for name, idx := range indexes {
if len(idx.Columns) == 0 {
delete(indexes, name)
Expand Down Expand Up @@ -492,12 +525,21 @@ type operator struct {
}

var operators = func() map[oid.Oid][]operator {
// Ensure deterministic order of populating operators map.
binOps := make([]treebin.BinaryOperatorSymbol, 0, len(tree.BinOps))
for op := range tree.BinOps {
binOps = append(binOps, op)
}
sort.Slice(binOps, func(i, j int) bool {
return binOps[i] < binOps[j]
})
m := map[oid.Oid][]operator{}
for BinaryOperator, overload := range tree.BinOps {
for _, binOp := range binOps {
overload := tree.BinOps[binOp]
_ = overload.ForEachBinOp(func(bo *tree.BinOp) error {
m[bo.ReturnType.Oid()] = append(m[bo.ReturnType.Oid()], operator{
BinOp: bo,
Operator: treebin.MakeBinaryOperator(BinaryOperator),
Operator: treebin.MakeBinaryOperator(binOp),
})
return nil
})
Expand All @@ -518,8 +560,15 @@ type functionsMu struct {
}

var functions = func() *functionsMu {
// Ensure deterministic order of populating functions map.
funcNames := make([]string, 0, len(tree.FunDefs))
for name := range tree.FunDefs {
funcNames = append(funcNames, name)
}
sort.Strings(funcNames)
m := map[tree.FunctionClass]map[oid.Oid][]function{}
for _, def := range tree.FunDefs {
for _, name := range funcNames {
def := tree.FunDefs[name]
if n := tree.Name(def.Name); n.String() != def.Name {
// sqlsmith doesn't know how to quote function names, e.g. for
// the numeric cast, we need to use `"numeric"(val)`, but sqlsmith
Expand Down
4 changes: 3 additions & 1 deletion pkg/internal/sqlsmith/sqlsmith.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ type Smither struct {
tables []*tableRef
sequences []*sequenceRef
columns map[tree.TableName]map[tree.Name]*tree.ColumnTableDef
indexes map[tree.TableName]map[tree.Name]*tree.CreateIndex
// Note: consider using getAllIndexesForTable helper if you need to iterate
// over all indexes for a particular table.
indexes map[tree.TableName]map[tree.Name]*tree.CreateIndex
// Only one of nameCounts and nameGens will be used. nameCounts is used when
// simpleNames is true.
nameCounts map[string]int
Expand Down
12 changes: 7 additions & 5 deletions pkg/internal/sqlsmith/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ func (s *Smither) makeDesiredTypes() []*types.T {
}

type typeInfo struct {
udts map[tree.TypeName]*types.T
udts []*types.T
udtNames []tree.TypeName
seedTypes []*types.T
scalarTypes []*types.T
}
Expand All @@ -137,11 +138,12 @@ func (s *Smither) ResolveType(
_ context.Context, name *tree.UnresolvedObjectName,
) (*types.T, error) {
key := tree.MakeSchemaQualifiedTypeName(name.Schema(), name.Object())
res, ok := s.types.udts[key]
if !ok {
return nil, errors.Newf("type name %s not found by smither", name.Object())
for i, typeName := range s.types.udtNames {
if typeName == key {
return s.types.udts[i], nil
}
}
return res, nil
return nil, errors.Newf("type name %s not found by smither", name.Object())
}

// ResolveTypeByOID implements the tree.TypeReferenceResolver interface.
Expand Down

0 comments on commit 186b9b9

Please sign in to comment.