Skip to content

Commit

Permalink
evalengine: Implement LOCATE and friends (#15195)
Browse files Browse the repository at this point in the history
Signed-off-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
dbussink authored Feb 12, 2024
1 parent a42032a commit d4f0c2a
Show file tree
Hide file tree
Showing 12 changed files with 378 additions and 37 deletions.
14 changes: 14 additions & 0 deletions go/mysql/collations/charset/eightbit/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,17 @@ func (Charset_8bit) Length(src []byte) int {
func (Charset_8bit) MaxWidth() int {
return 1
}

// Slice returns the part of src that covers the characters in [from, to).
// MaxWidth for this charset is 1, so character offsets are used directly as
// byte offsets and the result aliases src (no copy is made).
//
// The requested range is clamped to src: a negative from is treated as 0 and
// a to beyond the end is treated as len(src). A range that starts at or past
// the end of src, or whose end precedes its start, yields nil instead of
// panicking with a slice-bounds error.
func (Charset_8bit) Slice(src []byte, from, to int) []byte {
	if from < 0 {
		from = 0
	}
	if from >= len(src) {
		return nil
	}
	if to > len(src) {
		to = len(src)
	}
	if to < from {
		// Inverted range: there is nothing to return.
		return nil
	}
	return src[from:to]
}

// Validate reports whether src is well-formed in this charset. It returns
// true unconditionally and never inspects src.
func (Charset_8bit) Validate(src []byte) bool {
return true
}
14 changes: 14 additions & 0 deletions go/mysql/collations/charset/eightbit/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,17 @@ func (Charset_binary) Length(src []byte) int {
func (Charset_binary) MaxWidth() int {
return 1
}

// Slice returns the part of src that covers the bytes in [from, to).
// MaxWidth for the binary charset is 1, so the offsets are plain byte offsets
// and the result aliases src (no copy is made).
//
// The requested range is clamped to src: a negative from is treated as 0 and
// a to beyond the end is treated as len(src). A range that starts at or past
// the end of src, or whose end precedes its start, yields nil instead of
// panicking with a slice-bounds error.
func (Charset_binary) Slice(src []byte, from, to int) []byte {
	if from < 0 {
		from = 0
	}
	if from >= len(src) {
		return nil
	}
	if to > len(src) {
		to = len(src)
	}
	if to < from {
		// Inverted range: there is nothing to return.
		return nil
	}
	return src[from:to]
}

// Validate reports whether src is well-formed in the binary charset. It
// returns true unconditionally and never inspects src.
func (Charset_binary) Validate(src []byte) bool {
return true
}
14 changes: 14 additions & 0 deletions go/mysql/collations/charset/eightbit/latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,17 @@ func (Charset_latin1) Length(src []byte) int {
func (Charset_latin1) MaxWidth() int {
return 1
}

// Slice returns the part of src that covers the characters in [from, to).
// MaxWidth for latin1 is 1, so character offsets are used directly as byte
// offsets and the result aliases src (no copy is made).
//
// The requested range is clamped to src: a negative from is treated as 0 and
// a to beyond the end is treated as len(src). A range that starts at or past
// the end of src, or whose end precedes its start, yields nil instead of
// panicking with a slice-bounds error.
func (Charset_latin1) Slice(src []byte, from, to int) []byte {
	if from < 0 {
		from = 0
	}
	if from >= len(src) {
		return nil
	}
	if to > len(src) {
		to = len(src)
	}
	if to < from {
		// Inverted range: there is nothing to return.
		return nil
	}
	return src[from:to]
}

// Validate reports whether src is well-formed in latin1. It returns true
// unconditionally and never inspects src.
func (Charset_latin1) Validate(src []byte) bool {
return true
}
44 changes: 44 additions & 0 deletions go/mysql/collations/colldata/collation.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package colldata

import (
"bytes"
"fmt"
"math"

Expand Down Expand Up @@ -380,3 +381,46 @@ coerceToRight:
return charset.Convert(dst, rightCS, in, leftCS)
}, nil, nil
}

// Index searches str for the first occurrence of sub under collation col,
// skipping the first offset characters of str before the search starts.
//
// It returns the zero-based position of the match counted from the start of
// the original str (offset plus the position reported by instr), or -1 when
// there is no match or when offset is larger than the length that
// charset.Length reports for str. An offset of zero or less searches str
// unmodified.
func Index(col Collation, str, sub []byte, offset int) int {
	cs := col.Charset()

	if offset > 0 {
		if charset.Length(cs, str) < offset {
			// The starting point lies beyond the end of str.
			return -1
		}
		// len(str) is only an upper bound for the end of the range, so the
		// whole tail after the skipped prefix is kept.
		str = charset.Slice(cs, str, offset, len(str))
	}

	if rel := instr(col, str, sub); rel >= 0 {
		// instr counts from the start of the trimmed string; shift the
		// result back into the coordinates of the original input.
		return offset + rel
	}
	return -1
}

// instr returns the zero-based position of the first occurrence of sub in
// str under collation col, or -1 if sub does not occur.
//
// An empty sub always matches at position 0; an empty str with a non-empty
// sub never matches. For binary collations whose charset has a maximum width
// of one byte the search is delegated to bytes.Index. Otherwise str is walked
// one decoded rune at a time and the position counts the runes skipped.
func instr(col Collation, str, sub []byte) int {
	if len(sub) == 0 {
		return 0
	}
	if len(str) == 0 {
		return -1
	}

	cs := col.Charset()
	if col.IsBinary() && cs.MaxWidth() == 1 {
		return bytes.Index(str, sub)
	}

	for pos := 0; len(str) > 0; pos++ {
		// NOTE(review): the trailing `true` presumably asks Collate for a
		// prefix comparison (does str start with sub?) — confirm against the
		// Collation interface.
		if col.Collate(str, sub, true) == 0 {
			return pos
		}

		_, size := cs.DecodeRune(str)
		if size <= 0 || size > len(str) {
			// The decoder could not consume a full character from the
			// remaining bytes. Without this guard a zero width would spin
			// forever on the same input and an oversized width would slice
			// out of range; treat the malformed tail as "not found".
			return -1
		}
		str = str[size:]
	}
	return -1
}
12 changes: 12 additions & 0 deletions go/vt/vtgate/evalengine/cached_size.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 36 additions & 11 deletions go/vt/vtgate/evalengine/compiler_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2986,6 +2986,40 @@ func (asm *assembler) Like_collate(expr *LikeExpr, collation colldata.Collation)
}, "LIKE VARCHAR(SP-2), VARCHAR(SP-1) COLLATE '%s'", collation.Name())
}

// Locate3 emits the three-argument LOCATE(substr, str, pos) instruction.
//
// At run time the instruction reads the substring at SP-3, the string at SP-2
// and the 1-based start position at SP-1, pops two slots and overwrites the
// remaining slot with an INT64 result. The result is 0 when the position is
// below 1 or above math.MaxInt; otherwise it is the value returned by
// colldata.Index under the given collation plus one, so a miss (-1) also
// becomes 0.
func (asm *assembler) Locate3(collation colldata.Collation) {
	asm.adjustStack(-2)

	asm.emit(func(env *ExpressionEnv) int {
		sp := env.vm.sp
		needle := env.vm.stack[sp-3].(*evalBytes)
		haystack := env.vm.stack[sp-2].(*evalBytes)
		start := env.vm.stack[sp-1].(*evalInt64)
		env.vm.sp = sp - 2

		// Out-of-range start positions short-circuit to "not found".
		var result int64
		if start.i >= 1 && start.i <= math.MaxInt {
			idx := colldata.Index(collation, haystack.bytes, needle.bytes, int(start.i)-1)
			result = int64(idx) + 1
		}

		// The slot that held the substring is now the top of the stack.
		env.vm.stack[sp-3] = env.vm.arena.newEvalInt64(result)
		return 1
	}, "LOCATE VARCHAR(SP-3), VARCHAR(SP-2) INT64(SP-1) COLLATE '%s'", collation.Name())
}

// Locate2 emits the two-argument LOCATE(substr, str) instruction.
//
// At run time the instruction reads the substring at SP-2 and the string at
// SP-1, pops one slot and overwrites the remaining slot with an INT64 holding
// the value returned by colldata.Index (searching from offset 0 under the
// given collation) plus one, so a miss (-1) becomes 0.
func (asm *assembler) Locate2(collation colldata.Collation) {
	asm.adjustStack(-1)

	asm.emit(func(env *ExpressionEnv) int {
		sp := env.vm.sp
		needle := env.vm.stack[sp-2].(*evalBytes)
		haystack := env.vm.stack[sp-1].(*evalBytes)
		env.vm.sp = sp - 1

		idx := colldata.Index(collation, haystack.bytes, needle.bytes, 0)

		// The slot that held the substring is now the top of the stack.
		env.vm.stack[sp-2] = env.vm.arena.newEvalInt64(int64(idx) + 1)
		return 1
	}, "LOCATE VARCHAR(SP-2), VARCHAR(SP-1) COLLATE '%s'", collation.Name())
}

func (asm *assembler) Strcmp(collation collations.TypedCollation) {
asm.adjustStack(-1)

Expand Down Expand Up @@ -3833,11 +3867,6 @@ func (asm *assembler) Fn_LAST_DAY() {
return 1
}
arg := env.vm.stack[env.vm.sp-1].(*evalTemporal)
if arg.dt.IsZero() {
env.vm.stack[env.vm.sp-1] = nil
return 1
}

d := lastDay(env.currentTimezone(), arg.dt)
env.vm.stack[env.vm.sp-1] = env.vm.arena.newEvalDate(d)
return 1
Expand All @@ -3850,12 +3879,8 @@ func (asm *assembler) Fn_TO_DAYS() {
return 1
}
arg := env.vm.stack[env.vm.sp-1].(*evalTemporal)
if arg.dt.Date.IsZero() {
env.vm.stack[env.vm.sp-1] = nil
} else {
numDays := datetime.MysqlDayNumber(arg.dt.Date.Year(), arg.dt.Date.Month(), arg.dt.Date.Day())
env.vm.stack[env.vm.sp-1] = env.vm.arena.newEvalInt64(int64(numDays))
}
numDays := datetime.MysqlDayNumber(arg.dt.Date.Year(), arg.dt.Date.Month(), arg.dt.Date.Day())
env.vm.stack[env.vm.sp-1] = env.vm.arena.newEvalInt64(int64(numDays))
return 1
}, "FN TO_DAYS DATE(SP-1)")
}
Expand Down
12 changes: 12 additions & 0 deletions go/vt/vtgate/evalengine/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,18 @@ func TestCompilerSingle(t *testing.T) {
expression: `time('1111:66:56')`,
result: `NULL`,
},
{
expression: `locate('Å', 'a')`,
result: `INT64(1)`,
},
{
expression: `locate('a', 'Å')`,
result: `INT64(1)`,
},
{
expression: `locate("", "😊😂🤢", 3)`,
result: `INT64(3)`,
},
}

tz, _ := time.LoadLocation("Europe/Madrid")
Expand Down
110 changes: 110 additions & 0 deletions go/vt/vtgate/evalengine/fn_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ type (
CallExpr
collate collations.ID
}

// builtinLocate is the expression node for LOCATE calls with two or three
// arguments (substr, str and an optional start position). collate is the
// collation its eval method uses when str has to be converted to VARCHAR.
builtinLocate struct {
CallExpr
collate collations.ID
}
)

var _ IR = (*builtinInsert)(nil)
Expand Down Expand Up @@ -1265,6 +1270,111 @@ func (call *builtinSubstring) compile(c *compiler) (ctype, error) {
return ctype{Type: tt, Col: col, Flag: flagNullable}, nil
}

// eval computes LOCATE(substr, str[, pos]) by direct evaluation.
//
// It returns nil (SQL NULL) when any argument evaluates to nil, and an INT64
// otherwise. The result is 0 when the optional position is below 1 or above
// math.MaxInt; in all other cases it is the value returned by colldata.Index
// plus one, which turns a miss (-1) into 0.
func (call *builtinLocate) eval(env *ExpressionEnv) (eval, error) {
	substr, err := call.Arguments[0].eval(env)
	if err != nil {
		return nil, err
	}
	if substr == nil {
		return nil, nil
	}

	str, err := call.Arguments[1].eval(env)
	if err != nil {
		return nil, err
	}
	if str == nil {
		return nil, nil
	}

	// Anything that is not already a byte string is converted to VARCHAR
	// using the collation stored on the call.
	if _, isBytes := str.(*evalBytes); !isBytes {
		str, err = evalToVarchar(str, call.collate, true)
		if err != nil {
			return nil, err
		}
	}

	// The substring is always converted to the collation of str.
	col := str.(*evalBytes).col.Collation
	substr, err = evalToVarchar(substr, col, true)
	if err != nil {
		return nil, err
	}

	start := int64(1)
	if len(call.Arguments) > 2 {
		posArg, posErr := call.Arguments[2].eval(env)
		if posErr != nil {
			return nil, posErr
		}
		if posArg == nil {
			return nil, nil
		}
		start = evalToInt64(posArg).i
		if start < 1 || start > math.MaxInt {
			return newEvalInt64(0), nil
		}
	}

	// Textual operands are compared under the collation of str; anything else
	// falls back to the binary collation.
	var coll colldata.Collation
	switch {
	case typeIsTextual(substr.SQLType()) && typeIsTextual(str.SQLType()):
		coll = colldata.Lookup(col)
	default:
		coll = colldata.Lookup(collations.CollationBinaryID)
	}

	idx := colldata.Index(coll, str.ToRawBytes(), substr.ToRawBytes(), int(start)-1)
	return newEvalInt64(int64(idx) + 1), nil
}

// compile emits the instructions for LOCATE(substr, str[, pos]).
//
// The arguments are compiled in order, converted where needed, and followed by
// either a Locate2 or a Locate3 instruction depending on the argument count.
// The reported result type is a nullable INT64 with the numeric collation.
func (call *builtinLocate) compile(c *compiler) (ctype, error) {
substr, err := call.Arguments[0].compile(c)
if err != nil {
return ctype{}, err
}

str, err := call.Arguments[1].compile(c)
if err != nil {
return ctype{}, err
}

// skip1 and skip2 are jumps created by the null checks; both are resolved to
// the end of the emitted sequence by jumpDestination below.
skip1 := c.compileNullCheck2(substr, str)
var skip2 *jump
if len(call.Arguments) > 2 {
l, err := call.Arguments[2].compile(c)
if err != nil {
return ctype{}, err
}
skip2 = c.compileNullCheck2(str, l)
// The returned ctype is not needed; only the emitted conversion matters.
// NOTE(review): the 1 is presumably the stack offset of the position
// argument (top of stack) — confirm against compileToInt64.
_ = c.compileToInt64(l, 1)
}

// A non-textual str is converted to VARCHAR in the compiler's collation.
// len(call.Arguments)-1 is passed as the offset for str (1 with two arguments,
// 2 with three).
if !str.isTextual() {
c.asm.Convert_xce(len(call.Arguments)-1, sqltypes.VarChar, c.collation)
str.Col = collations.TypedCollation{
Collation: c.collation,
Coercibility: collations.CoerceCoercible,
Repertoire: collations.RepertoireASCII,
}
}

// substr is converted to the collation of str when it is not textual, or when
// its charset differs from that of str and str's charset is not a superset of it.
fromCharset := colldata.Lookup(substr.Col.Collation).Charset()
toCharset := colldata.Lookup(str.Col.Collation).Charset()
if !substr.isTextual() || (fromCharset != toCharset && !toCharset.IsSuperset(fromCharset)) {
c.asm.Convert_xce(len(call.Arguments), sqltypes.VarChar, str.Col.Collation)
substr.Col = collations.TypedCollation{
Collation: str.Col.Collation,
Coercibility: collations.CoerceCoercible,
Repertoire: collations.RepertoireASCII,
}
}

// Pick the collation used for the search: the collation of str when both
// argument types are textual, the binary collation otherwise.
// NOTE(review): the conversion branches above update only .Col and leave .Type
// untouched, so an argument that had to be converted still counts as
// non-textual here and selects the binary collation — confirm this is intended.
var coll colldata.Collation
if typeIsTextual(substr.Type) && typeIsTextual(str.Type) {
coll = colldata.Lookup(str.Col.Collation)
} else {
coll = colldata.Lookup(collations.CollationBinaryID)
}

if len(call.Arguments) > 2 {
c.asm.Locate3(coll)
} else {
c.asm.Locate2(coll)
}

// skip2 stays nil when there is no position argument.
c.asm.jumpDestination(skip1, skip2)
return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: flagNullable}, nil
}

type builtinConcat struct {
CallExpr
collate collations.ID
Expand Down
Loading

0 comments on commit d4f0c2a

Please sign in to comment.