From 465ffcf7a464e115035daab3f4a4accc9a7e99d3 Mon Sep 17 00:00:00 2001 From: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:51:17 +0530 Subject: [PATCH] Group Concat function support for separator (#16237) Signed-off-by: Manan Gupta --- .gitignore | 2 + .../aggregation/aggregation.test | 50 ++++++++++++++++ go/vt/sqlparser/constants.go | 3 + go/vt/vtgate/engine/aggregations.go | 17 ++++-- go/vt/vtgate/engine/cached_size.go | 4 +- go/vt/vtgate/engine/ordered_aggregate_test.go | 9 ++- .../vtgate/engine/scalar_aggregation_test.go | 3 + .../planbuilder/operator_transformers.go | 5 +- .../operators/aggregation_pushing_helper.go | 4 +- .../planbuilder/testdata/aggr_cases.json | 60 +++++++++++++++++++ .../testdata/unsupported_cases.json | 5 ++ 11 files changed, 150 insertions(+), 12 deletions(-) create mode 100644 go/test/endtoend/vtgate/vitess_tester/aggregation/aggregation.test diff --git a/.gitignore b/.gitignore index 43f352d1b80..70ae13ad32d 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ __debug_bin /php/composer.phar /php/vendor +report*.xml + # vitess.io preview site /preview-vitess.io/ diff --git a/go/test/endtoend/vtgate/vitess_tester/aggregation/aggregation.test b/go/test/endtoend/vtgate/vitess_tester/aggregation/aggregation.test new file mode 100644 index 00000000000..8861b9672b8 --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/aggregation/aggregation.test @@ -0,0 +1,50 @@ +CREATE TABLE `t1` +( + `id` int unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t2` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `t1_id` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +CREATE TABLE `t3` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(191) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + +insert into t1 (id, name) +values (1, 'A'), + (2, 'B'), + (3, 'C'), + (4, 'D'); + +insert into t2 (id, t1_id) +values (1, 1), + (2, 2), + (3, 3); + +insert into t3 (id, name) +values (1, 'A'), + (2, 'B'); + +-- wait_authoritative t1 +-- wait_authoritative t2 +-- wait_authoritative t3 +select group_concat(t3.name SEPARATOR ', ') as "Group Name" +from t1 + join t2 on t1.id = t2.t1_id + left join t3 on t1.id = t3.id +group by t1.id; \ No newline at end of file diff --git a/go/vt/sqlparser/constants.go b/go/vt/sqlparser/constants.go index 024a2148c33..228546cb422 100644 --- a/go/vt/sqlparser/constants.go +++ b/go/vt/sqlparser/constants.go @@ -478,6 +478,9 @@ const ( // KillType strings ConnectionStr = "connection" QueryStr = "query" + + // GroupConcatDefaultSeparator is the default separator for GroupConcatExpr. + GroupConcatDefaultSeparator = "," ) // Constants for Enum Type - Insert.Action diff --git a/go/vt/vtgate/engine/aggregations.go b/go/vt/vtgate/engine/aggregations.go index 4673a2717e5..d3f2b7a1c82 100644 --- a/go/vt/vtgate/engine/aggregations.go +++ b/go/vt/vtgate/engine/aggregations.go @@ -43,7 +43,7 @@ type AggregateParams struct { Type evalengine.Type Alias string `json:",omitempty"` - Expr sqlparser.Expr + Func sqlparser.AggrFunc Original *sqlparser.AliasedExpr // This is based on the function passed in the select expression and @@ -255,8 +255,9 @@ func (a *aggregatorScalar) reset() { } type aggregatorGroupConcat struct { - from int - type_ sqltypes.Type + from int + type_ sqltypes.Type + separator []byte concat []byte n int @@ -267,7 +268,7 @@ func (a *aggregatorGroupConcat) add(row []sqltypes.Value) error { return nil } if a.n > 0 { - a.concat = append(a.concat, ',') + a.concat = append(a.concat, a.separator...) } a.concat = append(a.concat, row[a.from].Raw()...) a.n++ @@ -434,7 +435,13 @@ func newAggregation(fields []*querypb.Field, aggregates []*AggregateParams) (agg ag = &aggregatorScalar{from: aggr.Col} case AggregateGroupConcat: - ag = &aggregatorGroupConcat{from: aggr.Col, type_: targetType} + gcFunc := aggr.Func.(*sqlparser.GroupConcatExpr) + separator := []byte(gcFunc.Separator) + ag = &aggregatorGroupConcat{ + from: aggr.Col, + type_: targetType, + separator: separator, + } default: panic("BUG: unexpected Aggregation opcode") diff --git a/go/vt/vtgate/engine/cached_size.go b/go/vt/vtgate/engine/cached_size.go index e65ff61a9f6..df23f14f6f5 100644 --- a/go/vt/vtgate/engine/cached_size.go +++ b/go/vt/vtgate/engine/cached_size.go @@ -41,8 +41,8 @@ func (cached *AggregateParams) CachedSize(alloc bool) int64 { size += cached.Type.CachedSize(false) // field Alias string size += hack.RuntimeAllocSize(int64(len(cached.Alias))) - // field Expr vitess.io/vitess/go/vt/sqlparser.Expr - if cc, ok := cached.Expr.(cachedObject); ok { + // field Func vitess.io/vitess/go/vt/sqlparser.AggrFunc + if cc, ok := cached.Func.(cachedObject); ok { size += cc.CachedSize(true) } // field Original *vitess.io/vitess/go/vt/sqlparser.AliasedExpr diff --git a/go/vt/vtgate/engine/ordered_aggregate_test.go b/go/vt/vtgate/engine/ordered_aggregate_test.go index 3eaa63819e4..c601654bced 100644 --- a/go/vt/vtgate/engine/ordered_aggregate_test.go +++ b/go/vt/vtgate/engine/ordered_aggregate_test.go @@ -22,6 +22,7 @@ import ( "fmt" "testing" + "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtgate/evalengine" "github.com/stretchr/testify/assert" @@ -1058,8 +1059,10 @@ func TestGroupConcatWithAggrOnEngine(t *testing.T) { for _, tcase := range tcases { t.Run(tcase.name, func(t *testing.T) { fp := &fakePrimitive{results: []*sqltypes.Result{tcase.inputResult}} + agp := NewAggregateParam(AggregateGroupConcat, 1, "group_concat(c2)", collations.MySQL8()) + agp.Func = &sqlparser.GroupConcatExpr{Separator: ","} oa := &OrderedAggregate{ - Aggregates: []*AggregateParams{NewAggregateParam(AggregateGroupConcat, 1, "group_concat(c2)", collations.MySQL8())}, + Aggregates: []*AggregateParams{agp}, GroupByKeys: []*GroupByParams{{KeyCol: 0}}, Input: fp, } @@ -1137,8 +1140,10 @@ func TestGroupConcat(t *testing.T) { for _, tcase := range tcases { t.Run(tcase.name, func(t *testing.T) { fp := &fakePrimitive{results: []*sqltypes.Result{tcase.inputResult}} + agp := NewAggregateParam(AggregateGroupConcat, 1, "", collations.MySQL8()) + agp.Func = &sqlparser.GroupConcatExpr{Separator: ","} oa := &OrderedAggregate{ - Aggregates: []*AggregateParams{NewAggregateParam(AggregateGroupConcat, 1, "", collations.MySQL8())}, + Aggregates: []*AggregateParams{agp}, GroupByKeys: []*GroupByParams{{KeyCol: 0}}, Input: fp, } diff --git a/go/vt/vtgate/engine/scalar_aggregation_test.go b/go/vt/vtgate/engine/scalar_aggregation_test.go index 99031c95f34..6fa0c8aecb8 100644 --- a/go/vt/vtgate/engine/scalar_aggregation_test.go +++ b/go/vt/vtgate/engine/scalar_aggregation_test.go @@ -27,6 +27,7 @@ import ( "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/test/utils" + "vitess.io/vitess/go/vt/sqlparser" . "vitess.io/vitess/go/vt/vtgate/engine/opcode" ) @@ -233,6 +234,7 @@ func TestScalarGroupConcatWithAggrOnEngine(t *testing.T) { Opcode: AggregateGroupConcat, Col: 0, Alias: "group_concat(c2)", + Func: &sqlparser.GroupConcatExpr{Separator: ","}, }}, Input: fp, } @@ -394,6 +396,7 @@ func TestScalarGroupConcat(t *testing.T) { Aggregates: []*AggregateParams{{ Opcode: AggregateGroupConcat, Col: 0, + Func: &sqlparser.GroupConcatExpr{Separator: ","}, }}, Input: fp, } diff --git a/go/vt/vtgate/planbuilder/operator_transformers.go b/go/vt/vtgate/planbuilder/operator_transformers.go index 60a59ed936a..da3f7348afd 100644 --- a/go/vt/vtgate/planbuilder/operator_transformers.go +++ b/go/vt/vtgate/planbuilder/operator_transformers.go @@ -308,7 +308,10 @@ func transformAggregator(ctx *plancontext.PlanningContext, op *operators.Aggrega return nil, vterrors.VT12001(fmt.Sprintf("in scatter query: aggregation function '%s'", sqlparser.String(aggr.Original))) } aggrParam := engine.NewAggregateParam(aggr.OpCode, aggr.ColOffset, aggr.Alias, ctx.VSchema.Environment().CollationEnv()) - aggrParam.Expr = aggr.Func + aggrParam.Func = aggr.Func + if gcFunc, isGc := aggrParam.Func.(*sqlparser.GroupConcatExpr); isGc && gcFunc.Separator == "" { + gcFunc.Separator = sqlparser.GroupConcatDefaultSeparator + } aggrParam.Original = aggr.Original aggrParam.OrigOpcode = aggr.OriginalOpCode aggrParam.WCol = aggr.WSOffset diff --git a/go/vt/vtgate/planbuilder/operators/aggregation_pushing_helper.go b/go/vt/vtgate/planbuilder/operators/aggregation_pushing_helper.go index 2c47f426695..2fbf5c32311 100644 --- a/go/vt/vtgate/planbuilder/operators/aggregation_pushing_helper.go +++ b/go/vt/vtgate/planbuilder/operators/aggregation_pushing_helper.go @@ -134,8 +134,8 @@ func (ab *aggBuilder) handleAggr(ctx *plancontext.PlanningContext, aggr Aggr) er return ab.handlePushThroughAggregation(ctx, aggr) case opcode.AggregateGroupConcat: f := aggr.Func.(*sqlparser.GroupConcatExpr) - if f.Distinct || len(f.OrderBy) > 0 || f.Separator != "" { - panic("fail here") + if f.Distinct || len(f.OrderBy) > 0 { + panic(vterrors.VT12001("cannot evaluate group concat with distinct or order by")) } // this needs special handling, currently aborting the push of function // and later will try pushing the column instead. diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index dac95cfa51f..a272954725d 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -824,6 +824,66 @@ ] } }, + { + "comment": "group concat with a separator needing evaluation on vtgate", + "query": "select group_concat(music.name SEPARATOR ', ') as `Group Name` from user join user_extra on user.id = user_extra.user_id left join music on user.id = music.id group by user.id;", + "plan": { + "QueryType": "SELECT", + "Original": "select group_concat(music.name SEPARATOR ', ') as `Group Name` from user join user_extra on user.id = user_extra.user_id left join music on user.id = music.id group by user.id;", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "group_concat(0) AS Group Name", + "GroupBy": "(1|2)", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "LeftJoin", + "JoinColumnIndexes": "R:0,L:0,L:1", + "JoinVars": { + "user_id": 0 + }, + "TableName": "`user`, user_extra_music", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.id, weight_string(`user`.id) from `user`, user_extra where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select `user`.id, weight_string(`user`.id) from `user`, user_extra where `user`.id = user_extra.user_id order by `user`.id asc", + "Table": "`user`, user_extra" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.`name` from music where 1 != 1", + "Query": "select music.`name` from music where music.id = :user_id", + "Table": "music", + "Values": [ + ":user_id" + ], + "Vindex": "music_user_map" + } + ] + } + ] + }, + "TablesUsed": [ + "user.music", + "user.user", + "user.user_extra" + ] + } + }, { "comment": "scatter aggregate group by column number", "query": "select col from user group by 1", diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json index a8fd082eb7b..38119ba936c 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json @@ -174,6 +174,11 @@ "query": "select id2 from user uu where id in (select id from user where id = uu.id and user.col in (select col from (select id from user_extra where user_id = 5) uu where uu.user_id = uu.id))", "plan": "VT12001: unsupported: correlated subquery is only supported for EXISTS" }, + { + "comment": "group concat with order by requiring evaluation at vtgate", + "query": "select group_concat(music.name ORDER BY 1 asc SEPARATOR ', ') as `Group Name` from user join user_extra on user.id = user_extra.user_id left join music on user.id = music.id group by user.id;", + "plan": "VT12001: unsupported: cannot evaluate group concat with distinct or order by" + }, { "comment": "outer and inner subquery route reference the same \"uu.id\" name\n# but they refer to different things. The first reference is to the outermost query,\n# and the second reference is to the innermost 'from' subquery.\n# changed to project all the columns from the derived tables.", "query": "select id2 from user uu where id in (select id from user where id = uu.id and user.col in (select col from (select col, id, user_id from user_extra where user_id = 5) uu where uu.user_id = uu.id))",