Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: GROUP BY/HAVING alias resolution #15344

Merged
merged 15 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,21 @@ func start(t *testing.T) (utils.MySQLCompare, func()) {
deleteAll := func() {
_, _ = utils.ExecAllowError(t, mcmp.VtConn, "set workload = oltp")

tables := []string{"t9", "aggr_test", "t3", "t7_xxhash", "aggr_test_dates", "t7_xxhash_idx", "t1", "t2", "t10"}
tables := []string{
"t3",
"t3_id7_idx",
"t9",
"aggr_test",
"aggr_test_dates",
"t7_xxhash",
"t7_xxhash_idx",
"t1",
"t2",
"t10",
"emp",
"dept",
"bet_logs",
}
for _, table := range tables {
_, _ = mcmp.ExecAndIgnore("delete from " + table)
}
Expand Down Expand Up @@ -673,3 +687,84 @@ func TestDistinctAggregation(t *testing.T) {
})
}
}

func TestHavingQueries(t *testing.T) {
mcmp, closer := start(t)
defer closer()

inserts := []string{
`INSERT INTO emp (empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES
(1, 'John', 'Manager', NULL, '2022-01-01', 5000, 500, 1),
(2, 'Doe', 'Analyst', 1, '2023-01-01', 4500, NULL, 1),
(3, 'Jane', 'Clerk', 1, '2023-02-01', 3000, 200, 2),
(4, 'Mary', 'Analyst', 2, '2022-03-01', 4700, NULL, 1),
(5, 'Smith', 'Salesman', 3, '2023-01-15', 3200, 300, 3)`,
"INSERT INTO dept (deptno, dname, loc) VALUES (1, 'IT', 'New York'), (2, 'HR', 'London'), (3, 'Sales', 'San Francisco')",
"INSERT INTO t1 (t1_id, name, value, shardKey) VALUES (1, 'Name1', 'Value1', 100), (2, 'Name2', 'Value2', 100), (3, 'Name1', 'Value3', 200)",
"INSERT INTO aggr_test_dates (id, val1, val2) VALUES (1, '2023-01-01', '2023-01-02'), (2, '2023-02-01', '2023-02-02'), (3, '2023-03-01', '2023-03-02')",
"INSERT INTO t10 (k, a, b) VALUES (1, 10, 20), (2, 30, 40), (3, 50, 60)",
"INSERT INTO t3 (id5, id6, id7) VALUES (1, 10, 100), (2, 20, 200), (3, 30, 300)",
"INSERT INTO t9 (id1, id2, id3) VALUES (1, 'A1', 'B1'), (2, 'A2', 'B2'), (3, 'A1', 'B3')",
"INSERT INTO aggr_test (id, val1, val2) VALUES (1, 'Test1', 100), (2, 'Test2', 200), (3, 'Test1', 300), (4, 'Test3', 400)",
"INSERT INTO t2 (id, shardKey) VALUES (1, 100), (2, 200), (3, 300)",
`INSERT INTO bet_logs (id, merchant_game_id, bet_amount, game_id) VALUES
(1, 1, 100.0, 10),
(2, 1, 200.0, 11),
(3, 2, 300.0, 10),
(4, 3, 400.0, 12)`,
}

for _, insert := range inserts {
mcmp.Exec(insert)
}

queries := []string{
// The following queries are not allowed by MySQL but Vitess allows them
// SELECT ename FROM emp GROUP BY ename HAVING sal > 5000
// SELECT val1, COUNT(val2) FROM aggr_test_dates GROUP BY val1 HAVING val2 > 5
// SELECT k, a FROM t10 GROUP BY k HAVING b > 2
// SELECT loc FROM dept GROUP BY loc HAVING COUNT(deptno) AND dname = 'Sales'
// SELECT AVG(val2) AS average_val2 FROM aggr_test HAVING val1 = 'Test'

// these first queries are all failing in different ways. let's check that Vitess also fails

"SELECT deptno, AVG(sal) AS average_salary HAVING average_salary > 5000 FROM emp",
"SELECT job, COUNT(empno) AS num_employees FROM emp HAVING num_employees > 2",
"SELECT dname, SUM(sal) FROM dept JOIN emp ON dept.deptno = emp.deptno HAVING AVG(sal) > 6000",
"SELECT COUNT(*) AS count FROM emp WHERE count > 5",
"SELECT `name`, AVG(`value`) FROM t1 GROUP BY `name` HAVING `name`",
"SELECT empno, MAX(sal) FROM emp HAVING COUNT(*) > 3",
"SELECT id, SUM(bet_amount) AS total_bets FROM bet_logs HAVING total_bets > 1000",
"SELECT merchant_game_id FROM bet_logs GROUP BY merchant_game_id HAVING SUM(bet_amount)",
"SELECT shardKey, COUNT(id) FROM t2 HAVING shardKey > 100",
"SELECT deptno FROM emp GROUP BY deptno HAVING MAX(hiredate) > '2020-01-01'",

// These queries should not fail
"SELECT deptno, COUNT(*) AS num_employees FROM emp GROUP BY deptno HAVING num_employees > 5",
"SELECT ename, SUM(sal) FROM emp GROUP BY ename HAVING SUM(sal) > 10000",
"SELECT dname, AVG(sal) AS average_salary FROM emp JOIN dept ON emp.deptno = dept.deptno GROUP BY dname HAVING average_salary > 5000",
"SELECT dname, MAX(sal) AS max_salary FROM emp JOIN dept ON emp.deptno = dept.deptno GROUP BY dname HAVING max_salary < 10000",
"SELECT YEAR(hiredate) AS year, COUNT(*) FROM emp GROUP BY year HAVING COUNT(*) > 2",
"SELECT mgr, COUNT(empno) AS managed_employees FROM emp WHERE mgr IS NOT NULL GROUP BY mgr HAVING managed_employees >= 3",
"SELECT deptno, SUM(comm) AS total_comm FROM emp GROUP BY deptno HAVING total_comm > AVG(total_comm)",
"SELECT id2, COUNT(*) AS count FROM t9 GROUP BY id2 HAVING count > 1",
"SELECT val1, COUNT(*) FROM aggr_test GROUP BY val1 HAVING COUNT(*) > 1",
"SELECT DATE(val1) AS date, SUM(val2) FROM aggr_test_dates GROUP BY date HAVING SUM(val2) > 100",
"SELECT shardKey, AVG(`value`) FROM t1 WHERE `value` IS NOT NULL GROUP BY shardKey HAVING AVG(`value`) > 10",
"SELECT job, COUNT(*) AS job_count FROM emp GROUP BY job HAVING job_count > 3",
"SELECT b, AVG(a) AS avg_a FROM t10 GROUP BY b HAVING AVG(a) > 5",
"SELECT merchant_game_id, SUM(bet_amount) AS total_bets FROM bet_logs GROUP BY merchant_game_id HAVING total_bets > 1000",
"SELECT loc, COUNT(deptno) AS num_depts FROM dept GROUP BY loc HAVING num_depts > 1",
"SELECT `name`, COUNT(*) AS name_count FROM t1 GROUP BY `name` HAVING name_count > 2",
"SELECT COUNT(*) AS num_jobs FROM emp GROUP BY empno HAVING num_jobs > 1",
"SELECT id, COUNT(*) AS count FROM t2 GROUP BY id HAVING count > 1",
"SELECT val2, SUM(id) FROM aggr_test GROUP BY val2 HAVING SUM(id) > 10",
"SELECT game_id, COUNT(*) AS num_logs FROM bet_logs GROUP BY game_id HAVING num_logs > 5",
}

for _, query := range queries {
mcmp.Run(query, func(mcmp *utils.MySQLCompare) {
mcmp.ExecAllowAndCompareError(query)
})
}
}
167 changes: 54 additions & 113 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -675,9 +675,41 @@
}
},
{
"comment": "scatter aggregate group by aggregate function",
"comment": "scatter aggregate group by aggregate function - since we don't have authoratative columns for user, we can't be sure that the user isn't referring a column named b",
systay marked this conversation as resolved.
Show resolved Hide resolved
"query": "select count(*) b from user group by b",
"plan": "VT03005: cannot group on 'count(*)'"
"plan": {
"QueryType": "SELECT",
"Original": "select count(*) b from user group by b",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"Aggregates": "sum_count_star(0) AS b",
"GroupBy": "(1|2)",
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select count(*) as b, b, weight_string(b) from `user` where 1 != 1 group by b, weight_string(b)",
"OrderBy": "(1|2) ASC",
"Query": "select count(*) as b, b, weight_string(b) from `user` group by b, weight_string(b) order by b asc",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "scatter aggregate group by aggregate function with column information",
"query": "select count(*) b from authoritative group by b",
"plan": "VT03005: cannot group on 'b'"
},
{
"comment": "scatter aggregate multiple group by (columns)",
Expand Down Expand Up @@ -1041,32 +1073,6 @@
]
}
},
{
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed this since it fails on mysql. a is not the same as a collate utf8_general_ci, so if full_group_by is enabled, this query would fail

"comment": "Group by with collate operator",
"query": "select user.col1 as a from user where user.id = 5 group by a collate utf8_general_ci",
"plan": {
"QueryType": "SELECT",
"Original": "select user.col1 as a from user where user.id = 5 group by a collate utf8_general_ci",
"Instructions": {
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select `user`.col1 as a from `user` where 1 != 1 group by `user`.col1 collate utf8_general_ci",
"Query": "select `user`.col1 as a from `user` where `user`.id = 5 group by `user`.col1 collate utf8_general_ci",
"Table": "`user`",
"Values": [
"5"
],
"Vindex": "user_index"
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "routing rules for aggregates",
"query": "select id, count(*) from route2 group by id",
Expand Down Expand Up @@ -1584,10 +1590,10 @@
},
{
"comment": "weight_string addition to group by",
"query": "select lower(textcol1) as v, count(*) from user group by v",
"query": "select lower(col1) as v, count(*) from authoritative group by v",
"plan": {
"QueryType": "SELECT",
"Original": "select lower(textcol1) as v, count(*) from user group by v",
"Original": "select lower(col1) as v, count(*) from authoritative group by v",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
Expand All @@ -1602,24 +1608,24 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select lower(textcol1) as v, count(*), weight_string(lower(textcol1)) from `user` where 1 != 1 group by lower(textcol1), weight_string(lower(textcol1))",
"FieldQuery": "select lower(col1) as v, count(*), weight_string(lower(col1)) from authoritative where 1 != 1 group by lower(col1), weight_string(lower(col1))",
"OrderBy": "(0|2) ASC",
"Query": "select lower(textcol1) as v, count(*), weight_string(lower(textcol1)) from `user` group by lower(textcol1), weight_string(lower(textcol1)) order by lower(textcol1) asc",
"Table": "`user`"
"Query": "select lower(col1) as v, count(*), weight_string(lower(col1)) from authoritative group by lower(col1), weight_string(lower(col1)) order by lower(col1) asc",
"Table": "authoritative"
}
]
},
"TablesUsed": [
"user.user"
"user.authoritative"
]
}
},
{
"comment": "weight_string addition to group by when also there in order by",
"query": "select char_length(texcol1) as a, count(*) from user group by a order by a",
"query": "select char_length(col1) as a, count(*) from authoritative group by a order by a",
"plan": {
"QueryType": "SELECT",
"Original": "select char_length(texcol1) as a, count(*) from user group by a order by a",
"Original": "select char_length(col1) as a, count(*) from authoritative group by a order by a",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
Expand All @@ -1634,15 +1640,15 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select char_length(texcol1) as a, count(*), weight_string(char_length(texcol1)) from `user` where 1 != 1 group by char_length(texcol1), weight_string(char_length(texcol1))",
"FieldQuery": "select char_length(col1) as a, count(*), weight_string(char_length(col1)) from authoritative where 1 != 1 group by char_length(col1), weight_string(char_length(col1))",
"OrderBy": "(0|2) ASC",
"Query": "select char_length(texcol1) as a, count(*), weight_string(char_length(texcol1)) from `user` group by char_length(texcol1), weight_string(char_length(texcol1)) order by char_length(texcol1) asc",
"Table": "`user`"
"Query": "select char_length(col1) as a, count(*), weight_string(char_length(col1)) from authoritative group by char_length(col1), weight_string(char_length(col1)) order by char_length(col1) asc",
"Table": "authoritative"
}
]
},
"TablesUsed": [
"user.user"
"user.authoritative"
]
}
},
Expand Down Expand Up @@ -2113,71 +2119,6 @@
]
}
},
{
"comment": "aggregation filtering by having on a route with no group by with non-unique vindex filter",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this query fails on both mysql and vitess. name is not accessible on the having clause

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This query does not fail anymore, we should bring the test back.

"query": "select 1 from user having count(id) = 10 and name = 'a'",
"plan": {
"QueryType": "SELECT",
"Original": "select 1 from user having count(id) = 10 and name = 'a'",
"Instructions": {
"OperatorType": "Filter",
"Predicate": "count(id) = 10",
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "any_value(0) AS 1, sum_count(1) AS count(id)",
"Inputs": [
{
"OperatorType": "VindexLookup",
"Variant": "Equal",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Values": [
"'a'"
],
"Vindex": "name_user_map",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "IN",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select `name`, keyspace_id from name_user_vdx where 1 != 1",
"Query": "select `name`, keyspace_id from name_user_vdx where `name` in ::__vals",
"Table": "name_user_vdx",
"Values": [
"::name"
],
"Vindex": "user_index"
},
{
"OperatorType": "Route",
"Variant": "ByDestination",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select 1, count(id) from `user` where 1 != 1",
"Query": "select 1, count(id) from `user` where `name` = 'a'",
"Table": "`user`"
}
]
}
]
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "Aggregates and joins",
"query": "select count(*) from user join user_extra",
Expand Down Expand Up @@ -2620,10 +2561,10 @@
},
{
"comment": "group by column alias",
"query": "select ascii(val1) as a, count(*) from user group by a",
"query": "select ascii(col2) as a, count(*) from authoritative group by a",
"plan": {
"QueryType": "SELECT",
systay marked this conversation as resolved.
Show resolved Hide resolved
"Original": "select ascii(val1) as a, count(*) from user group by a",
"Original": "select ascii(col2) as a, count(*) from authoritative group by a",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
Expand All @@ -2638,15 +2579,15 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select ascii(val1) as a, count(*), weight_string(ascii(val1)) from `user` where 1 != 1 group by ascii(val1), weight_string(ascii(val1))",
"FieldQuery": "select ascii(col2) as a, count(*), weight_string(ascii(col2)) from authoritative where 1 != 1 group by ascii(col2), weight_string(ascii(col2))",
"OrderBy": "(0|2) ASC",
"Query": "select ascii(val1) as a, count(*), weight_string(ascii(val1)) from `user` group by ascii(val1), weight_string(ascii(val1)) order by ascii(val1) asc",
"Table": "`user`"
"Query": "select ascii(col2) as a, count(*), weight_string(ascii(col2)) from authoritative group by ascii(col2), weight_string(ascii(col2)) order by ascii(col2) asc",
"Table": "authoritative"
}
]
},
"TablesUsed": [
"user.user"
"user.authoritative"
]
}
},
Expand Down Expand Up @@ -3328,10 +3269,10 @@
},
{
"comment": "group by and ',' joins",
"query": "select user.id from user, user_extra group by id",
"query": "select user.id from user, user_extra group by user.id",
"plan": {
"QueryType": "SELECT",
"Original": "select user.id from user, user_extra group by id",
"Original": "select user.id from user, user_extra group by user.id",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
Expand Down
22 changes: 0 additions & 22 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,4 @@
[
{
"comment": "HAVING implicitly references table col",
"query": "select user.col1 from user having col2 = 2",
"plan": {
"QueryType": "SELECT",
"Original": "select user.col1 from user having col2 = 2",
"Instructions": {
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
systay marked this conversation as resolved.
Show resolved Hide resolved
},
"FieldQuery": "select `user`.col1 from `user` where 1 != 1",
"Query": "select `user`.col1 from `user` where col2 = 2",
"Table": "`user`"
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "ambiguous symbol reference",
"query": "select user.col1, user_extra.col1 from user join user_extra having col1 = 2",
Expand Down
Loading
Loading