diff --git a/cmd/explaintest/r/explain_generate_column_substitute.result b/cmd/explaintest/r/explain_generate_column_substitute.result index 5d598e82ebe74..c28bbb6ff2289 100644 --- a/cmd/explaintest/r/explain_generate_column_substitute.result +++ b/cmd/explaintest/r/explain_generate_column_substitute.result @@ -617,3 +617,20 @@ select (select t2.c_str from t2 where t2.c_int = 3 order by t2.c_str) x from t1; x unruffled chaplygin drop table t1,t2; +drop table if exists t1, t2; +create table t1 (c_int int, c_decimal decimal(12, 6), primary key (c_int) nonclustered,key((c_int + 1))) ; +create table t2 like t1; +explain format = 'brief' select /*+ agg_to_cop() */ * from t1 where c_decimal in (select c_decimal from t2 where t2.c_int + 1 = 8 + 1); +id estRows task access object operator info +HashJoin 9.99 root inner join, equal:[eq(test.t2.c_decimal, test.t1.c_decimal)] +├─HashAgg(Build) 7.99 root group by:test.t2.c_decimal, funcs:firstrow(test.t2.c_decimal)->test.t2.c_decimal +│ └─IndexLookUp 7.99 root +│ ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t2, index:expression_index(`c_int` + 1) range:[9,9], keep order:false, stats:pseudo +│ └─HashAgg(Probe) 7.99 cop[tikv] group by:test.t2.c_decimal, +│ └─Selection 9.99 cop[tikv] not(isnull(test.t2.c_decimal)) +│ └─TableRowIDScan 10.00 cop[tikv] table:t2 keep order:false, stats:pseudo +└─TableReader(Probe) 9990.00 root data:Selection + └─Selection 9990.00 cop[tikv] not(isnull(test.t1.c_decimal)) + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +drop table t1; +drop table t2; diff --git a/cmd/explaintest/t/explain_generate_column_substitute.test b/cmd/explaintest/t/explain_generate_column_substitute.test index 4738d13cb9da1..1acde78a709e7 100644 --- a/cmd/explaintest/t/explain_generate_column_substitute.test +++ b/cmd/explaintest/t/explain_generate_column_substitute.test @@ -293,3 +293,12 @@ select (select t2.c_str from t2 where t2.c_int + 1 = 4 order by t2.c_str) x from select (select t2.c_str from t2 where t2.c_int = 3 order by t2.c_str) x from t1; drop table t1,t2; + +-- for issue 33237 +drop table if exists t1, t2; +create table t1 (c_int int, c_decimal decimal(12, 6), primary key (c_int) nonclustered,key((c_int + 1))) ; +create table t2 like t1; +explain format = 'brief' select /*+ agg_to_cop() */ * from t1 where c_decimal in (select c_decimal from t2 where t2.c_int + 1 = 8 + 1); +drop table t1; +drop table t2; + diff --git a/planner/core/task.go b/planner/core/task.go index f1c1ea814b52e..c5bcfdc61f3bd 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -967,7 +967,18 @@ func buildIndexLookUpTask(ctx sessionctx.Context, t *copTask) *rootTask { newTask.cst += sortCPUCost } p.cost = newTask.cst - if t.needExtraProj { + + // Do not inject the extra Projection even if t.needExtraProj is set, or the schema between the phase-1 agg and + // the final agg would be broken. Please reference comments for the similar logic in + // (*copTask).convertToRootTaskImpl() for the PhysicalTableReader case. + // We need to refactor these logics. + aggPushedDown := false + switch p.tablePlan.(type) { + case *PhysicalHashAgg, *PhysicalStreamAgg: + aggPushedDown = true + } + + if t.needExtraProj && !aggPushedDown { schema := t.originSchema proj := PhysicalProjection{Exprs: expression.Column2Exprs(schema.Columns)}.Init(ctx, p.stats, t.tablePlan.SelectBlockOffset(), nil) proj.SetSchema(schema)