Skip to content

Commit

Permalink
statistics: allow using not-loaded stats when ndv is 0 (pingcap#33241)
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate authored Mar 22, 2022
1 parent 547d8f8 commit 768ce5f
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 1 deletion.
2 changes: 1 addition & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,7 @@ func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool {
}
}
}
return c.TotalRowCount() == 0 || !c.IsLoaded()
return c.TotalRowCount() == 0 || (!c.IsLoaded() && c.Histogram.NDV > 0)
}

// IsHistNeeded checks if this column needs histogram to be loaded
Expand Down
66 changes: 66 additions & 0 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,69 @@ func hasPseudoStats(rows [][]interface{}) bool {
}
return false
}

// TestNotLoadedStatsOnAllNULLCol verifies that the planner still relies on the statistics of a column that holds
// nothing but NULLs even when those statistics have not been loaded. Using them is safe because loading would not
// change any estimate.
func TestNotLoadedStatsOnAllNULLCol(t *testing.T) {
	store, dom, clean := testkit.CreateMockStoreAndDomain(t)
	defer clean()

	// Switch the stats lease to 1000 for the duration of the test. The deferred call captures the current lease
	// as its argument right here, i.e. before it is overwritten, so the original value is restored on exit.
	// NOTE(review): presumably a non-zero lease keeps column stats from being loaded eagerly — confirm.
	statsHandle := dom.StatsHandle()
	defer statsHandle.SetLease(statsHandle.Lease())
	statsHandle.SetLease(1000)

	tk := testkit.NewTestKit(t, store)

	// Build two single-column tables that contain only NULLs (4 rows in t1, 2 rows in t2) and analyze both.
	setup := []string{
		"use test",
		"drop table if exists t1",
		"drop table if exists t2",
		"create table t1(a int)",
		"create table t2(a int)",
		"insert into t1 values(null), (null), (null), (null)",
		"insert into t2 values(null), (null)",
		"analyze table t1;",
		"analyze table t2;",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	// Each case pairs an EXPLAIN statement with the exact plan rows it must produce.
	cases := []struct {
		query string
		plan  []string
	}{
		{
			query: "explain format = 'brief' select * from t1 left join t2 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 4.00 root test.t1.a, test.t2.a",
				"└─HashJoin 4.00 root left outer join, equal:[eq(test.t1.a, test.t2.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				// Without the stats on this column (i.e. with pseudo estimation), the Selection row count
				// would be 2 instead.
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
				" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
				" └─TableReader(Probe) 4.00 root data:TableFullScan",
				" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t2 left join t1 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 2.00 root test.t1.a, test.t2.a",
				"└─HashJoin 2.00 root left outer join, equal:[eq(test.t2.a, test.t1.a)]",
				// Without the stats on this column, t2 would be picked as the build side because of its
				// smaller row count.
				" ├─TableReader(Build) 0.00 root data:Selection",
				// Without the stats on this column, the Selection row count would be 4 instead.
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
				" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
				" └─TableReader(Probe) 2.00 root data:TableFullScan",
				" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t1 right join t2 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 2.00 root test.t1.a, test.t2.a",
				"└─HashJoin 2.00 root right outer join, equal:[eq(test.t1.a, test.t2.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
				" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
				" └─TableReader(Probe) 2.00 root data:TableFullScan",
				" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t2 right join t1 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 4.00 root test.t1.a, test.t2.a",
				"└─HashJoin 4.00 root right outer join, equal:[eq(test.t2.a, test.t1.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
				" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
				" └─TableReader(Probe) 4.00 root data:TableFullScan",
				" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
			},
		},
	}
	for _, c := range cases {
		tk.MustQuery(c.query).Check(testkit.Rows(c.plan...))
	}
}

0 comments on commit 768ce5f

Please sign in to comment.