From 768ce5fbde15a089191a4d6e5f3c01ff24233adb Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 22 Mar 2022 17:08:33 +0800 Subject: [PATCH] statistics: allow using not-loaded stats when ndv is 0 (#33241) ref pingcap/tidb#32758, close pingcap/tidb#33280 --- statistics/histogram.go | 2 +- statistics/integration_test.go | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index ee5c9bf784d72..d9fcbf9c5fa5a 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -1130,7 +1130,7 @@ func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool { } } } - return c.TotalRowCount() == 0 || !c.IsLoaded() + return c.TotalRowCount() == 0 || (!c.IsLoaded() && c.Histogram.NDV > 0) } // IsHistNeeded checks if this column needs histogram to be loaded diff --git a/statistics/integration_test.go b/statistics/integration_test.go index 34a63ccdbeb7b..b45f03ada062e 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -559,3 +559,69 @@ func hasPseudoStats(rows [][]interface{}) bool { } return false } + +// TestNotLoadedStatsOnAllNULLCol makes sure that stats on a column that only contains NULLs can be used even when it's +// not loaded. This is reasonable because it makes no difference whether it's loaded or not. 
+func TestNotLoadedStatsOnAllNULLCol(t *testing.T) {
+	store, dom, clean := testkit.CreateMockStoreAndDomain(t)
+	defer clean()
+	h := dom.StatsHandle()
+	oriLease := h.Lease()
+	h.SetLease(1000) // overridden for this test only and restored by the deferred func below. NOTE(review): presumably a non-zero lease is what leaves the column stats "not loaded" during planning — confirm
+	defer func() {
+		h.SetLease(oriLease)
+	}()
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1")
+	tk.MustExec("drop table if exists t2")
+	tk.MustExec("create table t1(a int)")
+	tk.MustExec("create table t2(a int)")
+	tk.MustExec("insert into t1 values(null), (null), (null), (null)") // t1: 4 rows, all NULL
+	tk.MustExec("insert into t2 values(null), (null)")                 // t2: 2 rows, all NULL
+	tk.MustExec("analyze table t1;")
+	tk.MustExec("analyze table t2;") // ANALYZE both tables so stats exist for t1.a and t2.a
+
+	res := tk.MustQuery("explain format = 'brief' select * from t1 left join t2 on t1.a=t2.a order by t1.a, t2.a") // t1 is the outer side, t2 the inner side
+	res.Check(testkit.Rows(
+		"Sort 4.00 root test.t1.a, test.t2.a",
+		"└─HashJoin 4.00 root left outer join, equal:[eq(test.t1.a, test.t2.a)]",
+		" ├─TableReader(Build) 0.00 root data:Selection",
+		// Were the stats on t2.a ignored (i.e. pseudo estimation used), this Selection's estimated row count would be 2 instead of 0.
+		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
+		" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
+		" └─TableReader(Probe) 4.00 root data:TableFullScan",
+		" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t2 left join t1 on t1.a=t2.a order by t1.a, t2.a") // roles swapped: t2 is the outer side, t1 the inner side
+	res.Check(testkit.Rows(
+		"Sort 2.00 root test.t1.a, test.t2.a",
+		"└─HashJoin 2.00 root left outer join, equal:[eq(test.t2.a, test.t1.a)]",
+		// Were the stats on this column ignored, t2 would become the build side because of its smaller row count.
+		" ├─TableReader(Build) 0.00 root data:Selection",
+		// Were the stats on t1.a ignored, this Selection's estimated row count would be 4 instead of 0.
+		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
+		" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
+		" └─TableReader(Probe) 2.00 root data:TableFullScan",
+		" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t1 right join t2 on t1.a=t2.a order by t1.a, t2.a") // same join as the previous query written as a right join: t2 is still the outer side
+	res.Check(testkit.Rows(
+		"Sort 2.00 root test.t1.a, test.t2.a",
+		"└─HashJoin 2.00 root right outer join, equal:[eq(test.t1.a, test.t2.a)]",
+		" ├─TableReader(Build) 0.00 root data:Selection",
+		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
+		" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
+		" └─TableReader(Probe) 2.00 root data:TableFullScan",
+		" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t2 right join t1 on t1.a=t2.a order by t1.a, t2.a") // same join as the first query written as a right join: t1 is still the outer side
+	res.Check(testkit.Rows(
+		"Sort 4.00 root test.t1.a, test.t2.a",
+		"└─HashJoin 4.00 root right outer join, equal:[eq(test.t2.a, test.t1.a)]",
+		" ├─TableReader(Build) 0.00 root data:Selection",
+		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
+		" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
+		" └─TableReader(Probe) 4.00 root data:TableFullScan",
+		" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))
+}