diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 766dcc6..8e696a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - pg: [16, 15, 14, 13, 12, 11, 10, 9.6] + pg: [17, 16, 15, 14, 13, 12, 11, 10, 9.6] name: PostgreSQL ${{ matrix.pg }} runs-on: ubuntu-latest container: pgxn/pgxn-tools diff --git a/test/expected/parallel_query_0.out b/test/expected/parallel_query_0.out new file mode 100644 index 0000000..420fcaa --- /dev/null +++ b/test/expected/parallel_query_0.out @@ -0,0 +1,361 @@ +DO $$ +DECLARE + v_version numeric; +BEGIN + + SELECT substring(setting from '\d+')::numeric INTO v_version FROM pg_settings WHERE name = 'server_version'; + + -- GUCs common for all versions + PERFORM set_config('extra_float_digits', '0', false); + PERFORM set_config('parallel_setup_cost', '0', false); + PERFORM set_config('parallel_tuple_cost', '0', false); + PERFORM set_config('max_parallel_workers_per_gather', '2', false); + + -- 9.6 used somewhat different GUC name for relation size + IF v_version < 10 THEN + PERFORM set_config('min_parallel_relation_size', '1kB', false); + ELSE + PERFORM set_config('min_parallel_table_scan_size', '1kB', false); + END IF; + + -- in 14 disable Memoize nodes, to make explain more consistent + IF v_version >= 14 THEN + PERFORM set_config('enable_memoize', 'off', false); + END IF; + +END; +$$ LANGUAGE plpgsql; +-- test parallel query +DROP TABLE t; +CREATE TABLE t (v double precision, c int, d int); +INSERT INTO t SELECT 1000 * random(), 1 + mod(i,7), mod(i,113) FROM generate_series(1,100000) s(i); +ANALYZE t; +CREATE TABLE t2 (d tdigest); +INSERT INTO t2 SELECT tdigest(v, 100) FROM t GROUP BY d; +ANALYZE t2; +-- individual values +EXPLAIN (COSTS OFF) +WITH x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(v, 100, 0.95) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on foo + -> Finalize Aggregate + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t t_1 + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(11 rows) + +WITH x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(v, 100, 0.95) AS b + FROM t) foo; + ?column? | ?column? +----------+---------- + 0.95 | t +(1 row) + +EXPLAIN (COSTS OFF) +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + 0.95 AS a, + tdigest_percentile_of(v, 100, 950) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------ + Subquery Scan on foo + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(6 rows) + +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + 0.95 AS a, + tdigest_percentile_of(v, 100, 950) AS b + FROM t) foo; + ?column? | ?column? +----------+---------- + 950 | t +(1 row) + +EXPLAIN (COSTS OFF) +WITH x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(d, 0.95) AS b + FROM t2) foo; + QUERY PLAN +-------------------------------------------------- + Subquery Scan on foo + -> Finalize Aggregate + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t2 +(11 rows) + +WITH x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(d, 0.95) AS b + FROM t2) foo; + ?column? | ?column? +----------+---------- + 0.95 | t +(1 row) + +EXPLAIN (COSTS OFF) +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + 0.95 AS a, + tdigest_percentile_of(d, 950) AS b + FROM t2) foo; + QUERY PLAN +------------------------------------------------- + Subquery Scan on foo + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t2 +(6 rows) + +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + 0.95 AS a, + tdigest_percentile_of(d, 950) AS b + FROM t2) foo; + ?column? | ?column? +----------+---------- + 950 | t +(1 row) + +-- array of percentiles / values +EXPLAIN (COSTS OFF) +WITH x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(v, 100, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on foo + -> ProjectSet + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t t_1 + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(12 rows) + +WITH x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(v, 100, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t) foo; + p | ?column? +------+---------- + 0.0 | t + 0.95 | t + 0.99 | t + 1.0 | t +(4 rows) + +EXPLAIN (COSTS OFF) +WITH x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) FROM t)) AS p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile_of(v, 100, ARRAY[950, 990])) AS b + FROM t) foo; + QUERY PLAN +-------------------------------------------------------------- + Subquery Scan on foo + -> ProjectSet + InitPlan 2 + -> Aggregate + -> Function Scan on unnest f + SubPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t t_1 + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(15 rows) + +WITH x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) FROM t)) AS p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile_of(v, 100, ARRAY[950, 990])) AS b + FROM t) foo; + p | ?column? +-----+---------- + 950 | t + 990 | t +(2 rows) + +EXPLAIN (COSTS OFF) +WITH x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(d, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t2) foo; + QUERY PLAN +------------------------------------------------------- + Subquery Scan on foo + -> ProjectSet + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t2 +(12 rows) + +WITH x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM t) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(d, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t2) foo; + p | ?column? +------+---------- + 0.0 | t + 0.95 | t + 0.99 | t + 1.0 | t +(4 rows) + +EXPLAIN (COSTS OFF) +WITH x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) FROM t)) AS p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile_of(d, ARRAY[950, 990])) AS b + FROM t2) foo; + QUERY PLAN +---------------------------------------------------------- + Subquery Scan on foo + -> ProjectSet + InitPlan 2 + -> Aggregate + -> Function Scan on unnest f + SubPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Parallel Seq Scan on t + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t2 +(15 rows) + +WITH x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) FROM t)) AS p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile_of(d, ARRAY[950, 990])) AS b + FROM t2) foo; + p | ?column? +-----+---------- + 950 | t + 990 | t +(2 rows) + diff --git a/test/expected/value_count_api_0.out b/test/expected/value_count_api_0.out new file mode 100644 index 0000000..bf0c432 --- /dev/null +++ b/test/expected/value_count_api_0.out @@ -0,0 +1,364 @@ +DO $$ +DECLARE + v_version numeric; +BEGIN + + SELECT substring(setting from '\d+')::numeric INTO v_version FROM pg_settings WHERE name = 'server_version'; + + -- GUCs common for all versions + PERFORM set_config('extra_float_digits', '0', false); + PERFORM set_config('parallel_setup_cost', '0', false); + PERFORM set_config('parallel_tuple_cost', '0', false); + PERFORM set_config('max_parallel_workers_per_gather', '2', false); + + -- 9.6 used somewhat different GUC name for relation size + IF v_version < 10 THEN + PERFORM set_config('min_parallel_relation_size', '1kB', false); + ELSE + PERFORM set_config('min_parallel_table_scan_size', '1kB', false); + END IF; + + -- in 14 disable Memoize nodes, to make explain more consistent + IF v_version >= 14 THEN + PERFORM set_config('enable_memoize', 'off', false); + END IF; + +END; +$$ LANGUAGE plpgsql; +-- API +select tdigest_percentile(value, count, 100, 0.95) +from (values + (47325940488,1), + (15457695432,2), + (6889790700,3), + (4188763788,4), + (2882932224,5), + (2114815860,6), + (1615194324,7), + (2342114568,9), + (1626471924,11), + (1660755408,14), + (1143728292,17), + (1082582424,21), + (911488284,26), + (728863908,32), + (654898692,40), + (530198076,50), + (417883440,62), + (341452344,77), + (274579584,95), + (231921120,118), + (184091820,146), + (152469828,181), + (125634972,224), + (107059704,278), + (88746120,345), + (73135668,428), + (61035756,531), + (50683320,658), + (42331824,816), + (35234400,1012), + (29341356,1255), + (24290928,1556), + (20284668,1929), + (17215908,2391), + (14737488,2964), + (12692772,3674), + (11220732,4555), + (9787584,5647), + (8148420,7000), + (6918612,8678), + (6015000,10758), + (5480316,13336), + (5443356,16532), + (4535616,20494), + (3962316,25406), + (3914484,31495), + (3828108,39043), + (3583536,48400), + (4104120,60000), + (166024740,2147483647)) foo (count, value); + tdigest_percentile +-------------------- + 30.3586183216119 +(1 row) + +---------------------------------------------- +-- nice data set with random data (uniform) -- +---------------------------------------------- +-- 10 centroids (tiny) +WITH + data AS (SELECT prng(1000) x, prng(1000, 29823218) cnt), + data_expanded AS (SELECT x FROM (SELECT x, generate_series(1, (10 + 100 * cnt)::int) FROM data) foo ORDER BY random()) +SELECT + p, + abs(a - b) < 0.1, -- arbitrary threshold of 10% + (CASE WHEN abs(a - b) < 0.1 THEN NULL ELSE (a - b) END) AS err +FROM ( + SELECT + unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p, + unnest(a) AS a, + unnest(b) AS b + FROM + (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) a FROM data_expanded) foo, + (SELECT tdigest_percentile(x, (10 + 100 * cnt)::int, 10, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) b FROM data) bar +) baz; + p | ?column? | err +------+----------+----- + 0.01 | t | + 0.05 | t | + 0.1 | t | + 0.9 | t | + 0.95 | t | + 0.99 | t | +(6 rows) + +-- 100 centroids (okay-ish) +WITH + data AS (SELECT prng(1000) x, prng(1000, 29823218) cnt), + data_expanded AS (SELECT x FROM (SELECT x, generate_series(1, (10 + 100 * cnt)::int) FROM data) foo ORDER BY random()) +SELECT + p, + abs(a - b) < 0.01, -- arbitrary threshold of 1% + (CASE WHEN abs(a - b) < 0.1 THEN NULL ELSE (a - b) END) AS err +FROM ( + SELECT + unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p, + unnest(a) AS a, + unnest(b) AS b + FROM + (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) a FROM data_expanded) foo, + (SELECT tdigest_percentile(x, (10 + 100 * cnt)::int, 100, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) b FROM data) bar +) baz; + p | ?column? | err +------+----------+----- + 0.01 | t | + 0.05 | t | + 0.1 | t | + 0.9 | t | + 0.95 | t | + 0.99 | t | +(6 rows) + +-- 1000 centroids (very accurate) +WITH + data AS (SELECT prng(1000) x, prng(1000, 29823218) cnt), + data_expanded AS (SELECT x FROM (SELECT x, generate_series(1, (10 + 100 * cnt)::int) FROM data) foo ORDER BY random()) +SELECT + p, + abs(a - b) < 0.01, -- arbitrary threshold of 1% + (CASE WHEN abs(a - b) < 0.1 THEN NULL ELSE (a - b) END) AS err +FROM ( + SELECT + unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p, + unnest(a) AS a, + unnest(b) AS b + FROM + (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) a FROM data_expanded) foo, + (SELECT tdigest_percentile(x, (10 + 100 * cnt)::int, 1000, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) b FROM data) bar +) baz; + p | ?column? | err +------+----------+----- + 0.01 | t | + 0.05 | t | + 0.1 | t | + 0.9 | t | + 0.95 | t | + 0.99 | t | +(6 rows) + +-- API +EXPLAIN (COSTS OFF) +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(v, c, 100, 0.95) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------------------------ + Subquery Scan on foo + -> Finalize Aggregate + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Seq Scan on t t_1 + -> Function Scan on generate_series + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(13 rows) + +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + 0.95, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile(v, c, 100, 0.95) AS b + FROM t) foo; + ?column? | ?column? +----------+---------- + 0.95 | t +(1 row) + +EXPLAIN (COSTS OFF) +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percent_rank(950) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile_of(v, c, 100, 950) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------------------------ + Subquery Scan on foo + -> Finalize Aggregate + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Seq Scan on t t_1 + -> Function Scan on generate_series + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(13 rows) + +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percent_rank(950) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + 950, + abs(a - b) < 0.01 +FROM ( + SELECT + (SELECT p FROM x) AS a, + tdigest_percentile_of(v, c, 100, 950) AS b + FROM t) foo; + ?column? | ?column? +----------+---------- + 950 | t +(1 row) + +-- array of percentiles / values +EXPLAIN (COSTS OFF) +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(v, c, 100, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t) foo; + QUERY PLAN +------------------------------------------------------------------ + Subquery Scan on foo + -> ProjectSet + InitPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Seq Scan on t t_1 + -> Function Scan on generate_series + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(14 rows) + +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT percentile_disc(ARRAY[0.0, 0.95, 0.99, 1.0]) WITHIN GROUP (ORDER BY v) AS p FROM d) +SELECT + p, + abs(a - b) / 1000 < 0.01 +FROM ( + SELECT + unnest(ARRAY[0.0, 0.95, 0.99, 1.0]) p, + unnest((SELECT p FROM x)) AS a, + unnest(tdigest_percentile(v, c, 100, ARRAY[0.0, 0.95, 0.99, 1.0])) AS b + FROM t) foo; + p | ?column? +------+---------- + 0.0 | t + 0.95 | t + 0.99 | t + 1.0 | t +(4 rows) + +EXPLAIN (COSTS OFF) +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) AS p FROM d)) p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((select x.p from x)) AS a, + unnest(tdigest_percentile_of(v, c, 100, ARRAY[950, 990])) AS b + FROM t) foo; + QUERY PLAN +-------------------------------------------------------------------------- + Subquery Scan on foo + -> ProjectSet + InitPlan 2 + -> Aggregate + -> Function Scan on unnest f + SubPlan 1 + -> Aggregate + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Seq Scan on t t_1 + -> Function Scan on generate_series + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Seq Scan on t +(17 rows) + +WITH + d AS (SELECT t.* FROM t, LATERAL generate_series(1,t.c)), + x AS (SELECT array_agg((SELECT percent_rank(f) WITHIN GROUP (ORDER BY v) AS p FROM d)) p FROM unnest(ARRAY[950, 990]) f) +SELECT + p, + abs(a - b) < 0.01 +FROM ( + SELECT + unnest(ARRAY[950, 990]) AS p, + unnest((select x.p from x)) AS a, + unnest(tdigest_percentile_of(v, c, 100, ARRAY[950, 990])) AS b + FROM t) foo; + p | ?column? +-----+---------- + 950 | t + 990 | t +(2 rows) +