From eb0df8d031cbc94a992ef087de109cc4adeab8bd Mon Sep 17 00:00:00 2001 From: Brent Moran Date: Mon, 12 Aug 2024 17:19:47 +0800 Subject: [PATCH 1/4] add aggregation step to collect result indices --- db/sql/00_msar.sql | 77 +++++++++++++++++++++++----------- db/sql/test_00_msar.sql | 92 +++++++++++++++++++++++++++-------------- 2 files changed, 114 insertions(+), 55 deletions(-) diff --git a/db/sql/00_msar.sql b/db/sql/00_msar.sql index 035d952224..0e9d39382b 100644 --- a/db/sql/00_msar.sql +++ b/db/sql/00_msar.sql @@ -3502,13 +3502,16 @@ $$ LANGUAGE SQL STABLE; CREATE OR REPLACE FUNCTION -msar.build_results_jsonb_expr(tab_id oid, cte_name text) RETURNS TEXT AS $$/* +msar.build_results_jsonb_expr(tab_id oid, cte_name text, order_ jsonb) RETURNS TEXT AS $$/* Build an SQL expresson string that, when added to the record listing query, produces a JSON array with the records resulting from the request. */ -SELECT 'coalesce(jsonb_agg(json_build_object(' +SELECT format( + 'coalesce(jsonb_agg(json_build_object(' || string_agg(format('%1$L, %2$I.%1$I', attnum, cte_name), ', ') - || ')), jsonb_build_array())' + || ') %1$s), jsonb_build_array())', + msar.build_order_by_expr(tab_id, order_) +) FROM pg_catalog.pg_attribute WHERE attrelid = tab_id @@ -3519,23 +3522,16 @@ $$ LANGUAGE SQL STABLE; CREATE OR REPLACE FUNCTION -msar.build_grouping_results_jsonb_expr(tab_id oid, cte_name text, group_ jsonb) RETURNS TEXT AS $$/* -Build an SQL expresson string that, when added to the record listing query, produces a JSON array -with the groups resulting from the request. +msar.build_groups_cte_expr(tab_id oid, cte_name text, group_ jsonb) RETURNS TEXT AS $$/* */ SELECT format( $gj$ - jsonb_build_object( - 'columns', %3$L::jsonb, - 'preproc', %4$L::jsonb, - 'groups', jsonb_agg( - DISTINCT jsonb_build_object( - 'id', %2$I.__mathesar_gid, - 'count', %2$I.__mathesar_gcount, - 'results_eq', jsonb_build_object(%1$s) - ) - ) - ) + __mathesar_gid AS id, + __mathesar_gcount AS count, + jsonb_build_object(%1$s) AS results_eq, + jsonb_agg(%2$I) AS result_indices + FROM %3$I + GROUP BY id, count, results_eq $gj$, string_agg( format( @@ -3548,9 +3544,8 @@ SELECT format( ), ', ' ORDER BY ordinality ), - cte_name, - group_ ->> 'columns', - group_ ->> 'preproc' + msar.get_pk_column(tab_id), + cte_name ) FROM msar.expr_templates RIGHT JOIN ROWS FROM( jsonb_array_elements_text(group_ -> 'columns'), @@ -3560,6 +3555,33 @@ WHERE has_column_privilege(tab_id, col_id::smallint, 'SELECT'); $$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT; +CREATE OR REPLACE FUNCTION +msar.build_grouping_results_jsonb_expr(tab_id oid, cte_name text, group_ jsonb) RETURNS TEXT AS $$/* +Build an SQL expresson string that, when added to the record listing query, produces a JSON array +with the groups resulting from the request. +*/ +SELECT format( + $gj$ + jsonb_build_object( + 'columns', %2$L::jsonb, + 'preproc', %3$L::jsonb, + 'groups', jsonb_agg( + DISTINCT jsonb_build_object( + 'id', %1$I.id, + 'count', %1$I.count, + 'results_eq', %1$I.results_eq, + 'result_indices', %1$I.result_indices + ) + ) + ) + $gj$, + cte_name, + group_ ->> 'columns', + group_ ->> 'preproc' +) +$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT; + + CREATE OR REPLACE FUNCTION msar.build_selectable_column_expr(tab_id oid) RETURNS text AS $$/* Build an SQL select-target expression of only columns to which the user has access. @@ -3613,6 +3635,8 @@ BEGIN SELECT count(1) AS count FROM %2$I.%3$I %7$s ), enriched_results_cte AS ( SELECT %1$s, %8$s FROM %2$I.%3$I %7$s %6$s LIMIT %4$L OFFSET %5$L + ), groups_cte as ( + SELECT %11$s ) SELECT jsonb_build_object( 'results', %9$s, @@ -3620,7 +3644,9 @@ BEGIN 'grouping', %10$s, 'query', $iq$SELECT %1$s FROM %2$I.%3$I %7$s %6$s LIMIT %4$L OFFSET %5$L$iq$ ) - FROM enriched_results_cte, count_cte + FROM enriched_results_cte + LEFT JOIN groups_cte ON enriched_results_cte.__mathesar_gid = groups_cte.id + CROSS JOIN count_cte $q$, msar.build_selectable_column_expr(tab_id), msar.get_relation_schema_name(tab_id), @@ -3630,8 +3656,9 @@ BEGIN msar.build_order_by_expr(tab_id, order_), msar.build_where_clause(tab_id, filter_), msar.build_grouping_expr(tab_id, group_), - msar.build_results_jsonb_expr(tab_id, 'enriched_results_cte'), - COALESCE(msar.build_grouping_results_jsonb_expr(tab_id, 'enriched_results_cte', group_), 'NULL') + msar.build_results_jsonb_expr(tab_id, 'enriched_results_cte', order_), + COALESCE(msar.build_grouping_results_jsonb_expr(tab_id, 'groups_cte', group_), 'NULL'), + COALESCE(msar.build_groups_cte_expr(tab_id, 'enriched_results_cte', group_), 'NULL AS id') ) INTO records; RETURN records; END; @@ -3816,7 +3843,7 @@ BEGIN $i$, msar.build_single_insert_expr(tab_id, rec_def), msar.build_selectable_column_expr(tab_id), - msar.build_results_jsonb_expr(tab_id, 'insert_cte') + msar.build_results_jsonb_expr(tab_id, 'insert_cte', null) ) INTO rec_created; RETURN rec_created; END; @@ -3870,7 +3897,7 @@ BEGIN ) ), msar.build_selectable_column_expr(tab_id), - msar.build_results_jsonb_expr(tab_id, 'update_cte') + msar.build_results_jsonb_expr(tab_id, 'update_cte', null) ) INTO rec_modified; RETURN rec_modified; END; diff --git a/db/sql/test_00_msar.sql b/db/sql/test_00_msar.sql index 05d4f4c85f..126c24c192 100644 --- a/db/sql/test_00_msar.sql +++ b/db/sql/test_00_msar.sql @@ -2901,27 +2901,27 @@ CREATE TABLE "Customers" ( ); ALTER TABLE "Customers" DROP COLUMN "dropmeee 1"; INSERT INTO "Customers" ("First Name", "Last Name", "Subscription Date") VALUES - ('Aaron', 'Adams', '2020-03-21'), - ('Abigail', 'Acosta', '2020-04-16'), - ('Aaron', 'Adams', '2020-04-29'), - ('Abigail', 'Adams', '2020-05-29'), - ('Abigail', 'Abbott', '2020-07-05'), - ('Aaron', 'Adkins', '2020-08-16'), - ('Aaron', 'Acevedo', '2020-10-29'), - ('Abigail', 'Abbott', '2020-10-30'), - ('Abigail', 'Adams', '2021-02-14'), - ('Abigail', 'Acevedo', '2021-03-29'), - ('Aaron', 'Acosta', '2021-04-13'), - ('Aaron', 'Adams', '2021-06-30'), - ('Abigail', 'Adkins', '2021-09-12'), - ('Aaron', 'Adams', '2021-11-11'), - ('Abigail', 'Abbott', '2021-11-30'), - ('Aaron', 'Acevedo', '2022-02-04'), - ('Aaron', 'Adkins', '2022-03-10'), - ('Abigail', 'Abbott', '2022-03-23'), - ('Abigail', 'Adkins', '2022-03-27'), - ('Abigail', 'Abbott', '2022-04-29'), - ('Abigail', 'Adams', '2022-05-24'); + ('Aaron', 'Adams', '2020-03-21'), -- 1 + ('Abigail', 'Acosta', '2020-04-16'), -- 2 + ('Aaron', 'Adams', '2020-04-29'), -- 3 + ('Abigail', 'Adams', '2020-05-29'), -- 4 + ('Abigail', 'Abbott', '2020-07-05'), -- 5 + ('Aaron', 'Adkins', '2020-08-16'), -- 6 + ('Aaron', 'Acevedo', '2020-10-29'), -- 7 + ('Abigail', 'Abbott', '2020-10-30'), -- 8 + ('Abigail', 'Adams', '2021-02-14'), -- 9 + ('Abigail', 'Acevedo', '2021-03-29'), -- 10 + ('Aaron', 'Acosta', '2021-04-13'), -- 11 + ('Aaron', 'Adams', '2021-06-30'), -- 12 + ('Abigail', 'Adkins', '2021-09-12'), -- 13 + ('Aaron', 'Adams', '2021-11-11'), -- 14 + ('Abigail', 'Abbott', '2021-11-30'), -- 15 + ('Aaron', 'Acevedo', '2022-02-04'), -- 16 + ('Aaron', 'Adkins', '2022-03-10'), -- 17 + ('Abigail', 'Abbott', '2022-03-23'), -- 18 + ('Abigail', 'Adkins', '2022-03-27'), -- 19 + ('Abigail', 'Abbott', '2022-04-29'), -- 20 + ('Abigail', 'Adams', '2022-05-24'); -- 21 END; $$ LANGUAGE plpgsql; @@ -3100,11 +3100,36 @@ BEGIN "columns": [3, 2], "preproc": null, "groups": [ - {"id": 1, "count": 5, "results_eq": {"2": "Abigail", "3": "Abbott"}}, - {"id": 2, "count": 2, "results_eq": {"2": "Aaron", "3": "Acevedo"}}, - {"id": 3, "count": 1, "results_eq": {"2": "Abigail", "3": "Acevedo"}}, - {"id": 4, "count": 1, "results_eq": {"2": "Aaron", "3": "Acosta"}}, - {"id": 5, "count": 1, "results_eq": {"2": "Abigail", "3": "Acosta"}} + { + "id": 1, + "count": 5, + "results_eq": {"2": "Abigail", "3": "Abbott"}, + "result_indices": [5, 8, 15, 18, 20] + }, + { + "id": 2, + "count": 2, + "results_eq": {"2": "Aaron", "3": "Acevedo"}, + "result_indices": [7, 16] + }, + { + "id": 3, + "count": 1, + "results_eq": {"2": "Abigail", "3": "Acevedo"}, + "result_indices": [10] + }, + { + "id": 4, + "count": 1, + "results_eq": {"2": "Aaron", "3": "Acosta"}, + "result_indices": [11] + }, + { + "id": 5, + "count": 1, + "results_eq": {"2": "Abigail", "3": "Acosta"}, + "result_indices": [2] + } ] } }$j$ || jsonb_build_object( @@ -3135,7 +3160,12 @@ BEGIN "columns": [3, 2], "preproc": null, "groups": [ - {"id": 1, "count": 5, "results_eq": {"2": "Abigail", "3": "Abbott"}} + { + "id": 1, + "count": 5, + "results_eq": {"2": "Abigail", "3": "Abbott"}, + "result_indices": [5, 8, 15] + } ] } }$j$ || jsonb_build_object( @@ -3166,8 +3196,8 @@ BEGIN "columns": [4], "preproc": ["truncate_to_month"], "groups": [ - {"id": 1, "count": 1, "results_eq": {"4": "2020-03 AD"}}, - {"id": 2, "count": 2, "results_eq": {"4": "2020-04 AD"}} + {"id": 1, "count": 1, "results_eq": {"4": "2020-03 AD"}, "result_indices": [1]}, + {"id": 2, "count": 2, "results_eq": {"4": "2020-04 AD"}, "result_indices": [2, 3]} ] } }$j$ || jsonb_build_object( @@ -3199,7 +3229,9 @@ BEGIN "grouping": { "columns": [4], "preproc": ["truncate_to_year"], - "groups": [{"id": 1, "count": 8, "results_eq": {"4": "2020 AD"}}] + "groups": [ + {"id": 1, "count": 8, "results_eq": {"4": "2020 AD"}, "result_indices": [1, 2, 3, 4, 5]} + ] } }$j$ || jsonb_build_object( 'query', concat( From 8ac46c7582fdbfbabb2839e9241d48dd6e8c5651 Mon Sep 17 00:00:00 2001 From: Brent Moran Date: Mon, 12 Aug 2024 17:54:59 +0800 Subject: [PATCH 2/4] update documentation to represent new behavior --- mathesar/rpc/records.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mathesar/rpc/records.py b/mathesar/rpc/records.py index e115838f61..b34359e8f5 100644 --- a/mathesar/rpc/records.py +++ b/mathesar/rpc/records.py @@ -81,6 +81,8 @@ class Grouping(TypedDict): """ Grouping definition. + The table involved must have a single column primary key. + Attributes: columns: The columns to be grouped by. preproc: The preprocessing funtions to apply (if any). @@ -93,14 +95,21 @@ class Group(TypedDict): """ Group definition. + Note that the `count` is over all rows in the group, whether returned + or not. However, `result_indices` is restricted to only the rows + returned. This is to avoid potential problems if there are many rows + in the group (e.g., the whole table), but we only return a few. + Attributes: id: The id of the group. Consistent for same input. count: The number of items in the group. results_eq: The value the results of the group equal. + result_indices: The primary key values of group members. """ id: int count: int results_eq: list[dict] + result_indices: list[Any] class GroupingResponse(TypedDict): From a32d79c7ac8d8a4230334ef693d58ed3025d4668 Mon Sep 17 00:00:00 2001 From: Brent Moran Date: Tue, 13 Aug 2024 13:28:52 +0800 Subject: [PATCH 3/4] use row_number from the result set for result_indices --- db/sql/00_msar.sql | 11 ++++++----- db/sql/test_00_msar.sql | 18 +++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/db/sql/00_msar.sql b/db/sql/00_msar.sql index 0e9d39382b..eb94c36ab4 100644 --- a/db/sql/00_msar.sql +++ b/db/sql/00_msar.sql @@ -3529,8 +3529,8 @@ SELECT format( __mathesar_gid AS id, __mathesar_gcount AS count, jsonb_build_object(%1$s) AS results_eq, - jsonb_agg(%2$I) AS result_indices - FROM %3$I + jsonb_agg(__mathesar_result_idx) AS result_indices + FROM %2$I GROUP BY id, count, results_eq $gj$, string_agg( @@ -3544,7 +3544,6 @@ SELECT format( ), ', ' ORDER BY ordinality ), - msar.get_pk_column(tab_id), cte_name ) FROM msar.expr_templates RIGHT JOIN ROWS FROM( @@ -3635,7 +3634,9 @@ BEGIN SELECT count(1) AS count FROM %2$I.%3$I %7$s ), enriched_results_cte AS ( SELECT %1$s, %8$s FROM %2$I.%3$I %7$s %6$s LIMIT %4$L OFFSET %5$L - ), groups_cte as ( + ), results_ranked_cte AS ( + SELECT *, row_number() OVER (%6$s) - 1 AS __mathesar_result_idx FROM enriched_results_cte + ), groups_cte AS ( SELECT %11$s ) SELECT jsonb_build_object( @@ -3658,7 +3659,7 @@ BEGIN msar.build_grouping_expr(tab_id, group_), msar.build_results_jsonb_expr(tab_id, 'enriched_results_cte', order_), COALESCE(msar.build_grouping_results_jsonb_expr(tab_id, 'groups_cte', group_), 'NULL'), - COALESCE(msar.build_groups_cte_expr(tab_id, 'enriched_results_cte', group_), 'NULL AS id') + COALESCE(msar.build_groups_cte_expr(tab_id, 'results_ranked_cte', group_), 'NULL AS id') ) INTO records; RETURN records; END; diff --git a/db/sql/test_00_msar.sql b/db/sql/test_00_msar.sql index 126c24c192..cf6b255d3b 100644 --- a/db/sql/test_00_msar.sql +++ b/db/sql/test_00_msar.sql @@ -3104,31 +3104,31 @@ BEGIN "id": 1, "count": 5, "results_eq": {"2": "Abigail", "3": "Abbott"}, - "result_indices": [5, 8, 15, 18, 20] + "result_indices": [0, 1, 2, 3, 4] }, { "id": 2, "count": 2, "results_eq": {"2": "Aaron", "3": "Acevedo"}, - "result_indices": [7, 16] + "result_indices": [5, 6] }, { "id": 3, "count": 1, "results_eq": {"2": "Abigail", "3": "Acevedo"}, - "result_indices": [10] + "result_indices": [7] }, { "id": 4, "count": 1, "results_eq": {"2": "Aaron", "3": "Acosta"}, - "result_indices": [11] + "result_indices": [8] }, { "id": 5, "count": 1, "results_eq": {"2": "Abigail", "3": "Acosta"}, - "result_indices": [2] + "result_indices": [9] } ] } @@ -3164,7 +3164,7 @@ BEGIN "id": 1, "count": 5, "results_eq": {"2": "Abigail", "3": "Abbott"}, - "result_indices": [5, 8, 15] + "result_indices": [0, 1, 2] } ] } @@ -3196,8 +3196,8 @@ BEGIN "columns": [4], "preproc": ["truncate_to_month"], "groups": [ - {"id": 1, "count": 1, "results_eq": {"4": "2020-03 AD"}, "result_indices": [1]}, - {"id": 2, "count": 2, "results_eq": {"4": "2020-04 AD"}, "result_indices": [2, 3]} + {"id": 1, "count": 1, "results_eq": {"4": "2020-03 AD"}, "result_indices": [0]}, + {"id": 2, "count": 2, "results_eq": {"4": "2020-04 AD"}, "result_indices": [1, 2]} ] } }$j$ || jsonb_build_object( @@ -3230,7 +3230,7 @@ BEGIN "columns": [4], "preproc": ["truncate_to_year"], "groups": [ - {"id": 1, "count": 8, "results_eq": {"4": "2020 AD"}, "result_indices": [1, 2, 3, 4, 5]} + {"id": 1, "count": 8, "results_eq": {"4": "2020 AD"}, "result_indices": [0, 1, 2, 3, 4]} ] } }$j$ || jsonb_build_object( From 6910525c1944e6a0ccf4071beddaf000e1a7c0f8 Mon Sep 17 00:00:00 2001 From: Brent Moran Date: Tue, 13 Aug 2024 22:26:30 +0800 Subject: [PATCH 4/4] fix documentation to align with array indices rather than pkeys --- mathesar/rpc/records.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mathesar/rpc/records.py b/mathesar/rpc/records.py index 504673b976..39b8dbe173 100644 --- a/mathesar/rpc/records.py +++ b/mathesar/rpc/records.py @@ -104,12 +104,13 @@ class Group(TypedDict): id: The id of the group. Consistent for same input. count: The number of items in the group. results_eq: The value the results of the group equal. - result_indices: The primary key values of group members. + result_indices: The 0-indexed positions of group members in the + results array. """ id: int count: int results_eq: list[dict] - result_indices: list[Any] + result_indices: list[int] class GroupingResponse(TypedDict):