-
-
Notifications
You must be signed in to change notification settings - Fork 338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ability to export tables #4090
base: develop
Are you sure you want to change the base?
Changes from all commits
004181d
5b89a5c
f8315af
838452d
d92eb24
59ae25c
93392db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4786,18 +4786,29 @@ $$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT; | |
|
||
|
||
CREATE OR REPLACE FUNCTION | ||
msar.build_selectable_column_expr(tab_id oid) RETURNS text AS $$/* | ||
Build an SQL select-target expression of only columns to which the user has access. | ||
msar.build_selectable_columns(tab_id oid) RETURNS jsonb AS $$/* | ||
Build a jsonb array of objects, each giving the attnum and attname of a column to which the user has access. | ||
|
||
Given columns with attnums 2, 3, and 4, and assuming the user has access only to columns 2 and 4, | ||
this function will return an expression of the form: | ||
|
||
column_name AS "2", another_column_name AS "4" | ||
[ | ||
{"attnum": 2, "attname": <column name of 2>}, | ||
{"attnum": 4, "attname": <column name of 4>} | ||
] | ||
|
||
Args: | ||
tab_id: The OID of the table containing the columns to select. | ||
*/ | ||
SELECT string_agg(format('msar.format_data(%I) AS %I', attname, attnum), ', ') | ||
SELECT coalesce( | ||
jsonb_agg( | ||
jsonb_build_object( | ||
'attnum', attnum, | ||
'attname', attname | ||
) | ||
), | ||
'[]'::jsonb | ||
) | ||
FROM pg_catalog.pg_attribute | ||
WHERE | ||
attrelid = tab_id | ||
|
@@ -4807,6 +4818,23 @@ WHERE | |
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT; | ||
|
||
|
||
CREATE OR REPLACE FUNCTION
msar.build_selectable_column_expr(tab_id oid) RETURNS text AS $$/*
Build the SQL select-target list for the columns of a table reported by
msar.build_selectable_columns(tab_id), i.e., the columns to which the user has access.

Each column is wrapped in msar.format_data() and aliased to its attnum; the pieces are joined
with ', '.

For example, given columns with attnums 2, 3, and 4, where only 2 and 4 are selectable, the result
has the form:

    msar.format_data(column_name) AS "2", msar.format_data(another_column_name) AS "4"

The result is NULL when no selectable columns are found, since string_agg aggregates zero rows.

Args:
  tab_id: The OID of the table containing the columns to select.
*/
SELECT string_agg(
  format(
    'msar.format_data(%I) AS %I',
    selectable_col ->> 'attname',
    selectable_col ->> 'attnum'
  ),
  ', '
)
FROM jsonb_array_elements(msar.build_selectable_columns(tab_id)) AS selectable_col;
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;
|
||
|
||
CREATE OR REPLACE FUNCTION msar.get_default_summary_column(tab_id oid) RETURNS smallint AS $$/* | ||
Choose a column to use for summarizing rows of a table. | ||
|
||
|
@@ -5255,6 +5283,46 @@ END; | |
$$ LANGUAGE plpgsql STABLE; | ||
|
||
|
||
CREATE OR REPLACE FUNCTION | ||
msar.get_table_columns_and_records_cursor( | ||
tab_id oid, | ||
limit_ integer, | ||
offset_ integer, | ||
order_ jsonb, | ||
filter_ jsonb | ||
) RETURNS TABLE (name text, columns jsonb, records_cursor refcursor) AS $$ | ||
DECLARE | ||
name text; | ||
columns jsonb; | ||
columns_sql text; | ||
records_cursor refcursor; | ||
BEGIN | ||
SELECT pgc.relname FROM pg_catalog.pg_class AS pgc WHERE pgc.oid=tab_id INTO name; | ||
SELECT msar.build_selectable_columns(tab_id) INTO columns; | ||
SELECT string_agg( | ||
format('msar.format_data(%I) AS %I', column_result->>'attname', column_result->>'attnum'), | ||
', ' | ||
) | ||
FROM jsonb_array_elements(columns) as column_result | ||
INTO columns_sql; | ||
|
||
OPEN records_cursor FOR | ||
EXECUTE format( | ||
'SELECT %1$s FROM %2$I.%3$I %7$s %6$s LIMIT %4$L OFFSET %5$L', | ||
COALESCE(columns_sql, 'NULL'), | ||
msar.get_relation_schema_name(tab_id), | ||
msar.get_relation_name(tab_id), | ||
limit_, | ||
offset_, | ||
msar.build_order_by_expr(tab_id, order_), | ||
msar.build_where_clause(tab_id, filter_) | ||
Comment on lines
+5311
to
+5318
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code duplication is avoidable by just calling the record lister, then pulling apart the result. Given that this is not exactly a "Hot Path", I think the very minor inefficiency involved is well worth the increase in maintainability. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The record lister queries all the records and stores them in a single JSON value, so calling that function means that we cannot use a cursor to iterate through the results. That would make the entire process inefficient. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Regardless, maintaining that SQL construction in multiple different functions isn't a good idea. We'll need to find a better way. |
||
); | ||
|
||
RETURN QUERY SELECT name, columns, records_cursor; | ||
END; | ||
$$ LANGUAGE plpgsql STABLE; | ||
|
||
|
||
CREATE OR REPLACE FUNCTION | ||
msar.get_score_expr(tab_id oid, parameters_ jsonb) RETURNS text AS $$ | ||
SELECT string_agg( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import csv | ||
from io import StringIO | ||
|
||
from django.contrib.auth.decorators import login_required | ||
from django import forms | ||
from django.http import StreamingHttpResponse, JsonResponse | ||
|
||
from mathesar.rpc.utils import connect | ||
from mathesar.rpc.records import Filter, OrderBy | ||
|
||
from db.tables import fetch_table_in_chunks | ||
|
||
|
||
class ExportTableQueryForm(forms.Form):
    """Validate the query parameters accepted by the table export view."""

    # Identifiers of the database and of the table to export; both are mandatory.
    database_id = forms.IntegerField(required=True)
    table_oid = forms.IntegerField(required=True)

    # Optional JSON-encoded filter and ordering specifications.
    filter = forms.JSONField(required=False)
    order = forms.JSONField(required=False)
|
||
|
||
def export_table_csv_in_chunks(
        user,
        database_id: int,
        table_oid: int,
        **kwargs
):
    """
    Yield the contents of a table as successive pieces of CSV text.

    A connection is opened with `connect(database_id, user)` and kept open
    while the generator is being consumed. Every chunk of rows produced by
    `fetch_table_in_chunks` is serialized with `csv.writer` and yielded as a
    single string.

    Args:
        user: The user on whose behalf the connection is opened.
        database_id: Identifier passed to `connect`.
        table_oid: The OID of the table to export.
        **kwargs: Forwarded unchanged to `fetch_table_in_chunks`.

    Yields:
        The CSV text for one chunk of rows.
    """
    # A single buffer and writer are reused for every chunk.
    buffer = StringIO()
    writer = csv.writer(buffer)
    with connect(database_id, user) as conn:
        chunks = fetch_table_in_chunks(conn, table_oid, **kwargs)
        for chunk in chunks:
            writer.writerows(chunk)
            yield buffer.getvalue()
            # Empty the buffer and rewind so the next chunk starts clean.
            buffer.truncate(0)
            buffer.seek(0)
|
||
|
||
def stream_table_as_csv(
        request,
        database_id: int,
        table_oid: int,
        limit: int = None,
        offset: int = None,
        order: list[OrderBy] = None,
        filter: Filter = None,
) -> StreamingHttpResponse:
    """
    Build a streaming HTTP response that delivers a table as a CSV attachment.

    Args:
        request: The incoming request; its `user` is used to open the connection.
        database_id: Identifier of the database holding the table.
        table_oid: The OID of the table to export.
        limit: Forwarded to `export_table_csv_in_chunks`.
        offset: Forwarded to `export_table_csv_in_chunks`.
        order: Forwarded to `export_table_csv_in_chunks`.
        filter: Forwarded to `export_table_csv_in_chunks`.

    Returns:
        A `StreamingHttpResponse` with content type "text/csv" and a
        Content-Disposition of "attachment".
    """
    query_options = dict(limit=limit, offset=offset, order=order, filter=filter)
    csv_chunks = export_table_csv_in_chunks(
        request.user,
        database_id,
        table_oid,
        **query_options,
    )
    response = StreamingHttpResponse(csv_chunks, content_type="text/csv")
    response['Content-Disposition'] = 'attachment'
    return response
|
||
|
||
@login_required
def export_table(request):
    """
    Export a table as a streamed CSV download.

    The GET parameters are validated with `ExportTableQueryForm`. If they are
    invalid, a JSON body containing the form errors is returned with status
    400; otherwise the response from `stream_table_as_csv` is returned.
    """
    query_form = ExportTableQueryForm(request.GET)
    if not query_form.is_valid():
        return JsonResponse({'errors': query_form.errors}, status=400)

    params = query_form.cleaned_data
    return stream_table_as_csv(
        request=request,
        database_id=params['database_id'],
        table_oid=params['table_oid'],
        filter=params['filter'],
        order=params['order'],
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not use the `fetchmany` functionality of the `psycopg.ServerCursor` class? This seems like it's reinventing the wheel.
See the docs for `ServerCursor`.
I think it has all the pieces you need, and it'd be more convenient than rolling our own if it works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`list` does.
If we use the `ServerCursor` class for the records, we'd need to make additional calls for `build_select_query` and `fetch_selectable_column_names`. We'd also need to have additional logic to ensure that the column names are in the same order as the columns in the records result.
It's much simpler to fetch everything via a single plpgsql function.