Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to export tables #4090

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions db/connection.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from psycopg.rows import dict_row


def exec_msar_func(conn_or_cursor, func_name, *args):
    """
    Execute an msar function using a psycopg (3) connection or cursor.

    Args:
        conn_or_cursor: a psycopg connection or cursor
        func_name: The unqualified msar_function name (danger; not sanitized)
        *args: The list of parameters to pass

    Returns:
        The cursor produced by the `execute` call.
    """
    # One %s placeholder per argument; the arguments themselves are passed
    # separately so psycopg handles quoting.  func_name is interpolated
    # directly into the SQL and therefore must never come from user input.
    placeholders = ','.join(['%s'] * len(args))
    return conn_or_cursor.execute(
        f"SELECT msar.{func_name}({placeholders})", args
    )

Expand All @@ -35,3 +35,16 @@ def select_from_msar_func(conn, func_name, *args):
def load_file_with_conn(conn, file_handle):
    """Execute the SQL script contained in *file_handle* via psycopg."""
    script = file_handle.read()
    conn.execute(script)


def select_from_db_cursor(cursor, refcursor_name, batch_size=2000):
    """
    Yield batches of rows from a named server-side refcursor.

    Args:
        cursor: an open psycopg cursor on the transaction that owns the
            refcursor.
        refcursor_name: the name of the refcursor to read.  Only protected
            by double-quoting; must not contain a double-quote character.
        batch_size: number of rows fetched from the server per round trip.

    Yields:
        Lists of row tuples, one list per FETCH, until the refcursor is
        exhausted.
    """
    fetch_query = f"FETCH FORWARD {batch_size} FROM \"{refcursor_name}\""
    try:
        while True:
            cursor.execute(fetch_query)
            rows = cursor.fetchall()
            if not rows:
                break
            yield rows
    finally:
        # Close the refcursor even if the consumer stops iterating early
        # (GeneratorExit also runs this), so the server-side resource is
        # released before the owning transaction ends.
        cursor.execute(f"CLOSE \"{refcursor_name}\"")
Comment on lines +43 to +48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the fetchmany functionality of the psycopg.ServerCursor class? This seems like it's reinventing the wheel.

See the docs for ServerCursor.

I think it has all the pieces you need, and it'd be more convenient than rolling our own if it works.

Copy link
Member Author

@pavish pavish Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. We need a SQL call to build the select query which accepts filters and sorting in the exact format as list does.
  2. We need a SQL call to fetch the names of the columns (as the existing functions rely on oids).
  3. We need a server side cursor for the records.

If we use ServerCursor class for the records, we'd need to make additional calls for build_select_query and fetch_selectable_column_names. We'd also need to have additional logic to ensure that the column names are in the same order as the columns in the records result.

It's much simpler to fetch everything via a single plpgsql function.


cursor.execute(f"CLOSE \"{refcursor_name}\"")
76 changes: 72 additions & 4 deletions db/sql/00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4786,18 +4786,29 @@ $$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.build_selectable_column_expr(tab_id oid) RETURNS text AS $$/*
Build an SQL select-target expression of only columns to which the user has access.
msar.build_selectable_columns(tab_id oid) RETURNS jsonb AS $$/*
Build a jsonb object with attnum and attname of the columns to which the user has access.

Given columns with attnums 2, 3, and 4, and assuming the user has access only to columns 2 and 4,
this function will return an expression of the form:

column_name AS "2", another_column_name AS "4"
[
  {"attnum": 2, "attname": <column name of 2>},
  {"attnum": 4, "attname": <column name of 4>}
]

Args:
tab_id: The OID of the table containing the columns to select.
*/
SELECT string_agg(format('msar.format_data(%I) AS %I', attname, attnum), ', ')
SELECT coalesce(
jsonb_agg(
jsonb_build_object(
'attnum', attnum,
'attname', attname
)
),
'[]'::jsonb
)
FROM pg_catalog.pg_attribute
WHERE
attrelid = tab_id
Expand All @@ -4807,6 +4818,23 @@ WHERE
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.build_selectable_column_expr(tab_id oid) RETURNS text AS $$/*
Build an SQL select-target expression of only columns to which the user has access.

Given columns with attnums 2, 3, and 4, and assuming the user has access only to columns 2 and 4,
this function will return an expression of the form:

  column_name AS "2", another_column_name AS "4"

Args:
  tab_id: The OID of the table containing the columns to select.
*/
SELECT string_agg(
  format('msar.format_data(%I) AS %I', col->>'attname', col->>'attnum'),
  ', '
)
FROM jsonb_array_elements(msar.build_selectable_columns(tab_id)) AS col;
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION msar.get_default_summary_column(tab_id oid) RETURNS smallint AS $$/*
Choose a column to use for summarizing rows of a table.

Expand Down Expand Up @@ -5255,6 +5283,46 @@ END;
$$ LANGUAGE plpgsql STABLE;


CREATE OR REPLACE FUNCTION
msar.get_table_columns_and_records_cursor(
  tab_id oid,
  limit_ integer,
  offset_ integer,
  order_ jsonb,
  filter_ jsonb
) RETURNS TABLE (name text, columns jsonb, records_cursor refcursor) AS $$/*
Return a table's name, its selectable columns, and an open refcursor over its records.

The refcursor lets callers stream records in batches rather than building one
large JSON result; it remains valid only for the lifetime of the transaction.

Args:
  tab_id: The OID of the table.
  limit_: maximum number of records selected (NULL for no limit).
  offset_: number of records skipped before selecting.
  order_: ordering definition, in the form accepted by msar.build_order_by_expr.
  filter_: filter definition, in the form accepted by msar.build_where_clause.
*/
DECLARE
  -- NOTE(review): these locals shadow the OUT columns declared by
  -- RETURNS TABLE; the RETURN QUERY below reads the locals — confirm intended.
  name text;
  columns jsonb;
  columns_sql text;
  records_cursor refcursor;
BEGIN
  SELECT pgc.relname FROM pg_catalog.pg_class AS pgc WHERE pgc.oid=tab_id INTO name;
  SELECT msar.build_selectable_columns(tab_id) INTO columns;
  -- Build the select-target list, aliasing each accessible column name by its attnum.
  SELECT string_agg(
    format('msar.format_data(%I) AS %I', column_result->>'attname', column_result->>'attnum'),
    ', '
  )
  FROM jsonb_array_elements(columns) AS column_result
  INTO columns_sql;

  -- Placeholders %7$s (WHERE) and %6$s (ORDER BY) are deliberately out of
  -- numeric order so the clauses appear in valid SQL order.
  OPEN records_cursor FOR
    EXECUTE format(
      'SELECT %1$s FROM %2$I.%3$I %7$s %6$s LIMIT %4$L OFFSET %5$L',
      COALESCE(columns_sql, 'NULL'),
      msar.get_relation_schema_name(tab_id),
      msar.get_relation_name(tab_id),
      limit_,
      offset_,
      msar.build_order_by_expr(tab_id, order_),
      msar.build_where_clause(tab_id, filter_)
    );

  RETURN QUERY SELECT name, columns, records_cursor;
END;
$$ LANGUAGE plpgsql STABLE;
Comment on lines +5311 to +5318
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code replication is avoidable by just calling the record lister, then pulling apart the result. Given that this is not exactly a "Hot Path", I think the very minor inefficiency involved is well worth the increase in maintainability.

Copy link
Member Author

@pavish pavish Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code replication is avoidable by just calling the record lister, then pulling apart the result.

The record lister queries all the records and stores it into a single json, so calling that function means that we cannot use a cursor to iterate through the results. That would make the entire process inefficient.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regardless, maintaining that SQL construction in multiple different functions isn't a good idea. We'll need to find a better way.

);

RETURN QUERY SELECT name, columns, records_cursor;
END;
$$ LANGUAGE plpgsql STABLE;


CREATE OR REPLACE FUNCTION
msar.get_score_expr(tab_id oid, parameters_ jsonb) RETURNS text AS $$
SELECT string_agg(
Expand Down
36 changes: 36 additions & 0 deletions db/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
from db.deprecated.types.base import PostgresType


def _json_or_none(value):
return json.dumps(value) if value is not None else None


def get_table(table, conn):
"""
Return a dictionary describing a table of a schema.
Expand Down Expand Up @@ -217,3 +221,35 @@ def split_table(
'extracted_table_oid': extracted_table_oid,
'new_fkey_attnum': new_fkey_attnum
}


def fetch_table_in_chunks(
    conn,
    table_oid,
    limit=None,
    offset=None,
    order=None,
    filter=None,
    with_column_header=True,
):
    """
    Stream a table's records in batches via a server-side refcursor.

    Args:
        conn: a psycopg (3) connection.
        table_oid: The OID of the table to fetch.
        limit: maximum number of records to fetch (None for no limit).
        offset: number of records to skip before fetching.
        order: ordering spec; JSON-serialized before being passed to the
            msar function.
        filter: filter spec; JSON-serialized before being passed to the
            msar function.
        with_column_header: when True, the first yielded item is a list
            holding one tuple of column names.

    Yields:
        The optional column-header row, then batches of record rows.
    """
    # The refcursor opened by the msar function is only valid while the
    # transaction is open, so all iteration happens inside this block.
    with conn.transaction():
        with conn.cursor() as cursor:
            db_conn.exec_msar_func(
                cursor,
                'get_table_columns_and_records_cursor',
                table_oid,
                limit,
                offset,
                _json_or_none(order),
                _json_or_none(filter),
            )
            # Single composite row: (table name, columns jsonb, refcursor name).
            result = cursor.fetchone()[0]
            columns = result[1]
            records_cursor = result[2]
            if with_column_header:
                # NOTE(review): `columns` is treated as a JSON string here —
                # presumably the composite's jsonb member is not auto-parsed
                # by psycopg; confirm against the driver behavior.
                column_names = [tuple(column['attname'] for column in json.loads(columns))]
                yield column_names
            yield from db_conn.select_from_db_cursor(
                cursor,
                records_cursor
            )
1 change: 1 addition & 0 deletions mathesar/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
urlpatterns = [
path('api/rpc/v0/', views.MathesarRPCEntryPoint.as_view()),
path('api/db/v0/', include(db_router.urls)),
path('api/export/v0/tables/', views.export.export_table, name="export_table"),
path('auth/password_reset_confirm', MathesarPasswordResetConfirmView.as_view(), name='password_reset_confirm'),
path('auth/login/', superuser_exist(LoginView.as_view(redirect_authenticated_user=True)), name='login'),
path('auth/create_superuser/', superuser_must_not_exist(SuperuserFormView.as_view()), name='superuser_create'),
Expand Down
5 changes: 5 additions & 0 deletions mathesar/views.py → mathesar/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
from mathesar.rpc.users import get as get_user_info
from mathesar import __version__

from . import export


# __all__ entries must be strings; listing the module object itself makes
# `from mathesar.views import *` raise TypeError.
__all__ = ["export"]


def get_database_list(request):
return databases_list(request=request)
Expand Down
77 changes: 77 additions & 0 deletions mathesar/views/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import csv
from io import StringIO

from django.contrib.auth.decorators import login_required
from django import forms
from django.http import StreamingHttpResponse, JsonResponse

from mathesar.rpc.utils import connect
from mathesar.rpc.records import Filter, OrderBy

from db.tables import fetch_table_in_chunks


class ExportTableQueryForm(forms.Form):
    """Validate the query-string parameters of the CSV table-export endpoint."""
    database_id = forms.IntegerField(required=True)
    table_oid = forms.IntegerField(required=True)
    # Optional JSON-encoded filter/order specs, in the same format the
    # records RPC accepts.
    filter = forms.JSONField(required=False)
    order = forms.JSONField(required=False)


def export_table_csv_in_chunks(
    user,
    database_id: int,
    table_oid: int,
    **kwargs
):
    """
    Yield a table's contents as CSV text, one string per fetched batch.

    Opens a database connection for *user* and streams batches from
    fetch_table_in_chunks, serializing each batch to CSV before yielding it.
    """
    with connect(database_id, user) as conn:
        for rows in fetch_table_in_chunks(conn, table_oid, **kwargs):
            # A fresh buffer per batch keeps each yielded chunk independent.
            chunk_buffer = StringIO()
            csv.writer(chunk_buffer).writerows(rows)
            yield chunk_buffer.getvalue()


def stream_table_as_csv(
    request,
    database_id: int,
    table_oid: int,
    limit: int = None,
    offset: int = None,
    order: list[OrderBy] = None,
    filter: Filter = None,
) -> StreamingHttpResponse:
    """
    Build a streaming HTTP response that serves the table as a CSV download.

    The CSV is produced lazily by export_table_csv_in_chunks so large tables
    are never fully materialized in memory.
    """
    csv_chunks = export_table_csv_in_chunks(
        request.user,
        database_id,
        table_oid,
        limit=limit,
        offset=offset,
        order=order,
        filter=filter,
    )
    response = StreamingHttpResponse(csv_chunks, content_type="text/csv")
    # Mark the payload as a download rather than inline content.
    response['Content-Disposition'] = 'attachment'
    return response


@login_required
def export_table(request):
    """
    Handle GET requests to the table-export endpoint.

    Returns a streaming CSV response on valid parameters, or a 400 JSON
    response describing the validation errors.
    """
    form = ExportTableQueryForm(request.GET)
    # Guard clause: reject invalid query strings before touching the DB.
    if not form.is_valid():
        return JsonResponse({'errors': form.errors}, status=400)
    data = form.cleaned_data
    return stream_table_as_csv(
        request=request,
        database_id=data['database_id'],
        table_oid=data['table_oid'],
        filter=data['filter'],
        order=data['order'],
    )
2 changes: 2 additions & 0 deletions mathesar_ui/src/i18n/languages/en/dict.json
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@
"explore_table": "Explore Table",
"explore_your_data": "Explore your Data",
"exploring_from": "Exploring from",
"export": "Export",
"export_csv_help": "Export the {tableName} table as a CSV file. Your current filters and sorting will be applied to the exported data.",
"extract_columns_to_new_table": "{count, plural, one {Extract Column Into a New Table} other {Extract Columns Into a New Table}}",
"failed_load_preview": "Failed to load preview",
"failed_to_fetch_column_information": "Failed to fetch column information.",
Expand Down
2 changes: 2 additions & 0 deletions mathesar_ui/src/icons/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
faDatabase,
faDiagramNext,
faDollarSign,
faDownload,
faEllipsisV,
faEnvelope,
faExternalLink,
Expand Down Expand Up @@ -121,6 +122,7 @@ export const iconDeleteMinor: IconProps = { data: faTimes };
export const iconEdit: IconProps = { data: faPencilAlt };
export const iconEditUser: IconProps = { data: faUserEdit };
export const iconExpandRight: IconProps = { data: faChevronRight };
export const iconExport: IconProps = { data: faDownload };
export const iconImportData: IconProps = { data: faUpload };
export const iconInferColumnType: IconProps = { data: faMicroscope };
export const iconMoreActions: IconProps = { data: faEllipsisV };
Expand Down
38 changes: 35 additions & 3 deletions mathesar_ui/src/systems/table-view/actions-pane/ActionsPane.svelte
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
<script lang="ts">
import { _ } from 'svelte-i18n';

import { getQueryStringFromParams } from '@mathesar/api/rest/utils/requestUtils';
import EntityPageHeader from '@mathesar/components/EntityPageHeader.svelte';
import ModificationStatus from '@mathesar/components/ModificationStatus.svelte';
import { iconInspector, iconTable } from '@mathesar/icons';
import { iconExport, iconInspector, iconTable } from '@mathesar/icons';
import { tableInspectorVisible } from '@mathesar/stores/localStorage';
import { getTabularDataStoreFromContext } from '@mathesar/stores/table-data';
import { Button, Icon } from '@mathesar-component-library';
import {
AnchorButton,
Button,
Icon,
Tooltip,
} from '@mathesar-component-library';

import FilterDropdown from './record-operations/filter/FilterDropdown.svelte';
import GroupDropdown from './record-operations/group/GroupDropdown.svelte';
Expand All @@ -23,6 +29,12 @@
$: ({ filtering, sorting, grouping, sheetState } = meta);

$: isSelectable = $currentRolePrivileges.has('SELECT');
$: exportLinkParams = getQueryStringFromParams({
database_id: table.schema.database.id,
table_oid: table.oid,
...$sorting.recordsRequestParamsIncludingGrouping($grouping),
...$filtering.recordsRequestParams(),
});

const canViewLinkedEntities = true;

Expand Down Expand Up @@ -55,6 +67,26 @@
<!-- TODO: Display Share option when we re-implement it with the new permissions structure -->
<!-- <ShareTableDropdown id={table.oid} /> -->

<Tooltip allowHover>
<AnchorButton
slot="trigger"
href="/api/export/v0/tables/?{exportLinkParams}"
data-tinro-ignore
appearance="secondary"
size="medium"
aria-label={$_('export')}
download="{table.name}.csv"
>
<Icon {...iconExport} />
<span class="responsive-button-label">{$_('export')}</span>
</AnchorButton>
<span slot="content">
{$_('export_csv_help', {
values: { tableName: table.name },
})}
</span>
</Tooltip>

<Button
appearance="secondary"
size="medium"
Expand Down Expand Up @@ -87,7 +119,7 @@
align-items: center;

> :global(* + *) {
margin-left: 1rem;
margin-left: 0.5rem;
}
}
</style>
Loading