Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement tables.get_import_preview RPC endpoint #3635

Merged
merged 15 commits into from
Jun 27, 2024
132 changes: 132 additions & 0 deletions db/sql/00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,72 @@ __msar.build_text_tuple(text[]) RETURNS text AS $$
SELECT '(' || string_agg(col, ', ') || ')' FROM unnest($1) x(col);
$$ LANGUAGE sql RETURNS NULL ON NULL INPUT;


----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
-- GENERAL DQL FUNCTIONS
--
-- Functions in this section are quite general, and are the basis of the others.
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------


CREATE OR REPLACE FUNCTION
__msar.exec_dql(command text) RETURNS jsonb AS $$/*
Execute the given command, returning a JSON object describing the records in the following form:
[
{"id": 1, "col1_name": "value1", "col2_name": "value2"},
{"id": 2, "col1_name": "value1", "col2_name": "value2"},
{"id": 3, "col1_name": "value1", "col2_name": "value2"},
...
]

Useful for SELECTing from tables. Most useful when you're performing DQL.

Note that you must include the primary key column(`id` in case of a Mathesar table) in the
command_template if you want the returned records to be uniquely identifiable.

Args:
command: Raw string that will be executed as a command.
*/
DECLARE
records jsonb;
BEGIN
EXECUTE 'WITH cte AS (' || command || ')
SELECT jsonb_agg(row_to_json(cte.*)) FROM cte' INTO records;
RETURN records;
END;
$$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
__msar.exec_dql(command_template text, arguments variadic anyarray) RETURNS jsonb AS $$/*
Execute a templated command, returning a JSON object describing the records in the following form:
[
{"id": 1, "col1_name": "value1", "col2_name": "value2"},
{"id": 2, "col1_name": "value1", "col2_name": "value2"},
{"id": 3, "col1_name": "value1", "col2_name": "value2"},
...
]

The template is given in the first argument, and all further arguments are used to fill in the
template. Useful for SELECTing from tables. Most useful when you're performing DQL.

Note that you must include the primary key column(`id` in case of a Mathesar table) in the
command_template if you want the returned records to be uniquely identifiable.

Args:
command_template: Raw string that will be executed as a command.
arguments: arguments that will be used to fill in the template.
*/
DECLARE formatted_command TEXT;
BEGIN
formatted_command := format(command_template, VARIADIC arguments);
RETURN __msar.exec_dql(formatted_command);
END;
$$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT;


----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
-- INFO FUNCTIONS
Expand Down Expand Up @@ -2342,6 +2408,72 @@ END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION
msar.get_preview(
tab_id oid,
col_cast_def jsonb,
rec_limit integer
) RETURNS jsonb AS $$/*
Preview a table, applying different type casts and options to the underlying columns before import,
returning a JSON object describing the records of the table.

Note that these casts are temporary and do not alter the data in the underlying table,
if you wish to alter these settings permanantly for the columns see msar.alter_columns.

Args:
tab_id: The OID of the table to preview.
col_cast_def: A JSON object describing the column settings to apply.
rec_limit (optional): The upper limit for the number of records to return.

The col_cast_def JSONB should have the form:
[
{
"attnum": <int>,
"type": {
"name": <str>,
"options": {
"length": <integer>,
"precision": <integer>,
"scale": <integer>
"fields": <str>,
"array": <boolean>
}
},
},
{
...
},
...
]
*/
DECLARE
tab_name text;
sel_query text;
records jsonb;
BEGIN
tab_name := __msar.get_qualified_relation_name(tab_id);
sel_query := 'SELECT %s FROM %s LIMIT %L';
WITH preview_cte AS (
SELECT string_agg(
'CAST(' ||
__msar.build_cast_expr(
quote_ident(msar.get_column_name(tab_id, (col_cast ->> 'attnum')::integer)), col_cast -> 'type' ->> 'name'
) ||
' AS ' ||
msar.build_type_text(col_cast -> 'type') ||
')'|| ' AS ' || quote_ident(msar.get_column_name(tab_id, (col_cast ->> 'attnum')::integer)),
', '
) AS cast_expr
FROM jsonb_array_elements(col_cast_def) AS col_cast
)
SELECT
__msar.exec_dql(sel_query, cast_expr, tab_name, rec_limit::text)
INTO records FROM preview_cte;
RETURN records;
END;
$$ LANGUAGE plpgsql;


----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
-- COLUMN ALTERATION FUNCTIONS
Expand Down
34 changes: 34 additions & 0 deletions db/sql/test_00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,40 @@ END;
$f$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_get_preview() RETURNS SETOF TEXT AS $f$
DECLARE
col_cast_def jsonb := $j$[
{
"attnum": 1,
"type": {"name": "integer"}
},
{
"attnum":2,
"type": {"name": "numeric", "options": {"precision":5, "scale":2}}
}
]$j$;
want_records jsonb := $j$[
{"id": 1, "length": 2.00},
{"id": 2, "length": 3.00},
{"id": 3, "length": 4.00},
{"id": 4, "length": 5.22}
]
$j$;
have_records jsonb;
BEGIN
PERFORM __setup_create_table();
CREATE TABLE tab_create_schema.foo(id INTEGER GENERATED BY DEFAULT AS IDENTITY, length FLOAT8);
INSERT INTO tab_create_schema.foo(length) VALUES (2), (3), (4), (5.2225);
have_records := msar.get_preview(
tab_id => 'tab_create_schema.foo'::regclass::oid,
col_cast_def => col_cast_def,
rec_limit => NULL
);
RETURN NEXT is(have_records, want_records);
END;
$f$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_add_mathesar_table_comment() RETURNS SETOF TEXT AS $f$
DECLARE
comment_ text := $c$my "Super;";'; DROP SCHEMA tab_create_schema;'$c$;
Expand Down
21 changes: 21 additions & 0 deletions db/tables/operations/import_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import json
import tempfile

import clevercsv as csv

from db.connection import exec_msar_func
from db.columns.operations.alter import _transform_column_alter_dict
from db.tables.operations.create import prepare_table_for_import
from db.encoding_utils import get_sql_compatible_encoding
from mathesar.models.deprecated import DataFile
Expand Down Expand Up @@ -67,3 +72,19 @@ def insert_csv_records(
with cursor.copy(copy_sql) as copy:
while data := temp_file.read():
copy.write(data)


def get_preview(table_oid, column_list, conn, limit=20):
"""
Preview an imported table. Returning the records from the specified columns of the table.

Args:
table_oid: Identity of the imported table in the user's database.
column_list: List of settings describing the casts to be applied to the columns.
limit: The upper limit for the number of records to return.

Note that these casts are temporary and do not alter the data in the underlying table,
if you wish to alter these settings permanantly for the columns see tables/alter.py.
"""
transformed_column_data = [_transform_column_alter_dict(col) for col in column_list]
return exec_msar_func(conn, 'get_preview', table_oid, json.dumps(transformed_column_data), limit).fetchone()[0]
4 changes: 4 additions & 0 deletions docs/docs/api/rpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ To use an RPC function:
- delete
- patch
- import_
- get_import_preview
- TableInfo
- SettableTableInfo

Expand All @@ -83,6 +84,9 @@ To use an RPC function:
- patch
- delete
- ColumnInfo
- ColumnListReturn
- CreatableColumnInfo
- PreviewableColumnInfo
- SettableColumnInfo
- TypeOptions
- ColumnDefault
Expand Down
14 changes: 14 additions & 0 deletions mathesar/rpc/columns/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ class SettableColumnInfo(TypedDict):
description: Optional[str]


class PreviewableColumnInfo(TypedDict):
"""
Information needed to preview a column.

Attributes:
id: The `attnum` of the column in the table.
type: The new type to be applied to a column.
type_options: The options to be applied to the column type.
"""
id: int
type: Optional[str]
type_options: Optional[TypeOptions]


class ColumnInfo(TypedDict):
"""
Information about a column. Extends the settable fields.
Expand Down
32 changes: 30 additions & 2 deletions mathesar/rpc/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from db.tables.operations.drop import drop_table_from_database
from db.tables.operations.create import create_table_on_database
from db.tables.operations.alter import alter_table_on_database
from db.tables.operations.import_ import import_csv
from mathesar.rpc.columns import CreatableColumnInfo, SettableColumnInfo
from db.tables.operations.import_ import import_csv, get_preview
from mathesar.rpc.columns import CreatableColumnInfo, SettableColumnInfo, PreviewableColumnInfo
from mathesar.rpc.constraints import CreatableConstraintInfo
from mathesar.rpc.exceptions.handlers import handle_rpc_exceptions
from mathesar.rpc.utils import connect
Expand Down Expand Up @@ -200,3 +200,31 @@ def import_(
user = kwargs.get(REQUEST_KEY).user
with connect(database_id, user) as conn:
return import_csv(data_file_id, table_name, schema_oid, conn, comment)


@rpc_method(name="tables.get_import_preview")
@http_basic_auth_login_required
@handle_rpc_exceptions
def get_import_preview(
*,
table_oid: int,
columns: list[PreviewableColumnInfo],
database_id: int,
limit: int = 20,
**kwargs
) -> list[dict]:
"""
Preview an imported table.

Args:
table_oid: Identity of the imported table in the user's database.
columns: List of settings describing the casts to be applied to the columns.
database_id: The Django id of the database containing the table.
limit: The upper limit for the number of records to return.

Returns:
The records from the specified columns of the table.
"""
user = kwargs.get(REQUEST_KEY).user
with connect(database_id, user) as conn:
return get_preview(table_oid, columns, conn, limit)
5 changes: 5 additions & 0 deletions mathesar/tests/rpc/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@
tables.import_,
"tables.import",
[user_is_authenticated]
),
(
tables.get_import_preview,
"tables.get_import_preview",
[user_is_authenticated]
)
]

Expand Down
42 changes: 42 additions & 0 deletions mathesar/tests/rpc/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
rf(pytest-django): Provides mocked `Request` objects.
monkeypatch(pytest): Lets you monkeypatch an object for testing.
"""
from decimal import Decimal
from contextlib import contextmanager

from mathesar.rpc import tables
Expand Down Expand Up @@ -225,3 +226,44 @@ def mock_table_import(_data_file_id, table_name, _schema_oid, conn, comment):
request=request
)
assert imported_table_oid == 1964474


def test_tables_preview(rf, monkeypatch):
request = rf.post('/api/rpc/v0', data={})
request.user = User(username='alice', password='pass1234')
table_oid = 1964474
database_id = 11

@contextmanager
def mock_connect(_database_id, user):
if _database_id == database_id and user.username == 'alice':
try:
yield True
finally:
pass
else:
raise AssertionError('incorrect parameters passed')

def mock_table_preview(_table_oid, columns, conn, limit):
if _table_oid != table_oid:
raise AssertionError('incorrect parameters passed')
return [
{'id': 1, 'length': Decimal('2.0')},
{'id': 2, 'length': Decimal('3.0')},
{'id': 3, 'length': Decimal('4.0')},
{'id': 4, 'length': Decimal('5.22')}
]
monkeypatch.setattr(tables, 'connect', mock_connect)
monkeypatch.setattr(tables, 'get_preview', mock_table_preview)
records = tables.get_import_preview(
table_oid=1964474,
columns=[{'attnum': 2, 'type': {'name': 'numeric', 'options': {'precision': 3, 'scale': 2}}}],
database_id=11,
request=request
)
assert records == [
{'id': 1, 'length': Decimal('2.0')},
{'id': 2, 'length': Decimal('3.0')},
{'id': 3, 'length': Decimal('4.0')},
{'id': 4, 'length': Decimal('5.22')}
]
Loading