Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement explorations.run RPC endpoint #3696

Merged
merged 13 commits into from
Aug 1, 2024
3 changes: 3 additions & 0 deletions docs/docs/api/rpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ To use an RPC function:
members:
- list_
- delete
- run
- ExplorationInfo
- ExplorationDef
- ExplorationResult

## Roles

Expand Down
112 changes: 109 additions & 3 deletions mathesar/rpc/explorations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
"""
from typing import Optional, TypedDict

from modernrpc.core import rpc_method
from modernrpc.core import rpc_method, REQUEST_KEY
from modernrpc.auth.basic import http_basic_auth_login_required

from mathesar.rpc.exceptions.handlers import handle_rpc_exceptions
from mathesar.utils.explorations import get_explorations, delete_exploration
from mathesar.rpc.utils import connect
from mathesar.utils.explorations import get_explorations, delete_exploration, run_exploration


class ExplorationInfo(TypedDict):
"""
Information about a Exploration.
Information about an exploration.

Attributes:
id: The Django id of an exploration.
Expand Down Expand Up @@ -50,6 +51,94 @@ def from_model(cls, model):
)


class ExplorationDef(TypedDict):
    """
    Definition of a runnable exploration.

    Attributes:
        base_table_oid: The OID of the exploration's base table on the database.
        initial_columns: A list describing the columns to include in the exploration.
        display_names: A map from the actual column names on the database to
            the aliases to be displayed.
        transformations: A list describing the transformations to apply to
            the included columns.
        limit: The number of rows to return (default 100).
        offset: The number of rows to skip (default 0).
        filter: A dict describing the filters to apply to the exploration.
            For example, this dict selects the records where "col1" is NULL
            and the lower-cased "col2" equals "abc":
            ```
            {"and": [
                {"null": [
                    {"column_name": ["col1"]},
                ]},
                {"equal": [
                    {"to_lowercase": [
                        {"column_name": ["col2"]},
                    ]},
                    {"literal": ["abc"]},
                ]},
            ]}
            ```
            Refer to db/functions/base.py for all the possible filters.
        order_by: A list of dicts, each with a `field` and a `direction` key.
            `field` should be a column name and `direction` should be
            either `asc` or `desc`.
        search: A list of dicts, each with a `column` and a `literal` key.
            `column` should be a column name and `literal` should be the
            string to search for in that column.
        duplicate_only: A list of column names for which you want duplicate records.
    """
    base_table_oid: int
    initial_columns: list
    display_names: dict
    transformations: Optional[list]
    limit: Optional[int]
    offset: Optional[int]
    filter: Optional[dict]
    order_by: Optional[list[dict]]
    search: Optional[list[dict]]
    duplicate_only: Optional[list]


class ExplorationResult(TypedDict):
    """
    Result of an exploration run.

    Attributes:
        query: A dict describing the exploration that ran.
        records: A dict holding the total count of records along with the
            contents of those records.
        output_columns: A tuple of the names of the columns included in the exploration.
        column_metadata: A dict describing the metadata applied to the included columns.
        limit: The maximum number of rows returned (default 100).
        offset: The number of rows skipped (default 0).
        filter: A dict describing the filters applied to the exploration.
        order_by: The ordering applied to the columns of the exploration.
        search: A list of dicts containing column names and the searched expressions.
        duplicate_only: A list of column names for which duplicate records were requested.
    """
    query: dict
    records: dict
    output_columns: tuple
    column_metadata: dict
    limit: Optional[int]
    offset: Optional[int]
    filter: Optional[dict]
    order_by: Optional[list[dict]]
    search: Optional[list[dict]]
    duplicate_only: Optional[list]

    @classmethod
    def from_dict(cls, e):
        """
        Build an ExplorationResult from the mapping `e`.

        Every declared key is read with `e[key]`, so a missing key raises
        KeyError; keys of `e` that are not listed below are ignored.
        """
        copied_keys = (
            "query",
            "records",
            "output_columns",
            "column_metadata",
            "limit",
            "offset",
            "filter",
            "order_by",
            "search",
            "duplicate_only",
        )
        return cls(**{key: e[key] for key in copied_keys})


@rpc_method(name="explorations.list")
@http_basic_auth_login_required
@handle_rpc_exceptions
Expand Down Expand Up @@ -78,3 +167,20 @@ def delete(*, exploration_id: int, **kwargs) -> None:
exploration_id: The Django id of the exploration to delete.
"""
delete_exploration(exploration_id)


@rpc_method(name="explorations.run")
@http_basic_auth_login_required
@handle_rpc_exceptions
def run(*, exploration_def: ExplorationDef, database_id: int, **kwargs) -> ExplorationResult:
    """
    Run an exploration.

    Args:
        exploration_def: A dict describing an exploration to run.
        database_id: The Django id of the database containing the base table for the exploration.

    Returns:
        The result of the exploration run.
    """
    # The request object arrives through **kwargs under REQUEST_KEY; its user
    # decides which connection is opened.
    request = kwargs.get(REQUEST_KEY)
    with connect(database_id, request.user) as conn:
        raw_result = run_exploration(exploration_def, database_id, conn)
    return ExplorationResult.from_dict(raw_result)
5 changes: 5 additions & 0 deletions mathesar/tests/rpc/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@
"explorations.delete",
[user_is_authenticated]
),
(
explorations.run,
"explorations.run",
[user_is_authenticated]
),
(
roles.list_,
"roles.list",
Expand Down
124 changes: 123 additions & 1 deletion mathesar/utils/explorations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
from mathesar.models.base import Explorations
from db.engine import create_future_engine_with_custom_types
from db.records.operations.select import get_count
from db.queries.base import DBQuery, InitialColumn, JoinParameter
from db.tables.operations.select import get_table
from mathesar.api.utils import process_annotated_records
from mathesar.models.base import Explorations, ColumnMetaData
from mathesar.rpc.columns.metadata import ColumnMetaDataRecord
from mathesar.state import get_cached_metadata


def get_explorations(database_id):
Expand All @@ -7,3 +14,118 @@ def get_explorations(database_id):

def delete_exploration(exploration_id):
    """Fetch the exploration with the given id and delete it."""
    exploration = Explorations.objects.get(id=exploration_id)
    exploration.delete()


def run_exploration(exploration_def, database_id, conn):
    """
    Run an exploration definition and return its records and metadata.

    Args:
        exploration_def: A dict describing the exploration. `base_table_oid`
            and `initial_columns` are read by key; `transformations`,
            `limit`, `offset`, `filter`, `order_by`, `search` and
            `duplicate_only` fall back to defaults when the key is absent.
        database_id: The Django id of the database, used when looking up
            column metadata.
        conn: A connection whose `info` attribute supplies the user,
            password, host, dbname and port used to build the engine.

    Returns:
        A dict with the keys `query`, `records` (itself a dict with `count`
        and `results`), `output_columns`, `column_metadata`, `limit`,
        `offset`, `filter`, `order_by`, `search` and `duplicate_only`.
    """
    engine = create_future_engine_with_custom_types(
        conn.info.user,
        conn.info.password,
        conn.info.host,
        conn.info.dbname,
        conn.info.port
    )
    try:
        metadata = get_cached_metadata()
        base_table_oid = exploration_def["base_table_oid"]

        processed_initial_columns = []
        for column in exploration_def['initial_columns']:
            jp_path = column.get("join_path")
            if jp_path:
                # Each path element is a pair of (oid, attnum) pairs; the
                # column lives on the right-hand table of the last element.
                join_path = [
                    JoinParameter(
                        left_oid=edge[0][0],
                        left_attnum=edge[0][1],
                        right_oid=edge[1][0],
                        right_attnum=edge[1][1]
                    ) for edge in jp_path
                ]
                reloid = jp_path[-1][-1][0]
            else:
                # A missing or empty join path means a base-table column.
                join_path = None
                reloid = base_table_oid
            processed_initial_columns.append(
                InitialColumn(
                    reloid=reloid,
                    attnum=column["attnum"],
                    alias=column["alias"],
                    jp_path=join_path
                )
            )

        # Read every option once so the values used to run the query are
        # exactly the values reported back in the result.
        limit = exploration_def.get('limit', 100)
        offset = exploration_def.get('offset', 0)
        filter_ = exploration_def.get('filter', None)
        order_by = exploration_def.get('order_by', [])
        search = exploration_def.get('search', [])
        duplicate_only = exploration_def.get('duplicate_only', None)

        db_query = DBQuery(
            base_table_oid=base_table_oid,
            initial_columns=processed_initial_columns,
            engine=engine,
            transformations=exploration_def.get("transformations", []),
            name=None,
            metadata=metadata
        )
        records = db_query.get_records(
            limit=limit,
            offset=offset,
            filter=filter_,
            order_by=order_by,
            search=search,
            duplicate_only=duplicate_only
        )
        processed_records = process_annotated_records(records)[0]
        column_metadata = _get_exploration_column_metadata(
            exploration_def,
            processed_initial_columns,
            database_id,
            db_query,
            conn,
            engine,
            metadata
        )
        # NOTE(review): only `filter` is passed to get_count; `search` and
        # `duplicate_only` are not. Confirm that this is intended.
        record_count = get_count(
            table=db_query.transformed_relation,
            engine=engine,
            filter=filter_
        )
        return {
            "query": exploration_def,
            "records": {
                "count": record_count,
                "results": processed_records
            },
            "output_columns": tuple(sa_col.name for sa_col in db_query.sa_output_columns),
            "column_metadata": column_metadata,
            "limit": limit,
            "offset": offset,
            "filter": filter_,
            "order_by": order_by,
            "search": search,
            "duplicate_only": duplicate_only
        }
    finally:
        # The engine is created for this call only; release its pooled
        # connections whether the run succeeded or raised.
        engine.dispose()


def _get_exploration_column_metadata(
    exploration_def,
    processed_initial_columns,
    database_id,
    db_query,
    conn,
    engine,
    metadata
):
    """
    Describe every column of an exploration, keyed by column alias.

    Args:
        exploration_def: The exploration definition; its `display_names`
            mapping supplies each column's display name.
        processed_initial_columns: The initial columns of the exploration,
            each exposing `alias`, `reloid` and `get_name(engine, metadata)`.
        database_id: The Django id of the database, used to filter the
            stored column metadata.
        db_query: The query object providing `all_sa_columns_map` and
            `get_input_alias_for_output_alias`.
        conn: The connection passed to `get_table` to look up table names.
        engine: Passed to the initial column's `get_name`.
        metadata: Passed to the initial column's `get_name`.

    Returns:
        A dict mapping each alias in `db_query.all_sa_columns_map` to a dict
        with the keys `alias`, `display_name`, `type`, `type_options`,
        `display_options`, `is_initial_column`, `input_column_name`,
        `input_table_name`, `input_table_id` and `input_alias`. The
        input-related values are None for columns that are not initial
        columns.
    """
    # Index the initial columns once instead of rescanning the list for
    # every output column. A later duplicate alias overrides an earlier
    # one, which is what a full scan without an early exit yields.
    initial_columns_by_alias = {
        col.alias: col for col in processed_initial_columns
    }
    # Several columns usually share a table; look each table name up once.
    table_names_by_oid = {}
    display_names = None

    exploration_column_metadata = {}
    for alias, sa_col in db_query.all_sa_columns_map.items():
        initial_column = initial_columns_by_alias.get(alias)
        if initial_column:
            reloid = initial_column.reloid
            column_metadata = ColumnMetaData.objects.filter(
                database__id=database_id,
                table_oid=reloid,
                attnum=sa_col.column_attnum
            ).first()
            if reloid not in table_names_by_oid:
                table_names_by_oid[reloid] = get_table(reloid, conn)["name"]
            input_table_name = table_names_by_oid[reloid]
            input_column_name = initial_column.get_name(engine, metadata)
        else:
            reloid = None
            column_metadata = None
            input_table_name = None
            input_column_name = None

        if display_names is None:
            # Read lazily so that an exploration with no columns never
            # touches the `display_names` key.
            display_names = exploration_def["display_names"]

        exploration_column_metadata[alias] = {
            "alias": alias,
            "display_name": display_names.get(alias),
            "type": sa_col.db_type.id,
            "type_options": sa_col.type_options,
            "display_options": ColumnMetaDataRecord.from_model(column_metadata) if column_metadata else None,
            "is_initial_column": True if initial_column else False,
            "input_column_name": input_column_name,
            "input_table_name": input_table_name,
            "input_table_id": reloid,
            "input_alias": db_query.get_input_alias_for_output_alias(alias)
        }
    return exploration_column_metadata
Loading