Skip to content

Commit

Permalink
Added stack all columns expression
Browse files Browse the repository at this point in the history
  • Loading branch information
edurdevic committed Dec 31, 2023
1 parent 6169b97 commit a4e181e
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions discoverx/explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ def unpivot_string_columns(self, sample_size=None) -> "DataExplorerActions":

return self.with_sql(sql_query_template)

def unpivot_all_columns(self, sample_size=None) -> "DataExplorerActions":
    """Melt all columns into a pair of columns (column_name, string_value).

    Builds a SQL template that selects ``{stack_all_columns_as_string}`` from
    ``{full_table_name}`` and passes it to ``self.with_sql``. Both
    placeholders are left unresolved in the template built here.

    Args:
        sample_size (int, optional): The number of rows to sample. Defaults
            to None (return all rows).

    Returns:
        The value returned by ``self.with_sql`` for the generated template.

    Raises:
        ValueError: If ``sample_size`` is given but cannot be interpreted as
            a non-negative integer.
    """
    sql_query_template = (
        "\n"
        "SELECT\n"
        "{stack_all_columns_as_string} AS (column_name, string_value)\n"
        "FROM {full_table_name}\n"
    )

    if sample_size is not None:
        # The value is spliced into SQL text, so force it to a plain integer
        # first: this blocks SQL fragments and stray "{...}" sequences that
        # would otherwise corrupt the template when it is formatted later.
        try:
            row_count = int(sample_size)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"sample_size must be a non-negative integer, got {sample_size!r}"
            ) from err
        if row_count < 0:
            raise ValueError(
                f"sample_size must be a non-negative integer, got {sample_size!r}"
            )
        sql_query_template += f"TABLESAMPLE ({row_count} ROWS)"

    return self.with_sql(sql_query_template)

def scan(
self,
rules="*",
Expand Down Expand Up @@ -200,6 +217,12 @@ def _get_stack_string_columns_expression(table_info: TableInfo) -> str:
stack_parameters = ", ".join([f"'{c}', `{c}`" for c in string_col_names])
return f"stack({len(string_col_names)}, {stack_parameters})"

@staticmethod
def _get_stack_all_columns_expression(table_info: TableInfo) -> str:
    """Build a ``stack(...)`` SQL expression covering every column.

    For each entry in ``table_info.columns`` the expression carries two
    arguments: the column name as a quoted literal, and the column itself
    cast to string.

    Args:
        table_info: Object whose ``columns`` items each expose a ``name``.

    Returns:
        A string of the form ``stack(<n>, '<c>', cast(`<c>` AS string), ...)``
        where ``<n>`` is the number of columns.
    """
    name_value_pairs = []
    for column in table_info.columns:
        label = column.name
        name_value_pairs.append(f"'{label}', cast(`{label}` AS string)")

    # One (name, value) pair per column, so the pair count is the row count
    # passed as the first argument of stack().
    return "stack({}, {})".format(len(name_value_pairs), ", ".join(name_value_pairs))

@staticmethod
def _build_sql(sql_template: str, table_info: TableInfo) -> str:
if table_info.catalog and table_info.catalog != "None":
Expand All @@ -208,13 +231,15 @@ def _build_sql(sql_template: str, table_info: TableInfo) -> str:
full_table_name = f"{table_info.schema}.{table_info.table}"

stack_string_columns = DataExplorerActions._get_stack_string_columns_expression(table_info)
stack_all_columns_as_string = DataExplorerActions._get_stack_all_columns_expression(table_info)

sql = sql_template.format(
table_catalog=table_info.catalog,
table_schema=table_info.schema,
table_name=table_info.table,
full_table_name=full_table_name,
stack_string_columns=stack_string_columns,
stack_all_columns_as_string=stack_all_columns_as_string,
)
return sql

Expand Down

0 comments on commit a4e181e

Please sign in to comment.