diff --git a/crates/amalthea/src/comm/data_explorer_comm.rs b/crates/amalthea/src/comm/data_explorer_comm.rs index 4670c6331..f84d6bc1c 100644 --- a/crates/amalthea/src/comm/data_explorer_comm.rs +++ b/crates/amalthea/src/comm/data_explorer_comm.rs @@ -74,7 +74,7 @@ pub struct ColumnSchema { pub type_name: String, /// Canonical Positron display name of data type - pub type_display: ColumnSchemaTypeDisplay, + pub type_display: ColumnDisplayType, /// Column annotation / description pub description: Option, @@ -198,9 +198,26 @@ pub struct ColumnProfileResult { pub frequency_table: Option } -/// ColumnSummaryStats in Schemas +/// Profile result containing summary stats for a column based on the data +/// type #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct ColumnSummaryStats { + /// Canonical Positron display name of data type + pub type_display: ColumnDisplayType, + + /// Statistics for a numeric data type + pub number_stats: Option, + + /// Statistics for a string-like data type + pub string_stats: Option, + + /// Statistics for a boolean data type + pub boolean_stats: Option +} + +/// SummaryStatsNumber in Schemas +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SummaryStatsNumber { /// Minimum value as string pub min_value: String, @@ -208,16 +225,33 @@ pub struct ColumnSummaryStats { pub max_value: String, /// Average value as string - pub mean_value: Option, + pub mean: String, /// Sample median (50% value) value as string - pub median: Option, + pub median: String, - /// 25th percentile value as string - pub q25: Option, + /// Sample standard deviation as a string + pub stdev: String +} + +/// SummaryStatsBoolean in Schemas +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SummaryStatsBoolean { + /// The number of non-null true values + pub true_count: i64, + + /// The number of non-null false values + pub false_count: i64 +} + +/// SummaryStatsString in Schemas +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SummaryStatsString { + /// The number of empty / length-zero values + pub num_empty: i64, - /// 75th percentile value as string - pub q75: Option + /// The exact number of distinct values + pub num_unique: i64 } /// Result from a histogram profile request @@ -274,9 +308,9 @@ pub struct ColumnSortKey { pub ascending: bool } -/// Possible values for TypeDisplay in ColumnSchema +/// Possible values for ColumnDisplayType #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] -pub enum ColumnSchemaTypeDisplay { +pub enum ColumnDisplayType { #[serde(rename = "number")] Number, diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index 2b33a3aa5..1e4df0065 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -11,7 +11,7 @@ use amalthea::comm::comm_channel::CommMsg; use amalthea::comm::data_explorer_comm::ColumnProfileRequestType; use amalthea::comm::data_explorer_comm::ColumnProfileResult; use amalthea::comm::data_explorer_comm::ColumnSchema; -use amalthea::comm::data_explorer_comm::ColumnSchemaTypeDisplay; +use amalthea::comm::data_explorer_comm::ColumnDisplayType; use amalthea::comm::data_explorer_comm::ColumnSortKey; use amalthea::comm::data_explorer_comm::DataExplorerBackendReply; use amalthea::comm::data_explorer_comm::DataExplorerBackendRequest; @@ -684,61 +684,61 @@ impl RDataExplorer { // This returns the type of an _element_ of the column. In R atomic // vectors do not have a distinct internal type but we pretend that they // do for the purpose of integrating with Positron types. -fn display_type(x: SEXP) -> ColumnSchemaTypeDisplay { +fn display_type(x: SEXP) -> ColumnDisplayType { if r_is_s4(x) { - return ColumnSchemaTypeDisplay::Unknown; + return ColumnDisplayType::Unknown; } if r_is_object(x) { if r_inherits(x, "logical") { - return ColumnSchemaTypeDisplay::Boolean; + return ColumnDisplayType::Boolean; } if r_inherits(x, "integer") { - return ColumnSchemaTypeDisplay::Number; + return ColumnDisplayType::Number; } if r_inherits(x, "double") { - return ColumnSchemaTypeDisplay::Number; + return ColumnDisplayType::Number; } if r_inherits(x, "complex") { - return ColumnSchemaTypeDisplay::Number; + return ColumnDisplayType::Number; } if r_inherits(x, "numeric") { - return ColumnSchemaTypeDisplay::Number; + return ColumnDisplayType::Number; } if r_inherits(x, "character") { - return ColumnSchemaTypeDisplay::String; + return ColumnDisplayType::String; } if r_inherits(x, "factor") { - return ColumnSchemaTypeDisplay::String; + return ColumnDisplayType::String; } if r_inherits(x, "Date") { - return ColumnSchemaTypeDisplay::Date; + return ColumnDisplayType::Date; } if r_inherits(x, "POSIXct") { - return ColumnSchemaTypeDisplay::Datetime; + return ColumnDisplayType::Datetime; } if r_inherits(x, "POSIXlt") { - return ColumnSchemaTypeDisplay::Datetime; + return ColumnDisplayType::Datetime; } // TODO: vctrs's list_of if r_inherits(x, "list") { - return ColumnSchemaTypeDisplay::Unknown; + return ColumnDisplayType::Unknown; } // Catch-all, including for data frame - return ColumnSchemaTypeDisplay::Unknown; + return ColumnDisplayType::Unknown; } match r_typeof(x) { - LGLSXP => return ColumnSchemaTypeDisplay::Boolean, - INTSXP | REALSXP | CPLXSXP => return ColumnSchemaTypeDisplay::Number, - STRSXP => return ColumnSchemaTypeDisplay::String, - VECSXP => return ColumnSchemaTypeDisplay::Unknown, - _ => return ColumnSchemaTypeDisplay::Unknown, + LGLSXP => return ColumnDisplayType::Boolean, + INTSXP | REALSXP | CPLXSXP => return ColumnDisplayType::Number, + STRSXP => return ColumnDisplayType::String, + VECSXP => return ColumnDisplayType::Unknown, + _ => return ColumnDisplayType::Unknown, } }