Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow introduction of ColumnDisplayType enum in data explorer comm #293

Merged
merged 1 commit into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 44 additions & 10 deletions crates/amalthea/src/comm/data_explorer_comm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub struct ColumnSchema {
pub type_name: String,

/// Canonical Positron display name of data type
pub type_display: ColumnSchemaTypeDisplay,
pub type_display: ColumnDisplayType,

/// Column annotation / description
pub description: Option<String>,
Expand Down Expand Up @@ -198,26 +198,60 @@ pub struct ColumnProfileResult {
pub frequency_table: Option<ColumnFrequencyTable>
}

/// ColumnSummaryStats in Schemas
///
/// Profile result containing summary stats for a column based on the data
/// type.
///
/// The three `*_stats` fields are each optional. Presumably only the one
/// matching `type_display` is populated for a given column -- TODO confirm
/// against the code that produces this value.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct ColumnSummaryStats {
/// Canonical Positron display name of data type
pub type_display: ColumnDisplayType,

/// Statistics for a numeric data type; `None` when not provided
pub number_stats: Option<SummaryStatsNumber>,

/// Statistics for a string-like data type; `None` when not provided
pub string_stats: Option<SummaryStatsString>,

/// Statistics for a boolean data type; `None` when not provided
pub boolean_stats: Option<SummaryStatsBoolean>
}

/// SummaryStatsNumber in Schemas
///
/// Summary statistics for a numeric column. Every statistic is carried as a
/// string rather than as a numeric type.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SummaryStatsNumber {
/// Minimum value as string
pub min_value: String,

/// Maximum value as string
pub max_value: String,

/// Average value as string
pub mean_value: Option<String>,
pub mean: String,

/// Sample median (50% value) value as string
pub median: Option<String>,
pub median: String,

/// 25th percentile value as string
pub q25: Option<String>,
/// Sample standard deviation as a string
pub stdev: String
}

/// SummaryStatsBoolean in Schemas
///
/// Summary statistics for a boolean column: separate counts of `true` and
/// `false` values. Both counts are documented as covering non-null values
/// only, so null entries contribute to neither.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SummaryStatsBoolean {
/// The number of non-null true values
pub true_count: i64,

/// The number of non-null false values
pub false_count: i64
}

/// SummaryStatsString in Schemas
///
/// Summary statistics for a string-like column: a count of empty values and
/// a count of distinct values.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SummaryStatsString {
/// The number of empty / length-zero values
pub num_empty: i64,

/// 75th percentile value as string
pub q75: Option<String>
/// The exact number of distinct values
pub num_unique: i64
}

/// Result from a histogram profile request
Expand Down Expand Up @@ -274,9 +308,9 @@ pub struct ColumnSortKey {
pub ascending: bool
}

/// Possible values for TypeDisplay in ColumnSchema
/// Possible values for ColumnDisplayType
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub enum ColumnSchemaTypeDisplay {
pub enum ColumnDisplayType {
#[serde(rename = "number")]
Number,

Expand Down
40 changes: 20 additions & 20 deletions crates/ark/src/data_explorer/r_data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use amalthea::comm::comm_channel::CommMsg;
use amalthea::comm::data_explorer_comm::ColumnProfileRequestType;
use amalthea::comm::data_explorer_comm::ColumnProfileResult;
use amalthea::comm::data_explorer_comm::ColumnSchema;
use amalthea::comm::data_explorer_comm::ColumnSchemaTypeDisplay;
use amalthea::comm::data_explorer_comm::ColumnDisplayType;
use amalthea::comm::data_explorer_comm::ColumnSortKey;
use amalthea::comm::data_explorer_comm::DataExplorerBackendReply;
use amalthea::comm::data_explorer_comm::DataExplorerBackendRequest;
Expand Down Expand Up @@ -684,61 +684,61 @@ impl RDataExplorer {
// This returns the type of an _element_ of the column. In R atomic
// vectors do not have a distinct internal type but we pretend that they
// do for the purpose of integrating with Positron types.
//
// Classification happens in three stages, first match wins:
//   1. S4 values are always reported as `Unknown`.
//   2. Values for which `r_is_object()` is true are classified by the
//      classes they inherit from, checked in the order written below.
//   3. Everything else is classified by its `r_typeof()` value.
fn display_type(x: SEXP) -> ColumnSchemaTypeDisplay {
fn display_type(x: SEXP) -> ColumnDisplayType {
// Stage 1: S4 values are not inspected any further.
if r_is_s4(x) {
return ColumnSchemaTypeDisplay::Unknown;
return ColumnDisplayType::Unknown;
}

// Stage 2: classed objects, matched by inherited class name.
if r_is_object(x) {
if r_inherits(x, "logical") {
return ColumnSchemaTypeDisplay::Boolean;
return ColumnDisplayType::Boolean;
}

// All numeric-like classes collapse to `Number`.
if r_inherits(x, "integer") {
return ColumnSchemaTypeDisplay::Number;
return ColumnDisplayType::Number;
}
if r_inherits(x, "double") {
return ColumnSchemaTypeDisplay::Number;
return ColumnDisplayType::Number;
}
if r_inherits(x, "complex") {
return ColumnSchemaTypeDisplay::Number;
return ColumnDisplayType::Number;
}
if r_inherits(x, "numeric") {
return ColumnSchemaTypeDisplay::Number;
return ColumnDisplayType::Number;
}

// Factors are reported as `String`, same as character vectors.
if r_inherits(x, "character") {
return ColumnSchemaTypeDisplay::String;
return ColumnDisplayType::String;
}
if r_inherits(x, "factor") {
return ColumnSchemaTypeDisplay::String;
return ColumnDisplayType::String;
}

// Both POSIX date-time classes map to `Datetime`; `Date` stays distinct.
if r_inherits(x, "Date") {
return ColumnSchemaTypeDisplay::Date;
return ColumnDisplayType::Date;
}
if r_inherits(x, "POSIXct") {
return ColumnSchemaTypeDisplay::Datetime;
return ColumnDisplayType::Datetime;
}
if r_inherits(x, "POSIXlt") {
return ColumnSchemaTypeDisplay::Datetime;
return ColumnDisplayType::Datetime;
}

// TODO: vctrs's list_of
// This branch currently returns the same value as the catch-all below.
if r_inherits(x, "list") {
return ColumnSchemaTypeDisplay::Unknown;
return ColumnDisplayType::Unknown;
}

// Catch-all, including for data frame
return ColumnSchemaTypeDisplay::Unknown;
return ColumnDisplayType::Unknown;
}

// Stage 3: unclassed values, matched by their underlying R type. The
// `VECSXP` arm returns the same value as the wildcard arm.
match r_typeof(x) {
LGLSXP => return ColumnSchemaTypeDisplay::Boolean,
INTSXP | REALSXP | CPLXSXP => return ColumnSchemaTypeDisplay::Number,
STRSXP => return ColumnSchemaTypeDisplay::String,
VECSXP => return ColumnSchemaTypeDisplay::Unknown,
_ => return ColumnSchemaTypeDisplay::Unknown,
LGLSXP => return ColumnDisplayType::Boolean,
INTSXP | REALSXP | CPLXSXP => return ColumnDisplayType::Number,
STRSXP => return ColumnDisplayType::String,
VECSXP => return ColumnDisplayType::Unknown,
_ => return ColumnDisplayType::Unknown,
}
}

Expand Down
Loading