Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data Explorer: Add support for date summary stats #388

Merged
merged 11 commits into from
Jun 20, 2024
1 change: 1 addition & 0 deletions crates/ark/src/data_explorer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
pub mod export_selection;
pub mod format;
pub mod r_data_explorer;
pub mod summary_stats;
86 changes: 14 additions & 72 deletions crates/ark/src/data_explorer/r_data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ use amalthea::comm::data_explorer_comm::SetRowFiltersFeatures;
use amalthea::comm::data_explorer_comm::SetRowFiltersParams;
use amalthea::comm::data_explorer_comm::SetSortColumnsFeatures;
use amalthea::comm::data_explorer_comm::SetSortColumnsParams;
use amalthea::comm::data_explorer_comm::SummaryStatsBoolean;
use amalthea::comm::data_explorer_comm::SummaryStatsNumber;
use amalthea::comm::data_explorer_comm::SummaryStatsString;
use amalthea::comm::data_explorer_comm::SupportStatus;
use amalthea::comm::data_explorer_comm::SupportedFeatures;
use amalthea::comm::data_explorer_comm::TableData;
Expand All @@ -65,10 +62,7 @@ use harp::tbl_get_column;
use harp::utils::r_inherits;
use harp::utils::r_is_object;
use harp::utils::r_is_s4;
use harp::utils::r_names2;
use harp::utils::r_typeof;
use harp::vector::CharacterVector;
use harp::vector::Vector;
use harp::TableInfo;
use harp::TableKind;
use libr::*;
Expand All @@ -82,7 +76,7 @@ use uuid::Uuid;

use crate::data_explorer::export_selection;
use crate::data_explorer::format;
use crate::data_explorer::format::format_string;
use crate::data_explorer::summary_stats::summary_stats;
use crate::interface::RMain;
use crate::lsp::events::EVENTS;
use crate::modules::ARK_ENVS;
Expand Down Expand Up @@ -674,72 +668,10 @@ impl RDataExplorer {
let column = tbl_get_column(self.table.get().sexp, column_index, self.shape.kind)?;
let dtype = display_type(column.sexp);

let call_summary_fn = |fun| {
RFunction::new("", fun)
.param("column", column)
.param("filtered_indices", match &self.filtered_indices {
Some(indices) => RObject::try_from(indices)?,
None => RObject::null(),
})
.call_in(ARK_ENVS.positron_ns)
};

let mut stats = ColumnSummaryStats {
type_display: dtype.clone(),
number_stats: None,
string_stats: None,
boolean_stats: None,
date_stats: None, // TODO: add support for date/datetime stats
datetime_stats: None,
};

match dtype {
ColumnDisplayType::Number => {
let r_stats = call_summary_fn("number_summary_stats")?;

let names = unsafe { CharacterVector::new_unchecked(r_names2(r_stats.sexp)) };
let values = format_string(r_stats.sexp, format_options);

let r_stats: HashMap<String, String> = names
.iter()
.zip(values.into_iter())
.map(|(name, value)| match name {
Some(name) => (name, value),
None => ("unk".to_string(), value),
})
.collect();

stats.number_stats = Some(SummaryStatsNumber {
min_value: Some(r_stats["min_value"].clone()),
max_value: Some(r_stats["max_value"].clone()),
mean: Some(r_stats["mean"].clone()),
median: Some(r_stats["median"].clone()),
stdev: Some(r_stats["stdev"].clone()),
});
},
ColumnDisplayType::String => {
let r_stats: HashMap<String, i32> =
call_summary_fn("string_summary_stats")?.try_into()?;
// Filter the column if we have filtered indices before computing the summary
let filtered_column = r_filter_indices(column, &self.filtered_indices)?;

stats.string_stats = Some(SummaryStatsString {
num_empty: r_stats["num_empty"].clone() as i64,
num_unique: r_stats["num_unique"].clone() as i64,
});
},
ColumnDisplayType::Boolean => {
let r_stats: HashMap<String, i32> =
call_summary_fn("boolean_summary_stats")?.try_into()?;

stats.boolean_stats = Some(SummaryStatsBoolean {
true_count: r_stats["true_count"].clone() as i64,
false_count: r_stats["false_count"].clone() as i64,
});
},
_ => {
bail!("Summary stats not implemented for type: {:?}", dtype);
},
}
Ok(stats)
Ok(summary_stats(filtered_column.sexp, dtype, format_options))
}

/// Sort the rows of the data object according to the sort keys in
Expand Down Expand Up @@ -1199,6 +1131,16 @@ fn table_info_or_bail(x: SEXP) -> anyhow::Result<TableInfo> {
harp::table_info(x).ok_or(anyhow!("Unsupported type for data viewer"))
}

/// Applies an optional set of row indices to `x`.
///
/// When `indices` is `Some`, the R function `col_filter_indices` is called in
/// the `positron_ns` environment with `x` and the converted indices as
/// positional arguments, and its result is returned. When `indices` is `None`,
/// `x` is handed back untouched.
///
/// # Errors
///
/// Returns an error if the indices cannot be converted into an `RObject`, or
/// if the R call itself fails.
// NOTE(review): the index base (0- vs 1-based) expected by
// `col_filter_indices` is not visible from here — confirm against the R side.
fn r_filter_indices(x: RObject, indices: &Option<Vec<i32>>) -> anyhow::Result<RObject> {
    if let Some(selected_rows) = indices {
        // Build the call with the column first and the indices second,
        // then evaluate it in the positron namespace.
        let filtered = RFunction::from("col_filter_indices")
            .add(x)
            .add(RObject::try_from(selected_rows)?)
            .call_in(ARK_ENVS.positron_ns)?;
        Ok(filtered)
    } else {
        // No filter is active: pass the input through unchanged.
        Ok(x)
    }
}

/// Open an R object in the data viewer.
///
/// This function is called from the R side to open an R object in the data viewer.
Expand Down
Loading
Loading