From 217f5342e0dd40809324c96307f090012bb9aad2 Mon Sep 17 00:00:00 2001 From: tarapk Date: Tue, 9 Jul 2024 11:04:19 +0000 Subject: [PATCH] Examples file, examples 1 and 2 work, some issues with 3 and 4 --- vignettes/blockr_examples.Rmd | 716 ++++++++++++++++++++++++++++++++++ 1 file changed, 716 insertions(+) create mode 100644 vignettes/blockr_examples.Rmd diff --git a/vignettes/blockr_examples.Rmd b/vignettes/blockr_examples.Rmd new file mode 100644 index 00000000..1417c069 --- /dev/null +++ b/vignettes/blockr_examples.Rmd @@ -0,0 +1,716 @@ +--- +title: "blockr: Flexible Data Pipeline Building for Everyone" +output: html_document +date: "2024-07-07" +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Why blockr? + +`blockr` is an R package designed to democratize data analysis by providing a flexible, intuitive, and code-free approach to building data pipelines. It allows users to create powerful data workflows using pre-built blocks that can be easily connected and customized, all without writing a single line of code. + +### Key Features + +1. **User-Friendly Interface**: Build data pipelines using a drag-and-drop interface. +2. **Flexibility**: Easily add, remove, or rearrange blocks in your pipeline. +3. **Extensibility**: Developers can create custom blocks to extend functionality. +4. **Reproducibility**: Pipelines created with `blockr` are easily shareable and reproducible, with exportable code. +5. **Interactivity**: Real-time feedback as you build and modify your pipeline. + +## blockr Across Industries + +The flexibility of `blockr` makes it valuable across various industries. Let's explore how it can be applied in different sectors with detailed examples: + + + +```{r slider_block, include=FALSE} +# Provided by John, new_range_field not working +new_slider_field <- function(value = 5L, min = 0L, max = 10L, step = 1L, ...){ + blockr::new_field( + value = value, + min = min, + max = max, + ..., + class = "slider_field" + ) +} +slider_field <- function(...){ + validate_field(new_slider_field(...)) +} + +#' @method ui_input slider_field +#' @export +ui_input.slider_field <- function(x, id, name){ + ns <- NS(input_ids(x, id)) + + shiny::sliderInput( + blockr:::input_ids(x, id), + name, + value = blockr::value(x, "value"), + min = blockr::value(x, "min"), + max = blockr::value(x, "max"), + step = blockr::value(x, "step") + ) +} + +#' @method validate_field slider_field +#' @export +validate_field.slider_field <- function(x, ...){ + x +} + +#' @method ui_update slider_field +#' @export +ui_update.slider_field <- function(x, session, id, name) { + updateSliderInput( + session, + blockr:::input_ids(x, id), + blockr:::get_field_name(x, name), + blockr::value(x), + blockr::value(x, "min"), + blockr::value(x, "max") + ) +} + + +``` + +```{r filter_block, include=FALSE} +#' Correlation plot +#' @param data dataset. +#' @param ... ignored. +#' @export +filter_in_block <- function(data, ...) { + sub_fields <- \(data, column){ + if(!length(column)) + column <- names(data)[1] + + list( + values = blockr::select_field(data[[column]][1], unique(data[[column]]), multiple = TRUE) + ) + } + + fields <- list( + column = blockr::new_select_field(\(data) names(data)[1], choices = \(data) names(data), title = "Column"), + values = blockr::new_list_field(character(), sub_fields) + ) + + blockr::new_block( + fields = fields, + expr = quote({ + vals <- unname(.(values)) + data |> + dplyr::filter(!!dplyr::sym(.(column)) %in% vals[[1]]) + }), + ..., + class = c("filter_in_block", "transform_block", "submit_block") + ) +} + + blockr::register_block( + constructor = filter_in_block, + name = "Filter in", + description = "Filter on a vector", + classes = c("filter_in_block", "transform_block"), + input = "data.frame", + output = "data.frame" + ) +``` + + +### 1. Finance: Stock Price Forecasting + +In this example, we'll create a pipeline that fetches recent stock data using the [quantmod package](https://github.com/joshuaulrich/quantmod), performs time series analysis, and forecasts future stock prices using the [Prophet model](https://github.com/facebook/prophet). + +```{r stock_example} +library(blockr) +library(quantmod) +library(prophet) + +# Custom block to fetch stock data +stock_data_block <- function(...) { + # stocks to pick (top 10) + pick_stock <- \() c("NVDA", "TSLA", "AAPL", "MSFT", "AVGO", "AMZN", "AMD", "PLTR", "TSM", "META") + + + new_block( + fields = list( + ticker = new_select_field(pick_stock()[1], pick_stock, multiple = FALSE, title = "Ticker") + ), + expr = quote({ + data_xts <- getSymbols(.(ticker), src = "yahoo", auto.assign = FALSE) + data.frame(Date = index(data_xts), coredata(data_xts)) |> + tail(700) # only considering last 700 days for this example + }), + class = c("stock_data_block", "data_block"), + ... + ) +} + +# Custom block for Prophet forecasting +prophet_forecast_block <- function(columns = character(), ...) { + all_cols <- function(data) colnames(data)[2:length(colnames(data))] + + + new_block( + fields = list( + # date_col = new_select_field(columns, all_cols, multiple=FALSE, title="Date"), + value_col = new_select_field(columns, all_cols, multiple = FALSE, title = "Value"), + periods = new_slider_field(7, min = 0, max = 365, title = "Forecast duration") + ), + expr = quote({ + + df <- data.frame( + ds = data$Date, + y = data[[.(value_col)]] + ) + model <- prophet(df) + future <- make_future_dataframe(model, periods =.( periods)) + forecast <- predict(model, future) + plot(model, forecast) + }), + class = c("prophet_forecast_block", "plot_block"), + ... + ) +} + +# Register custom blocks +register_block( + stock_data_block, + name = "Stock Data", + description = "Fetch stock data", + classes = c("stock_data_block", "data_block"), + input = NA_character_, + output = "data.frame" +) +register_block( + prophet_forecast_block, + name = "Prophet Forecast", + description = "Forecast using Prophet", + classes = c("prophet_forecast_block", "plot_block"), + input = "data.frame", + output = "plot" +) + +# Create the stack +stock_forecast_stack <- new_stack( + stock_data_block(), + prophet_forecast_block() +) + +serve_stack(stock_forecast_stack) +``` + + +### 2. Pharmaceutical: Clinical Trial Analysis + +#### 2.1 AE Forest Plot + +This forest plot visualizes the relative risk of adverse events between two treatment arms in a clinical trial. In this case, it compares "Xanomeline High Dose" to "Xanomeline Low Dose" starting from the [pharmaverseadam](https://github.com/pharmaverse/pharmaverseadam) `adae` dataset. + +```{r ae_forest} + +# Function to create adverse event forest plot +create_ae_forest_plot <- function(data, usubjid_col, arm_col, aedecod_col, n_events) { + + + # Convert column names to strings + usubjid_col <- as.character(substitute(usubjid_col)) + arm_col <- as.character(substitute(arm_col)) + aedecod_col <- as.character(substitute(aedecod_col)) + + # Calculate the total number of subjects in each arm + n_subjects <- data |> + select(all_of(c(usubjid_col, arm_col))) |> + distinct() |> + group_by(across(all_of(arm_col))) |> + summarise(n = n(), .groups = "drop") + + # Calculate AE frequencies and proportions + ae_summary <- data |> + group_by(across(all_of(c(arm_col, aedecod_col)))) |> + summarise(n_events = n_distinct(.data[[usubjid_col]]), .groups = "drop") |> + left_join(n_subjects, by = arm_col) |> + mutate(proportion = n_events / n) + + # Select top N most frequent AEs across all arms + top_aes <- ae_summary |> + group_by(across(all_of(aedecod_col))) |> + summarise(total_events = sum(n_events), .groups = "drop") |> + top_n(n_events, total_events) |> + pull(all_of(aedecod_col)) + + # Get unique treatment arms + arms <- unique(data[[arm_col]]) + if(length(arms) != 2) { + stop("This plot requires exactly two treatment arms.") + } + active_arm <- arms[1] + control_arm <- arms[2] + + # Filter for top AEs and calculate relative risk + ae_rr <- ae_summary |> + filter(.data[[aedecod_col]] %in% top_aes) |> + pivot_wider(id_cols = all_of(aedecod_col), + names_from = all_of(arm_col), + values_from = c(n_events, n, proportion)) |> + mutate( + RR = .data[[paste0("proportion_", active_arm)]] / .data[[paste0("proportion_", control_arm)]], + lower_ci = exp(log(RR) - 1.96 * sqrt( + 1/.data[[paste0("n_events_", active_arm)]] + + 1/.data[[paste0("n_events_", control_arm)]] - + 1/.data[[paste0("n_", active_arm)]] - + 1/.data[[paste0("n_", control_arm)]] + )), + upper_ci = exp(log(RR) + 1.96 * sqrt( + 1/.data[[paste0("n_events_", active_arm)]] + + 1/.data[[paste0("n_events_", control_arm)]] - + 1/.data[[paste0("n_", active_arm)]] - + 1/.data[[paste0("n_", control_arm)]] + )) + ) + + # Prepare data for forest plot + forest_data <- ae_rr |> + mutate( + label = paste0(.data[[aedecod_col]], " (", + .data[[paste0("n_events_", active_arm)]], "/", .data[[paste0("n_", active_arm)]], " vs ", + .data[[paste0("n_events_", control_arm)]], "/", .data[[paste0("n_", control_arm)]], ")") + ) + + # Create forest plot + forestplot::forestplot( + labeltext = cbind(forest_data$label, + sprintf("%.2f (%.2f-%.2f)", forest_data$RR, forest_data$lower_ci, forest_data$upper_ci)), + mean = forest_data$RR, + lower = forest_data$lower_ci, + upper = forest_data$upper_ci, + align = c("l", "r"), + graphwidth = unit(60, "mm"), + cex = 0.9, + lineheight = unit(8, "mm"), + boxsize = 0.35, + col = fpColors(box = "royalblue", line = "darkblue", summary = "royalblue"), + txt_gp = fpTxtGp(label = gpar(cex = 0.9), ticks = gpar(cex = 0.9), xlab = gpar(cex = 0.9)), + xlab = paste("Relative Risk (", active_arm, " / ", control_arm, ")"), + zero = 1, + lwd.zero = 2, + lwd.ci = 2, + xticks = c(0.5, 1, 2, 4), + grid = TRUE, + title = paste("Relative Risk of Adverse Events (", active_arm, " vs ", control_arm, ")") + ) +} + + + +forest_plot_block <- function(...) { + new_block( + fields = list( + usubjid_col = new_select_field("USUBJID", function(data) colnames(data), multiple = FALSE, title = "Subject ID Column"), + arm_col = new_select_field("ACTARM", function(data) colnames(data), multiple = FALSE, title = "Treatment Arm Column"), + aedecod_col = new_select_field("AEDECOD", function(data) colnames(data), multiple = FALSE, title = "AE Term Column"), + n_events = new_numeric_field(10, min = 5, max = 20, step = 1, title = "Number of Top AEs to Display") + ), + expr = quote({ + data |> + filter(.data[[.(arm_col)]] != "Placebo") |> + create_ae_forest_plot(.(usubjid_col), .(arm_col), .(aedecod_col), .(n_events)) + }), + class = c("adverse_event_plot_block", "plot_block"), + ... + ) +} + +# Register the custom block +register_block( + forest_plot_block, + name = "Adverse Event Forest Plot", + description = "Create a forest plot of adverse events comparing two treatment arms", + classes = c("adverse_event_plot_block", "plot_block"), + input = "data.frame", + output = "plot" +) + +# Create the stack +clinical_trial_stack <- new_stack( + new_dataset_block( + selected = "adae", + package = "pharmaverseadam" + ), + # filter_in_block(), + forest_plot_block() + +) + +serve_stack(clinical_trial_stack) +``` +#### 2.2 Demographics Table + +This demographics table is taken from the `cardinal` package of [FDA Safety Tables and Figures](https://github.com/pharmaverse/cardinal) and demonstrates `gt` and `rtables` outputs starting from the [pharmaverseadam](https://github.com/pharmaverse/pharmaverseadam) `adsl` dataset. + +```{r tables_blocks, include=FALSE} + + +#' @export +cardinal02_block <- function(data, ...){ + blockr::initialize_block(new_cardinal02_block(data, ...), data) +} + +#' @method server_output rtables_block +#' @export +server_output.rtables_block <- function (x, result, output) { + shiny::renderUI({ + txt <- utils::capture.output(print(result()$rtables)) + txt <- paste0(txt, collapse = "\n") + + if(length(result()$gt) > 0){ + shiny::tabsetPanel( + shiny::tabPanel( + "GT", + result()$gt + ), + shiny::tabPanel( + "Text", + tags$pre( + tags$code( + txt + ) + ) + ), + shiny::tabPanel( + "HTML", + rtables::as_html(result()$rtables, class_table = "table") + ) + ) + } else { + shiny::tabsetPanel( + shiny::tabPanel( + "Text", + tags$pre( + tags$code( + txt + ) + ) + ), + shiny::tabPanel( + "HTML", + rtables::as_html(result()$rtables, class_table = "table") + ) + ) + + } + }) +} + +#' @method uiOutputBlock rtables_block +#' @export +uiOutputBlock.rtables_block <- function (x, ns) { + shiny::uiOutput(ns("res")) +} + +#' @method evaluate_block rtables_block +#' @export +evaluate_block.rtables_block <- function (x, data, ...) { + stopifnot(...length() == 0L) + eval(substitute(data %>% expr, list(expr = generate_code(x))), + list(data = data)) +} + +#' @method generate_server rtables_block +#' @export +generate_server.rtables_block <- function (...) { + blockr:::generate_server_block(...) +} + +#' @method block_combiner rtables_block +#' @export +block_combiner.rtables_block <- function (left, right, ...) { + substitute(left %>% right, list(left = generate_code(left), + right = generate_code(right))) +} + + +``` + + + +```{r cardinal_tables} +library(shiny) +library(blockr) +library(pharmaverseadam) + + +cardinal02_block <- function( + data, + ... +){ + + all_cols <- function(data) colnames(data) + + fields <- list( + columns = new_select_field(c("SEX", "AGE", "AGEGR1", "RACE", "ETHNIC", "COUNTRY"), all_cols, multiple = TRUE, title = "Variables") + ) + + expr <- quote({ + data <- droplevels(data) + + rtables <- cardinal::make_table_02( + df = data, + vars = .(columns) + ) + + gt <- cardinal::make_table_02_gtsum( + df = data, + vars = .(columns) + ) + + list( + rtables = rtables, + gt = gt + ) + }) + + blockr::new_block( + expr = expr, + fields = fields, + ..., + class = c("cardinal02_block", "rtables_block", "submit_block") + ) +} + + +blockr::register_block( + cardinal02_block, + "Cardinal 02", + "A Cardinal 02 table", + input = "data.frame", + output = "list", + classes = c("cardinal02_block", "rtables_block", "submit_block") +) + + +# Create the stack +rtables_stack <- new_stack( + blockr::new_dataset_block( package="pharmaverseadam", + selected = "adsl", + ), + cardinal02_block() +) + +serve_stack(rtables_stack) + +``` + + + + + +### 3. Environmental Science: Air Quality Analysis and Prediction + +This example demonstrates a pipeline for analyzing air quality data and predicting future pollution levels using actual data from the [openair](https://github.com/cran/openair) package. +This pipeline imports actual air quality data from the `openair` package and forecasts future pollution levels using an ARIMA model. + +```{r environment} +library(blockr) +library(openair) + + +# Custom block for air quality data import +air_quality_block <- function(...) { + new_block( + fields = list( + site = new_select_field("kc1", \()openair::importMeta()$code, multiple = FALSE, title = "Monitoring Site"), + start_year = new_numeric_field(2020, min = 1990, max = as.numeric(format(Sys.Date(), "%Y")), step = 1, title = "Start Year"), + end_year = new_numeric_field(as.numeric(format(Sys.Date(), "%Y")), min = 1990, max = as.numeric(format(Sys.Date(), "%Y")), step = 1, title = "End Year") + ), + expr = quote({ + importAURN(site = .(site), year = .(start_year):.(end_year)) |> tail(700) + }), + class = c("air_quality_block", "data_block"), + ... + ) +} + + +# Custom block for pollution forecasting +pollution_forecast_block <- function(columns = character(), ...) { + all_cols <- function(data) setdiff(colnames(data), c("date", "site", "source")) + + new_block( + fields = list( + pollutant = new_select_field(columns, all_cols, multiple = FALSE, title = "Pollutant"), + horizon = new_slider_field(30, min = 1, max = 365, step = 1, title = "Forecast Horizon (days)") + ), + expr = quote({ + ts_data <- ts(na.omit(data[[.(pollutant)]]), frequency = 365) + model <- auto.arima(ts_data) + forecast_result <- forecast(model, h = .(horizon)) + plot(forecast_result, main = paste("Forecast of", .(pollutant), "levels")) + }), + class = c("pollution_forecast_block", "plot_block"), + ... + ) +} + +# Register custom blocks +register_block(air_quality_block, name = "Air Quality Data", description = "Import air quality data", classes = c("air_quality_block", "data_block"), input = NA_character_, output = "data.frame") + +register_block(pollution_forecast_block, name = "Pollution Forecast", description = "Forecast pollution levels", classes = c("pollution_forecast_block", "plot_block"), input = "data.frame", output = "plot") + +# Create the stack +air_quality_stack <- new_stack( + air_quality_block(), + pollution_forecast_block(columns="no2") +) + +serve_stack(air_quality_stack) +``` +### 4. Marketing: Causal Impact Analysis of Marketing Interventions + +This example demonstrates how to use [CausalImpact](https://google.github.io/CausalImpact/CausalImpact.html) to analyze the effect of marketing interventions on sales data. +This pipeline generates dummy marketing data with an intervention, then uses CausalImpact to analyze the effect of the intervention on sales. + +```{r date_field, include=FALSE} +new_date_field <- function(value = Sys.Date(), min = NULL, max = NULL, ...) { + blockr::new_field( + value = value, + min = min, + max = max, + ..., + class = "date_field" + ) +} + +date_field <- function(...) { + validate_field(new_date_field(...)) +} + +#' @method ui_input date_field +#' @export +ui_input.date_field <- function(x, id, name) { + ns <- NS(input_ids(x, id)) + + shiny::dateInput( + blockr:::input_ids(x, id), + name, + value = blockr::value(x, "value"), + min = blockr::value(x, "min"), + max = blockr::value(x, "max") + ) +} + +#' @method validate_field date_field +#' @export +validate_field.date_field <- function(x, ...) { + x +} + +#' @method ui_update date_field +#' @export +ui_update.date_field <- function(x, session, id, name) { + updateDateInput( + session, + blockr:::input_ids(x, id), + blockr:::get_field_name(x, name), + value = blockr::value(x), + min = blockr::value(x, "min"), + max = blockr::value(x, "max") + ) +} +``` + + + + +```{r intervention_impact} +library(blockr) +library(CausalImpact) +library(dplyr) + +# Custom block to load and prepare marketing data +marketing_data_block <- function(...) { + new_block( + fields = list( + start_date = date_field(Sys.Date() - 365, min = Sys.Date() - 730, max = Sys.Date() - 1, label = "Start Date"), + intervention_date = date_field(Sys.Date() - 180, min = Sys.Date() - 729, max = Sys.Date(), label = "Intervention Date"), + end_date = date_field(Sys.Date(), min = Sys.Date() - 364, max = Sys.Date(), label = "End Date") + ), + expr = quote({ + # Generate dummy data for demonstration + dates <- seq(as.Date(.(start_date)), as.Date(.(end_date)), by = "day") + sales <- cumsum(rnorm(length(dates), mean = 100, sd = 10)) + ad_spend <- cumsum(rnorm(length(dates), mean = 50, sd = 5)) + + # Add intervention effect + intervention_index <- which(dates == as.Date(.(intervention_date))) + sales[intervention_index:length(sales)] <- sales[intervention_index:length(sales)] * 1.2 + + data.frame( + date = dates, + sales = sales, + ad_spend = ad_spend + ) + }), + class = c("marketing_data_block", "data_block"), + ... + ) +} + +# Custom block for CausalImpact analysis +causal_impact_block <- function(columns = character(), ...) { + all_cols <- function(data) colnames(data)[2:length(colnames(data))] + + new_block( + fields = list( + response_var = new_select_field(columns, all_cols, multiple = FALSE, title = "Response Variable"), + covariate_var = new_select_field(columns, all_cols, multiple = FALSE, title = "Covariate Variable"), + pre_period_end = date_field(Sys.Date() - 181, min = Sys.Date() - 729, max = Sys.Date() - 1, label = "Pre-Period End Date"), + post_period_start = date_field(Sys.Date() - 180, min = Sys.Date() - 728, max = Sys.Date(), label = "Post-Period Start Date") + ), + expr = quote({ + data <- data.frame( + y = data[[.(response_var)]], + x = data[[.(covariate_var)]] + ) + + pre_period <- c(min(data$date), as.Date(.(pre_period_end))) + post_period <- c(as.Date(.(post_period_start)), max(data$date)) + + impact <- CausalImpact(data, pre_period, post_period) + plot(impact) + }), + class = c("causal_impact_block", "plot_block"), + ... + ) +} + +# Register custom blocks +register_block( + new_marketing_data_block, + name = "Marketing Data", + description = "Load and prepare marketing data", + classes = c("marketing_data_block", "data_block"), + input = NA_character_, + output = "data.frame" +) +register_block( + new_causal_impact_block, + name = "Causal Impact Analysis", + description = "Perform Causal Impact analysis on marketing data", + classes = c("causal_impact_block", "plot_block"), + input = "data.frame", + output = "plot" +) + +# Create the stack +marketing_impact_stack <- new_stack( + marketing_data_block(), + causal_impact_block() +) + +serve_stack(marketing_impact_stack) +``` \ No newline at end of file