From 217f5342e0dd40809324c96307f090012bb9aad2 Mon Sep 17 00:00:00 2001
From: tarapk <karma.tarap@bms.com>
Date: Tue, 9 Jul 2024 11:04:19 +0000
Subject: [PATCH] Examples file, examples 1 and 2 work, some issues with 3 and
 4

---
 vignettes/blockr_examples.Rmd | 716 ++++++++++++++++++++++++++++++++++
 1 file changed, 716 insertions(+)
 create mode 100644 vignettes/blockr_examples.Rmd

diff --git a/vignettes/blockr_examples.Rmd b/vignettes/blockr_examples.Rmd
new file mode 100644
index 00000000..1417c069
--- /dev/null
+++ b/vignettes/blockr_examples.Rmd
@@ -0,0 +1,716 @@
+---
+title: "blockr: Flexible Data Pipeline Building for Everyone"
+output: html_document
+date: "2024-07-07"
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Why blockr?
+
+`blockr` is an R package designed to democratize data analysis by providing a flexible, intuitive, and code-free approach to building data pipelines. It allows users to create powerful data workflows using pre-built blocks that can be easily connected and customized, all without writing a single line of code.
+
+### Key Features
+
+1. **User-Friendly Interface**: Build data pipelines using a drag-and-drop interface.
+2. **Flexibility**: Easily add, remove, or rearrange blocks in your pipeline.
+3. **Extensibility**: Developers can create custom blocks to extend functionality.
+4. **Reproducibility**: Pipelines created with `blockr` are easily shareable and reproducible, with exportable code.
+5. **Interactivity**: Real-time feedback as you build and modify your pipeline.
+
+## blockr Across Industries
+
+The flexibility of `blockr` makes it valuable across various industries. Let's explore how it can be applied in different sectors with detailed examples:
+
+
+
+```{r slider_block, include=FALSE}
+# Provided by John, new_range_field not working
+new_slider_field <- function(value = 5L, min = 0L, max = 10L, step = 1L, ...){
+  blockr::new_field(
+    value = value,
+    min = min,
+    max = max,
+    ...,
+    class = "slider_field"
+  )
+}
+slider_field <- function(...){
+  validate_field(new_slider_field(...))
+}
+
+#' @method ui_input slider_field
+#' @export
+ui_input.slider_field <- function(x, id, name){
+  ns <- NS(input_ids(x, id))
+
+  shiny::sliderInput(
+    blockr:::input_ids(x, id),
+    name,
+    value = blockr::value(x, "value"),
+    min = blockr::value(x, "min"),
+    max = blockr::value(x, "max"),
+    step = blockr::value(x, "step")
+  )
+}
+
+#' @method validate_field slider_field
+#' @export
+validate_field.slider_field <- function(x, ...){
+  x
+}
+
+#' @method ui_update slider_field
+#' @export
+ui_update.slider_field <- function(x, session, id, name) {
+  updateSliderInput(
+    session,
+    blockr:::input_ids(x, id),
+    blockr:::get_field_name(x, name),
+    blockr::value(x),
+    blockr::value(x, "min"),
+    blockr::value(x, "max")
+  )
+}
+
+
+```
+
+```{r filter_block, include=FALSE}
+#' Correlation plot
+#' @param data dataset.
+#' @param ... ignored.
+#' @export
+filter_in_block <- function(data, ...) {
+  sub_fields <- \(data, column){
+    if(!length(column))
+      column <- names(data)[1]
+
+    list(
+      values = blockr::select_field(data[[column]][1], unique(data[[column]]), multiple = TRUE)
+    )
+  }
+
+  fields <- list(
+    column = blockr::new_select_field(\(data) names(data)[1], choices = \(data) names(data), title = "Column"),
+    values = blockr::new_list_field(character(), sub_fields)
+  )
+
+  blockr::new_block(
+    fields = fields,
+    expr = quote({
+      vals <- unname(.(values))
+      data |>
+        dplyr::filter(!!dplyr::sym(.(column)) %in% vals[[1]])
+    }),
+    ...,
+    class = c("filter_in_block", "transform_block", "submit_block")
+  )
+}
+
+  blockr::register_block(
+    constructor = filter_in_block,
+    name = "Filter in",
+    description = "Filter on a vector",
+    classes = c("filter_in_block", "transform_block"),
+    input = "data.frame",
+    output = "data.frame"
+  )
+```
+
+
+### 1. Finance: Stock Price Forecasting
+
+In this example, we'll create a pipeline that fetches recent stock data using the [quantmod package](https://github.com/joshuaulrich/quantmod), performs time series analysis, and forecasts future stock prices using the [Prophet model](https://github.com/facebook/prophet).
+
+```{r stock_example}
+library(blockr)
+library(quantmod)
+library(prophet)
+
+# Custom block to fetch stock data
+stock_data_block <- function(...) {
+  # stocks to pick (top 10)
+  pick_stock <- \()  c("NVDA", "TSLA", "AAPL", "MSFT", "AVGO", "AMZN", "AMD", "PLTR", "TSM", "META")
+
+
+  new_block(
+    fields = list(
+      ticker = new_select_field(pick_stock()[1], pick_stock, multiple = FALSE, title = "Ticker")
+    ),
+    expr = quote({
+      data_xts <- getSymbols(.(ticker), src = "yahoo", auto.assign = FALSE)
+      data.frame(Date = index(data_xts), coredata(data_xts)) |>
+        tail(700) # only considering last 700 days for this example
+    }),
+    class = c("stock_data_block", "data_block"),
+    ...
+  )
+}
+
+# Custom block for Prophet forecasting
+prophet_forecast_block <- function(columns = character(), ...) {
+  all_cols <- function(data) colnames(data)[2:length(colnames(data))]
+
+
+  new_block(
+    fields = list(
+      # date_col = new_select_field(columns, all_cols, multiple=FALSE, title="Date"),
+      value_col = new_select_field(columns, all_cols, multiple = FALSE, title = "Value"),
+      periods = new_slider_field(7, min = 0, max = 365, title = "Forecast duration")
+    ),
+    expr = quote({
+    
+      df <- data.frame(
+        ds = data$Date,
+        y = data[[.(value_col)]]
+      )
+      model <- prophet(df)
+      future <- make_future_dataframe(model, periods =.( periods))
+      forecast <- predict(model, future)
+      plot(model, forecast)
+    }),
+    class = c("prophet_forecast_block", "plot_block"),
+    ...
+  )
+}
+
+# Register custom blocks
+register_block(
+  stock_data_block,
+  name = "Stock Data",
+  description = "Fetch stock data",
+  classes = c("stock_data_block", "data_block"),
+  input = NA_character_,
+  output = "data.frame"
+)
+register_block(
+  prophet_forecast_block,
+  name = "Prophet Forecast",
+  description = "Forecast using Prophet",
+  classes = c("prophet_forecast_block", "plot_block"),
+  input = "data.frame",
+  output = "plot"
+)
+
+# Create the stack
+stock_forecast_stack <- new_stack(
+  stock_data_block(),
+  prophet_forecast_block()
+)
+
+serve_stack(stock_forecast_stack)
+```
+ 
+
+### 2. Pharmaceutical: Clinical Trial Analysis
+
+#### 2.1 AE Forest Plot
+
+This forest plot visualizes the relative risk of adverse events between two treatment arms in a clinical trial. In this case, it compares "Xanomeline High Dose" to "Xanomeline Low Dose" starting from the [pharmaverseadam](https://github.com/pharmaverse/pharmaverseadam) `adae` dataset.
+
+```{r ae_forest}
+
+# Function to create adverse event forest plot
+create_ae_forest_plot <- function(data, usubjid_col, arm_col, aedecod_col, n_events) {
+
+  
+    # Convert column names to strings
+    usubjid_col <- as.character(substitute(usubjid_col))
+    arm_col <- as.character(substitute(arm_col))
+    aedecod_col <- as.character(substitute(aedecod_col))
+    
+    # Calculate the total number of subjects in each arm
+    n_subjects <- data |>
+        select(all_of(c(usubjid_col, arm_col))) |>
+        distinct() |>
+        group_by(across(all_of(arm_col))) |>
+        summarise(n = n(), .groups = "drop")
+    
+    # Calculate AE frequencies and proportions
+    ae_summary <- data |>
+        group_by(across(all_of(c(arm_col, aedecod_col)))) |>
+        summarise(n_events = n_distinct(.data[[usubjid_col]]), .groups = "drop") |>
+        left_join(n_subjects, by = arm_col) |>
+        mutate(proportion = n_events / n)
+    
+    # Select top N most frequent AEs across all arms
+    top_aes <- ae_summary |>
+        group_by(across(all_of(aedecod_col))) |>
+        summarise(total_events = sum(n_events), .groups = "drop") |>
+        top_n(n_events, total_events) |>
+        pull(all_of(aedecod_col))
+    
+    # Get unique treatment arms
+    arms <- unique(data[[arm_col]])
+    if(length(arms) != 2) {
+        stop("This plot requires exactly two treatment arms.")
+    }
+    active_arm <- arms[1]
+    control_arm <- arms[2]
+    
+    # Filter for top AEs and calculate relative risk
+    ae_rr <- ae_summary |>
+        filter(.data[[aedecod_col]] %in% top_aes) |>
+        pivot_wider(id_cols = all_of(aedecod_col), 
+                    names_from = all_of(arm_col), 
+                    values_from = c(n_events, n, proportion)) |>
+        mutate(
+            RR = .data[[paste0("proportion_", active_arm)]] / .data[[paste0("proportion_", control_arm)]],
+            lower_ci = exp(log(RR) - 1.96 * sqrt(
+                1/.data[[paste0("n_events_", active_arm)]] + 
+                    1/.data[[paste0("n_events_", control_arm)]] - 
+                    1/.data[[paste0("n_", active_arm)]] - 
+                    1/.data[[paste0("n_", control_arm)]]
+            )),
+            upper_ci = exp(log(RR) + 1.96 * sqrt(
+                1/.data[[paste0("n_events_", active_arm)]] + 
+                    1/.data[[paste0("n_events_", control_arm)]] - 
+                    1/.data[[paste0("n_", active_arm)]] - 
+                    1/.data[[paste0("n_", control_arm)]]
+            ))
+        )
+    
+    # Prepare data for forest plot
+    forest_data <- ae_rr |>
+        mutate(
+            label = paste0(.data[[aedecod_col]], " (", 
+                           .data[[paste0("n_events_", active_arm)]], "/", .data[[paste0("n_", active_arm)]], " vs ",
+                           .data[[paste0("n_events_", control_arm)]], "/", .data[[paste0("n_", control_arm)]], ")")
+        )
+    
+    # Create forest plot
+    forestplot::forestplot(
+        labeltext = cbind(forest_data$label, 
+                          sprintf("%.2f (%.2f-%.2f)", forest_data$RR, forest_data$lower_ci, forest_data$upper_ci)),
+        mean = forest_data$RR,
+        lower = forest_data$lower_ci,
+        upper = forest_data$upper_ci,
+        align = c("l", "r"),
+        graphwidth = unit(60, "mm"),
+        cex = 0.9,
+        lineheight = unit(8, "mm"),
+        boxsize = 0.35,
+        col = fpColors(box = "royalblue", line = "darkblue", summary = "royalblue"),
+        txt_gp = fpTxtGp(label = gpar(cex = 0.9), ticks = gpar(cex = 0.9), xlab = gpar(cex = 0.9)),
+        xlab = paste("Relative Risk (", active_arm, " / ", control_arm, ")"),
+        zero = 1,
+        lwd.zero = 2,
+        lwd.ci = 2,
+        xticks = c(0.5, 1, 2, 4),
+        grid = TRUE,
+        title = paste("Relative Risk of Adverse Events (", active_arm, " vs ", control_arm, ")")
+    )
+}
+ 
+ 
+
+forest_plot_block <- function(...) {
+  new_block(
+    fields = list(
+      usubjid_col = new_select_field("USUBJID", function(data) colnames(data), multiple = FALSE, title = "Subject ID Column"),
+      arm_col = new_select_field("ACTARM", function(data) colnames(data), multiple = FALSE, title = "Treatment Arm Column"),
+      aedecod_col = new_select_field("AEDECOD", function(data) colnames(data), multiple = FALSE, title = "AE Term Column"),
+      n_events = new_numeric_field(10, min = 5, max = 20, step = 1, title = "Number of Top AEs to Display")
+    ),
+    expr = quote({
+      data |> 
+      filter(.data[[.(arm_col)]] != "Placebo") |> 
+      create_ae_forest_plot(.(usubjid_col), .(arm_col), .(aedecod_col), .(n_events))
+    }),
+    class = c("adverse_event_plot_block", "plot_block"),
+    ...
+  )
+}
+
+# Register the custom block
+register_block(
+  forest_plot_block,
+  name = "Adverse Event Forest Plot",
+  description = "Create a forest plot of adverse events comparing two treatment arms",
+  classes = c("adverse_event_plot_block", "plot_block"),
+  input = "data.frame",
+  output = "plot"
+)
+
+# Create the stack
+clinical_trial_stack <- new_stack(
+  new_dataset_block(
+    selected = "adae",
+    package = "pharmaverseadam"
+  ),
+ # filter_in_block(),
+  forest_plot_block() 
+  
+)
+
+serve_stack(clinical_trial_stack)
+```
+#### 2.2 Demographics Table
+
+This demographics table is taken from the `cardinal` package of [FDA Safety Tables and Figures](https://github.com/pharmaverse/cardinal) and demonstrates `gt` and `rtables` outputs starting from the [pharmaverseadam](https://github.com/pharmaverse/pharmaverseadam) `adsl` dataset.
+
+```{r tables_blocks, include=FALSE}
+
+
+#' @export
+cardinal02_block <- function(data, ...){
+  blockr::initialize_block(new_cardinal02_block(data, ...), data)
+}
+
+#' @method server_output rtables_block
+#' @export
+server_output.rtables_block <- function (x, result, output) {
+    shiny::renderUI({
+        txt <- utils::capture.output(print(result()$rtables))
+        txt <- paste0(txt, collapse = "\n")
+        
+        if(length(result()$gt) > 0){
+            shiny::tabsetPanel(
+                shiny::tabPanel(
+                    "GT",
+                    result()$gt
+                ),
+                shiny::tabPanel(
+                    "Text",
+                    tags$pre(
+                        tags$code(
+                            txt
+                        )
+                    )
+                ),
+                shiny::tabPanel(
+                    "HTML",
+                    rtables::as_html(result()$rtables, class_table = "table")
+                )
+            )
+        } else {
+            shiny::tabsetPanel(
+                shiny::tabPanel(
+                    "Text",
+                    tags$pre(
+                        tags$code(
+                            txt
+                        )
+                    )
+                ),
+                shiny::tabPanel(
+                    "HTML",
+                    rtables::as_html(result()$rtables, class_table = "table")
+                )
+            )
+            
+        }
+    })
+}
+
+#' @method uiOutputBlock rtables_block
+#' @export
+uiOutputBlock.rtables_block <- function (x, ns) {
+    shiny::uiOutput(ns("res"))
+}
+
+#' @method evaluate_block rtables_block
+#' @export
+evaluate_block.rtables_block <- function (x, data, ...) {
+    stopifnot(...length() == 0L)
+    eval(substitute(data %>% expr, list(expr = generate_code(x))), 
+         list(data = data))
+}
+
+#' @method generate_server rtables_block
+#' @export
+generate_server.rtables_block <- function (...) {
+    blockr:::generate_server_block(...)
+}
+
+#' @method block_combiner rtables_block
+#' @export
+block_combiner.rtables_block <- function (left, right, ...) {
+    substitute(left %>% right, list(left = generate_code(left), 
+                                    right = generate_code(right)))
+}
+
+
+```
+
+
+
+```{r cardinal_tables}
+library(shiny)
+library(blockr)
+library(pharmaverseadam)
+
+
+cardinal02_block <- function(
+    data,
+    ...
+){
+ 
+  all_cols <- function(data) colnames(data)
+  
+  fields <- list(
+    columns = new_select_field(c("SEX", "AGE", "AGEGR1", "RACE", "ETHNIC", "COUNTRY"), all_cols, multiple = TRUE, title = "Variables")
+  )
+  
+  expr <- quote({
+    data <- droplevels(data)
+    
+    rtables <- cardinal::make_table_02(
+      df = data,
+      vars = .(columns)
+    )
+    
+    gt <- cardinal::make_table_02_gtsum(
+      df = data,
+      vars = .(columns)
+    )
+    
+    list(
+      rtables = rtables,
+      gt = gt
+    )
+  })
+  
+  blockr::new_block(
+    expr = expr,
+    fields = fields,
+    ...,
+    class = c("cardinal02_block", "rtables_block", "submit_block")
+  )
+}
+
+
+blockr::register_block(
+  cardinal02_block,
+  "Cardinal 02",
+  "A Cardinal 02 table",
+  input = "data.frame",
+  output = "list",
+  classes = c("cardinal02_block", "rtables_block", "submit_block")
+)
+  
+
+# Create the stack
+rtables_stack <- new_stack(
+    blockr::new_dataset_block( package="pharmaverseadam",
+        selected = "adsl",
+    ),
+    cardinal02_block() 
+)
+
+serve_stack(rtables_stack) 
+
+```
+
+
+
+
+ 
+### 3. Environmental Science: Air Quality Analysis and Prediction
+
+This example demonstrates a pipeline for analyzing air quality data and predicting future pollution levels using actual data from the [openair](https://github.com/cran/openair) package.
+This pipeline imports actual air quality data from the `openair` package and forecasts future pollution levels using an ARIMA model.
+
+```{r environment}
+library(blockr)
+library(openair)
+
+
+# Custom block for air quality data import
+air_quality_block <- function(...) {
+  new_block(
+    fields = list(
+      site = new_select_field("kc1", \()openair::importMeta()$code, multiple = FALSE, title = "Monitoring Site"),
+      start_year = new_numeric_field(2020, min = 1990, max = as.numeric(format(Sys.Date(), "%Y")), step = 1, title = "Start Year"),
+      end_year = new_numeric_field(as.numeric(format(Sys.Date(), "%Y")), min = 1990, max = as.numeric(format(Sys.Date(), "%Y")), step = 1, title = "End Year")
+    ),
+    expr = quote({
+      importAURN(site = .(site), year = .(start_year):.(end_year)) |> tail(700)
+    }),
+    class = c("air_quality_block", "data_block"),
+    ...
+  )
+}
+ 
+
+# Custom block for pollution forecasting
+pollution_forecast_block <- function(columns = character(), ...) {
+  all_cols <- function(data) setdiff(colnames(data), c("date", "site", "source"))
+  
+  new_block(
+    fields = list(
+      pollutant = new_select_field(columns, all_cols, multiple = FALSE, title = "Pollutant"),
+      horizon = new_slider_field(30, min = 1, max = 365, step = 1, title = "Forecast Horizon (days)")
+    ),
+    expr = quote({
+      ts_data <- ts(na.omit(data[[.(pollutant)]]), frequency = 365)
+      model <- auto.arima(ts_data)
+      forecast_result <- forecast(model, h = .(horizon))
+      plot(forecast_result, main = paste("Forecast of", .(pollutant), "levels"))
+    }),
+    class = c("pollution_forecast_block", "plot_block"),
+    ...
+  )
+}
+
+# Register custom blocks
+register_block(air_quality_block, name = "Air Quality Data", description = "Import air quality data", classes = c("air_quality_block", "data_block"), input = NA_character_, output = "data.frame")
+
+register_block(pollution_forecast_block, name = "Pollution Forecast", description = "Forecast pollution levels", classes = c("pollution_forecast_block", "plot_block"), input = "data.frame", output = "plot")
+
+# Create the stack
+air_quality_stack <- new_stack(
+  air_quality_block(),
+  pollution_forecast_block(columns="no2")
+)
+
+serve_stack(air_quality_stack)
+```
+### 4. Marketing: Causal Impact Analysis of Marketing Interventions
+
+This example demonstrates how to use [CausalImpact](https://google.github.io/CausalImpact/CausalImpact.html) to analyze the effect of marketing interventions on sales data. 
+This pipeline generates dummy marketing data with an intervention, then uses CausalImpact to analyze the effect of the intervention on sales.
+
+```{r date_field, include=FALSE}
+new_date_field <- function(value = Sys.Date(), min = NULL, max = NULL, ...) {
+    blockr::new_field(
+        value = value,
+        min = min,
+        max = max,
+        ...,
+        class = "date_field"
+    )
+}
+
+date_field <- function(...) {
+    validate_field(new_date_field(...))
+}
+
+#' @method ui_input date_field
+#' @export
+ui_input.date_field <- function(x, id, name) {
+    ns <- NS(input_ids(x, id))
+    
+    shiny::dateInput(
+        blockr:::input_ids(x, id),
+        name,
+        value = blockr::value(x, "value"),
+        min = blockr::value(x, "min"),
+        max = blockr::value(x, "max")
+    )
+}
+
+#' @method validate_field date_field
+#' @export
+validate_field.date_field <- function(x, ...) {
+    x
+}
+
+#' @method ui_update date_field
+#' @export
+ui_update.date_field <- function(x, session, id, name) {
+    updateDateInput(
+        session,
+        blockr:::input_ids(x, id),
+        blockr:::get_field_name(x, name),
+        value = blockr::value(x),
+        min = blockr::value(x, "min"),
+        max = blockr::value(x, "max")
+    )
+}
+```
+
+
+
+
+```{r intervention_impact}
+library(blockr)
+library(CausalImpact)
+library(dplyr)
+
+# Custom block to load and prepare marketing data
+marketing_data_block <- function(...) {
+    new_block(
+        fields = list(
+            start_date = date_field(Sys.Date() - 365, min = Sys.Date() - 730, max = Sys.Date() - 1, label = "Start Date"),
+            intervention_date = date_field(Sys.Date() - 180, min = Sys.Date() - 729, max = Sys.Date(), label = "Intervention Date"),
+            end_date = date_field(Sys.Date(), min = Sys.Date() - 364, max = Sys.Date(), label = "End Date")
+        ),
+        expr = quote({
+            # Generate dummy data for demonstration
+            dates <- seq(as.Date(.(start_date)), as.Date(.(end_date)), by = "day")
+            sales <- cumsum(rnorm(length(dates), mean = 100, sd = 10))
+            ad_spend <- cumsum(rnorm(length(dates), mean = 50, sd = 5))
+            
+            # Add intervention effect
+            intervention_index <- which(dates == as.Date(.(intervention_date)))
+            sales[intervention_index:length(sales)] <- sales[intervention_index:length(sales)] * 1.2
+            
+            data.frame(
+                date = dates,
+                sales = sales,
+                ad_spend = ad_spend
+            )
+        }),
+        class = c("marketing_data_block", "data_block"),
+        ...
+    )
+}
+
+# Custom block for CausalImpact analysis
+causal_impact_block <- function(columns = character(), ...) {
+    all_cols <- function(data) colnames(data)[2:length(colnames(data))]
+    
+    new_block(
+        fields = list(
+            response_var = new_select_field(columns, all_cols, multiple = FALSE, title = "Response Variable"),
+            covariate_var = new_select_field(columns, all_cols, multiple = FALSE, title = "Covariate Variable"),
+            pre_period_end = date_field(Sys.Date() - 181, min = Sys.Date() - 729, max = Sys.Date() - 1, label = "Pre-Period End Date"),
+            post_period_start = date_field(Sys.Date() - 180, min = Sys.Date() - 728, max = Sys.Date(), label = "Post-Period Start Date")
+        ),
+        expr = quote({
+            data <- data.frame(
+                y = data[[.(response_var)]],
+                x = data[[.(covariate_var)]]
+            )
+            
+            pre_period <- c(min(data$date), as.Date(.(pre_period_end)))
+            post_period <- c(as.Date(.(post_period_start)), max(data$date))
+            
+            impact <- CausalImpact(data, pre_period, post_period)
+            plot(impact)
+        }),
+        class = c("causal_impact_block", "plot_block"),
+        ...
+    )
+}
+
+# Register custom blocks
+register_block(
+    new_marketing_data_block,
+    name = "Marketing Data",
+    description = "Load and prepare marketing data",
+    classes = c("marketing_data_block", "data_block"),
+    input = NA_character_,
+    output = "data.frame"
+)
+register_block(
+    new_causal_impact_block,
+    name = "Causal Impact Analysis",
+    description = "Perform Causal Impact analysis on marketing data",
+    classes = c("causal_impact_block", "plot_block"),
+    input = "data.frame",
+    output = "plot"
+)
+
+# Create the stack
+marketing_impact_stack <- new_stack(
+    marketing_data_block(),
+    causal_impact_block()
+)
+
+serve_stack(marketing_impact_stack)
+```
\ No newline at end of file