♻️ move more out of habit_plots.R and add run_tests.R

LabNeuroCogDevel · Dec 10, 2023 · baa22b0 · baa22b0
1 parent fa0d7f6
commit baa22b0
Show file tree

Hide file tree

Showing 4 changed files with 126 additions and 47 deletions.
diff --git a/results/Makefile b/results/Makefile
@@ -4,6 +4,8 @@
 .ALWAYS: /Volumes/Hera/Projects/Habit/mr/habit/txt/all_habit.json
 
 all: lab.summary.csv # prev data_all.tsv, no longer need to update heroku.summary.csv
+check:
+	Rscript run_tests.R
 
 #mkifdiff from lncdtools
 heroku.raw.json: .ALWAYS

diff --git a/results/habit_plots.R b/results/habit_plots.R
@@ -6,43 +6,6 @@ source('read_raw.R') # read_raw()
 theme_set(theme_cowplot())
 select <- dplyr::select
 
-side_label_to_num <- function(side) ifelse(side == 'left', 1, ifelse(side=='up', 2, ifelse(side=='right', 3, NA)))
-blocktype_to_num <- function(blocktype)
-    ifelse(   blocktype == 'init',                1,
-     ifelse(  blocktype == 'switch1',             2,
-      ifelse( blocktype %in% c("rev2","switch2"), 3,
-       ifelse(grepl('devalue', blocktype),        4,
-                                                  NA))))
-
-# make sure we get the blocks correctly
-test_blocktypenum<-function(){
-    testthat::expect_equal(blocktype_to_num('switch1'), 2)
-    testthat::expect_equal(blocktype_to_num('devalue_all_100'), 4)
-    testthat::expect_equal(blocktype_to_num('devalue_good_75'), 4)
-    testthat::expect_equal(blocktype_to_num(c('switch1','devalue_all_low')), c(2,4))
-    testthat::expect_false(rawdata$blocktype %>% unique %>% blocktype_to_num %>% is.na %>% any)
-}
-
-true.na <- function(x) !is.na(x) & x
-add_choice_cols <- function(data) {
-   data %>%
-   mutate(
-     choiceWell  = side_label_to_num(picked),
-     avoidedWell = side_label_to_num(avoided),
-     # what wells be chosen
-     farAvailable = choiceWell == farWell | avoidedWell == farWell,
-     initHighAval = choiceWell == initHigh | avoidedWell == initHigh,
-     # only care about high and best decisions
-     # NA needed for subsetting? cant use e.g.: choseFar = farAvailable & choiceWell == farWell,
-     choseFar        = ifelse(farAvailable, choiceWell == farWell, NA),
-     avoidedFar      = ifelse(farAvailable, avoidedWell == farWell, NA),
-     choseInitHigh   = ifelse(initHighAval, choiceWell == initHigh,NA),
-     avoidedInitHigh = ifelse(initHighAval, avoidedWell == initHigh,NA),
-     # 
-     blocknum = blocktype_to_num(blocktype),
-     choiceType = ifelse(true.na(choseFar), 'Far', ifelse(true.na(choseInitHigh), 'InitHigh', 'InitLow')))
-}
-
 read_summary <- function() read.csv("lab.summary.csv", comment.char="")
 
 

diff --git a/results/read_raw.R b/results/read_raw.R
@@ -1,8 +1,69 @@
-read_raw <- function(fname="lab.data.tsv") {
+# sourced from habit_plots.R.
+# reads output of read.R:fix_and_save(). likely "lab.data.tsv"
+# see Makefile
+#
+# 20231210WF - pull out from habit_plots.R to work on bad merge (FC fixes)
+#
+# NOTES:
+# originally the task had a "far" that was always "good" (prob=highest)
+# now "far" means constant-highest
+# 
+# one version of the task devalued all to a lower prob even the "far"/"good" well: "deval_all_low"
+#  we nowe "devalue" by setting all wells to 100% probability: "devalue_all_100"
+pacman::p_load(tidyr, dplyr)
+
+# exclude testing runs (IDs with our initials, or 'x')
+BAD_IDS <- c("WWF|ACP|^x$")
+MIN_TRIALS <- 90
+
+side_label_to_num <- function(side) ifelse(side == 'left', 1, ifelse(side=='up', 2, ifelse(side=='right', 3, NA)))
+
+# "harmonize" blocks to run order: parse known names to 1-4
+# devaule always last
+# TODO: rewrite with case_when?
+blocktype_to_num <- function(blocktype)
+    ifelse(   blocktype == 'init',                1,
+     ifelse(  blocktype == 'switch1',             2,
+      ifelse( blocktype %in% c("rev2","switch2"), 3,
+       ifelse(grepl('devalue', blocktype),        4,
+                                                  NA))))
+
+# make sure we get the blocks correctly
+test_blocktypenum<-function(){
+    testthat::expect_equal(blocktype_to_num('switch1'), 2)
+    testthat::expect_equal(blocktype_to_num('devalue_all_100'), 4)
+    testthat::expect_equal(blocktype_to_num('devalue_good_75'), 4)
+    testthat::expect_equal(blocktype_to_num(c('switch1','devalue_all_low')), c(2,4))
+    testthat::expect_false(format_raw_data('lab.data.tsv')$blocktype %>%
+                           unique %>% blocktype_to_num %>% is.na %>% any)
+}
+
+
+
+true.na <- function(x) !is.na(x) & x
+add_choice_cols <- function(data) {
+   data %>%
+   mutate(
+     choiceWell  = side_label_to_num(picked),
+     avoidedWell = side_label_to_num(avoided),
+     # what wells be chosen
+     farAvailable = choiceWell == farWell | avoidedWell == farWell,
+     initHighAval = choiceWell == initHigh | avoidedWell == initHigh,
+     # only care about high and best decisions
+     # NA needed for subsetting? cant use e.g.: choseFar = farAvailable & choiceWell == farWell,
+     choseFar        = ifelse(farAvailable, choiceWell == farWell, NA),
+     avoidedFar      = ifelse(farAvailable, avoidedWell == farWell, NA),
+     choseInitHigh   = ifelse(initHighAval, choiceWell == initHigh,NA),
+     avoidedInitHigh = ifelse(initHighAval, avoidedWell == initHigh,NA),
+     # 
+     blocknum = blocktype_to_num(blocktype),
+     choiceType = ifelse(true.na(choseFar), 'Far', ifelse(true.na(choseInitHigh), 'InitHigh', 'InitLow')))
+}
+
+test_add_choice_cols <- function(){
+}
 
-  # exclude testing runs (IDs with our initials, or 'x')
-  BAD_IDS <- c("WWF|ACP|^x$")
-  MIN_TRIALS <- 90
+format_raw_data <- function(fname){
 
   rawdata <- read.csv(fname, sep='\t') %>% 
     filter(!grepl(BAD_IDS, id)) %>%
@@ -13,10 +74,19 @@ read_raw <- function(fname="lab.data.tsv") {
            age=ifelse(is.na(age),survey_age,age))
     #filter(id != 'WWF34M' & id != 'ACP34F' & id != 'AP' & id != 'FC' & ver == '20211104v4-90max_pbar_moredeval')
     #filter(ver == '20211025v3-longertrials')
+}
+
+read_raw <- function(fname="lab.data.tsv") {
+
+  rawdata <- format_raw_data(fname)
 
   # have various combinations of blocks
   #   init-switch1-devalue_all_100
   #   init-switch1-devalue_all_100-devalue_all_low
+  #
+  # cols like:
+  #      id vdate               task      ver   timepoint blockseq
+  # 11734 2022-11-11 00:00:00 habit_mr  mr_1   20221111 init-switch1-devalue_all_100  
   blockseq_df <-
       rawdata %>% group_by(id,vdate, task, ver,timepoint) %>%
       arrange(trial) %>%
@@ -37,12 +107,37 @@ read_raw <- function(fname="lab.data.tsv") {
        group_by(id, vdate, task, ver, age=survey_age) %>%
        summarize(ntrials = max(trial)) %>%
        filter(ntrials > MIN_TRIALS)
+
+  nvisits <- nrow(blockseq_df)
+  testthat::expect_equal(nvisits, nrow(first_trial_wellnames))
+  testthat::expect_equal(nvisits, nrow(total_trials))
 
-
-  perSubj <- merge(total_trials, first_trial_wellnames, by=c('id','vdate','ver','task'))
+  perSubj <-
+     merge(total_trials, first_trial_wellnames, by=c('id','vdate','ver','task')) %>%
+     merge(blockseq_df, by=c("id","vdate","ver","task"),all.x=T)
+
+  # number of visits should match across visit summary stats dataframes
+  testthat::expect_equal(nvisits, nrow(perSubj))
+
+
+  raw_with_visit_summary <-
+     merge(rawdata, perSubj,
+           by=c('id','vdate', 'ver', 'task','timepoint')) %>%
+     add_choice_cols 
+
+  # number of visits should match across visit summary stats dataframes
+  testthat::expect_equal(nrow(rawdata), nrow(raw_with_visit_summary))
+
+  return(raw_with_visit_summary)
+}
 
-  rawdata %>%
-      merge(perSubj, by=c('id','vdate', 'ver', 'task')) %>%
-      left_join(blockseq_df, by=c("id","vdate","ver","task","timepoint")) %>%
-  add_choice_cols 
+test_read_raw <- function(){
+  fname <- "lab.data.tsv"
+  raw_orig <- read.table(fname,sep="\t",header=T)
+  raw_read <- read_raw(fname)
+  # raw shouldn't have added more rows
+  testthat::expect_gte(nrow(raw_orig),nrow(raw_read))
+  # shouldn't have more than one trial per timepoint+task (habit_mr, habit_eeg)
+  n_bad_merge <- raw_read %>% count(id,vdate,timepoint,task,trial) %>% filter(n>1) %>% nrow
+  testthat::expect_equal(n_bad_merge,0)
 }
diff --git a/results/run_tests.R b/results/run_tests.R
@@ -0,0 +1,19 @@
+#!/usr/bin/env Rscript
+
+# run all test_* functions
+# test functions are kept next to the actual functions they test.
+# this is not idomatic R, where we'd want tests/test_*.R (eg. from `usethis::use_test("habit")`)
+# so we need our own wrapper instead of testthat::run_test()
+#
+# 20231210WF - init
+
+source('read_raw.R')
+#lapply(grep('test_',ls(),value=T),do.call,args=list())
+my_test_funcs <- grep('test_',ls(),value=T)
+my_test_funcs <- Filter(function(fname) class(get(fname))=="function", my_test_funcs)
+
+invisible(lapply(my_test_funcs,
+       function(tname)
+          testthat::test_that(tname, {
+             do.call(tname,args=list())
+          })))