From baa22b08a7565fb51102f64830726f7b90471873 Mon Sep 17 00:00:00 2001 From: WillForan Date: Sun, 10 Dec 2023 13:36:08 -0500 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20move=20more=20out=20of=20h?= =?UTF-8?q?abit=5Fplots.R=20and=20add=20run=5Ftests.R?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- results/Makefile | 2 + results/habit_plots.R | 37 -------------- results/read_raw.R | 115 ++++++++++++++++++++++++++++++++++++++---- results/run_tests.R | 19 +++++++ 4 files changed, 126 insertions(+), 47 deletions(-) create mode 100644 results/run_tests.R diff --git a/results/Makefile b/results/Makefile index 7215198..a5c1d4f 100644 --- a/results/Makefile +++ b/results/Makefile @@ -4,6 +4,8 @@ .ALWAYS: /Volumes/Hera/Projects/Habit/mr/habit/txt/all_habit.json all: lab.summary.csv # prev data_all.tsv, no longer need to update heroku.summary.csv +check: + Rscript run_tests.R #mkifdiff from lncdtools heroku.raw.json: .ALWAYS diff --git a/results/habit_plots.R b/results/habit_plots.R index 282b522..1212152 100644 --- a/results/habit_plots.R +++ b/results/habit_plots.R @@ -6,43 +6,6 @@ source('read_raw.R') # read_raw() theme_set(theme_cowplot()) select <- dplyr::select -side_label_to_num <- function(side) ifelse(side == 'left', 1, ifelse(side=='up', 2, ifelse(side=='right', 3, NA))) -blocktype_to_num <- function(blocktype) - ifelse( blocktype == 'init', 1, - ifelse( blocktype == 'switch1', 2, - ifelse( blocktype %in% c("rev2","switch2"), 3, - ifelse(grepl('devalue', blocktype), 4, - NA)))) - -# make sure we get the blocks correctly -test_blocktypenum<-function(){ - testthat::expect_equal(blocktype_to_num('switch1'), 2) - testthat::expect_equal(blocktype_to_num('devalue_all_100'), 4) - testthat::expect_equal(blocktype_to_num('devalue_good_75'), 4) - testthat::expect_equal(blocktype_to_num(c('switch1','devalue_all_low')), c(2,4)) - testthat::expect_false(rawdata$blocktype %>% unique %>% blocktype_to_num %>% is.na %>% any) -} - -true.na <- function(x) !is.na(x) & x -add_choice_cols <- function(data) { - data %>% - mutate( - choiceWell = side_label_to_num(picked), - avoidedWell = side_label_to_num(avoided), - # what wells be chosen - farAvailable = choiceWell == farWell | avoidedWell == farWell, - initHighAval = choiceWell == initHigh | avoidedWell == initHigh, - # only care about high and best decisions - # NA needed for subsetting? cant use e.g.: choseFar = farAvailable & choiceWell == farWell, - choseFar = ifelse(farAvailable, choiceWell == farWell, NA), - avoidedFar = ifelse(farAvailable, avoidedWell == farWell, NA), - choseInitHigh = ifelse(initHighAval, choiceWell == initHigh,NA), - avoidedInitHigh = ifelse(initHighAval, avoidedWell == initHigh,NA), - # - blocknum = blocktype_to_num(blocktype), - choiceType = ifelse(true.na(choseFar), 'Far', ifelse(true.na(choseInitHigh), 'InitHigh', 'InitLow'))) -} - read_summary <- function() read.csv("lab.summary.csv", comment.char="") diff --git a/results/read_raw.R b/results/read_raw.R index b8cb588..dbbc47f 100644 --- a/results/read_raw.R +++ b/results/read_raw.R @@ -1,8 +1,69 @@ -read_raw <- function(fname="lab.data.tsv") { +# sourced from habit_plots.R. +# reads output of read.R:fix_and_save(). likely "lab.data.tsv" +# see Makefile +# +# 20231210WF - pull out from habit_plots.R to work on bad merge (FC fixes) +# +# NOTES: +# originally the task had a "far" that was always "good" (prob=highest) +# now "far" means constant-highest +# +# one version of the task devalued all to a lower prob even the "far"/"good" well: "deval_all_low" +# we nowe "devalue" by setting all wells to 100% probability: "devalue_all_100" +pacman::p_load(tidyr, dplyr) + +# exclude testing runs (IDs with our initials, or 'x') +BAD_IDS <- c("WWF|ACP|^x$") +MIN_TRIALS <- 90 + +side_label_to_num <- function(side) ifelse(side == 'left', 1, ifelse(side=='up', 2, ifelse(side=='right', 3, NA))) + +# "harmonize" blocks to run order: parse known names to 1-4 +# devaule always last +# TODO: rewrite with case_when? +blocktype_to_num <- function(blocktype) + ifelse( blocktype == 'init', 1, + ifelse( blocktype == 'switch1', 2, + ifelse( blocktype %in% c("rev2","switch2"), 3, + ifelse(grepl('devalue', blocktype), 4, + NA)))) + +# make sure we get the blocks correctly +test_blocktypenum<-function(){ + testthat::expect_equal(blocktype_to_num('switch1'), 2) + testthat::expect_equal(blocktype_to_num('devalue_all_100'), 4) + testthat::expect_equal(blocktype_to_num('devalue_good_75'), 4) + testthat::expect_equal(blocktype_to_num(c('switch1','devalue_all_low')), c(2,4)) + testthat::expect_false(format_raw_data('lab.data.tsv')$blocktype %>% + unique %>% blocktype_to_num %>% is.na %>% any) +} + + + +true.na <- function(x) !is.na(x) & x +add_choice_cols <- function(data) { + data %>% + mutate( + choiceWell = side_label_to_num(picked), + avoidedWell = side_label_to_num(avoided), + # what wells be chosen + farAvailable = choiceWell == farWell | avoidedWell == farWell, + initHighAval = choiceWell == initHigh | avoidedWell == initHigh, + # only care about high and best decisions + # NA needed for subsetting? cant use e.g.: choseFar = farAvailable & choiceWell == farWell, + choseFar = ifelse(farAvailable, choiceWell == farWell, NA), + avoidedFar = ifelse(farAvailable, avoidedWell == farWell, NA), + choseInitHigh = ifelse(initHighAval, choiceWell == initHigh,NA), + avoidedInitHigh = ifelse(initHighAval, avoidedWell == initHigh,NA), + # + blocknum = blocktype_to_num(blocktype), + choiceType = ifelse(true.na(choseFar), 'Far', ifelse(true.na(choseInitHigh), 'InitHigh', 'InitLow'))) +} + +test_add_choice_cols <- function(){ +} - # exclude testing runs (IDs with our initials, or 'x') - BAD_IDS <- c("WWF|ACP|^x$") - MIN_TRIALS <- 90 +format_raw_data <- function(fname){ rawdata <- read.csv(fname, sep='\t') %>% filter(!grepl(BAD_IDS, id)) %>% @@ -13,10 +74,19 @@ read_raw <- function(fname="lab.data.tsv") { age=ifelse(is.na(age),survey_age,age)) #filter(id != 'WWF34M' & id != 'ACP34F' & id != 'AP' & id != 'FC' & ver == '20211104v4-90max_pbar_moredeval') #filter(ver == '20211025v3-longertrials') +} + +read_raw <- function(fname="lab.data.tsv") { + + rawdata <- format_raw_data(fname) # have various combinations of blocks # init-switch1-devalue_all_100 # init-switch1-devalue_all_100-devalue_all_low + # + # cols like: + # id vdate task ver timepoint blockseq + # 11734 2022-11-11 00:00:00 habit_mr mr_1 20221111 init-switch1-devalue_all_100 blockseq_df <- rawdata %>% group_by(id,vdate, task, ver,timepoint) %>% arrange(trial) %>% @@ -37,12 +107,37 @@ read_raw <- function(fname="lab.data.tsv") { group_by(id, vdate, task, ver, age=survey_age) %>% summarize(ntrials = max(trial)) %>% filter(ntrials > MIN_TRIALS) + + nvisits <- nrow(blockseq_df) + testthat::expect_equal(nvisits, nrow(first_trial_wellnames)) + testthat::expect_equal(nvisits, nrow(total_trials)) - - perSubj <- merge(total_trials, first_trial_wellnames, by=c('id','vdate','ver','task')) + perSubj <- + merge(total_trials, first_trial_wellnames, by=c('id','vdate','ver','task')) %>% + merge(blockseq_df, by=c("id","vdate","ver","task"),all.x=T) + + # number of visits should match across visit summary stats dataframes + testthat::expect_equal(nvisits, nrow(perSubj)) + + + raw_with_visit_summary <- + merge(rawdata, perSubj, + by=c('id','vdate', 'ver', 'task','timepoint')) %>% + add_choice_cols + + # number of visits should match across visit summary stats dataframes + testthat::expect_equal(nrow(rawdata), nrow(raw_with_visit_summary)) + + return(raw_with_visit_summary) +} - rawdata %>% - merge(perSubj, by=c('id','vdate', 'ver', 'task')) %>% - left_join(blockseq_df, by=c("id","vdate","ver","task","timepoint")) %>% - add_choice_cols +test_read_raw <- function(){ + fname <- "lab.data.tsv" + raw_orig <- read.table(fname,sep="\t",header=T) + raw_read <- read_raw(fname) + # raw shouldn't have added more rows + testthat::expect_gte(nrow(raw_orig),nrow(raw_read)) + # shouldn't have more than one trial per timepoint+task (habit_mr, habit_eeg) + n_bad_merge <- raw_read %>% count(id,vdate,timepoint,task,trial) %>% filter(n>1) %>% nrow + testthat::expect_equal(n_bad_merge,0) } diff --git a/results/run_tests.R b/results/run_tests.R new file mode 100644 index 0000000..5559246 --- /dev/null +++ b/results/run_tests.R @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript + +# run all test_* functions +# test functions are kept next to the actual functions they test. +# this is not idomatic R, where we'd want tests/test_*.R (eg. from `usethis::use_test("habit")`) +# so we need our own wrapper instead of testthat::run_test() +# +# 20231210WF - init + +source('read_raw.R') +#lapply(grep('test_',ls(),value=T),do.call,args=list()) +my_test_funcs <- grep('test_',ls(),value=T) +my_test_funcs <- Filter(function(fname) class(get(fname))=="function", my_test_funcs) + +invisible(lapply(my_test_funcs, + function(tname) + testthat::test_that(tname, { + do.call(tname,args=list()) + })))