From ec4a8cffc47c668c0c65990e60007840ad0c5792 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:37:27 +0000 Subject: [PATCH 01/16] test --- R/domain_mapping.R | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index f5e3d1dc..28c9566d 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -225,26 +225,35 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { Output$Domain_code[datavar] <- decision Output$Note[datavar] <- decision_note } - } - # Fill in columns that have all rows identical - Output$Initials <- User_Initials - Output$MetaDataVersion <- meta_json$dataModel$documentationVersion - Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated - Output$DomainListDesc <- DomainListDesc - Output$DataAsset <- meta_json$dataModel$label - Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label + # Fill in columns that have all rows identical + Output$Initials <- User_Initials + Output$MetaDataVersion <- meta_json$dataModel$documentationVersion + Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated + Output$DomainListDesc <- DomainListDesc + Output$DataAsset <- meta_json$dataModel$label + Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label + + # Save as we go in case session terminates prematurely + Output[Output == ""] <- NA + utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely + } # end of loop for variable + + # Print the AUTO CATEGORISED responses for this DataClass + Output_auto <- filter(Output,Note =='AUTO CATEGORISED') + + cat("\n \n") + cli_alert_warning("Please check the auto categorised data elements are accurate!") + cli_alert_warning("Manually edit csv file to correct errors, if needed.") - # Save file & print the responses to be saved + print(Output_auto[, c("DataClass", "DataElement", "Domain_code")]) + + # Save final categorisations for this data class Output[Output == ""] <- NA - utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely - cat("\n") - cli_alert_info("The below responses will be saved to {output_fname}") + utils::write.csv(Output, output_fname, row.names = FALSE) cat("\n") - print(Output[, c("DataClass", "DataElement", "Domain_code", "Note")]) - } + cli_alert_info("Your final categorisations have been saved to {output_fname}") + + } # end of loop for each data class - cat("\n \n") - cli_alert_warning("Please check the auto categorised data elements are accurate!") - cli_alert_warning("Manually edit csv file to correct errors, if needed.") -} +} # end of function From fa3e72e8871f536a24a3bd125c5e06a3f35eaea3 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:15:28 +0000 Subject: [PATCH 02/16] it works --- R/domain_mapping.R | 50 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 28c9566d..4cee766b 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -173,7 +173,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { Output$Domain_code[datavar] <- "2" Output$Note[datavar] <- "AUTO CATEGORISED" } else if (grepl("_ID_", selectDataClass_df$Label[datavar], ignore.case = TRUE)) { # picking up generic IDs - Output[nrow(Output) + 1, ] <- NA Output$DataElement[datavar] <- selectDataClass_df$Label[datavar] Output$Domain_code[datavar] <- "3" @@ -239,16 +238,51 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely } # end of loop for variable - # Print the AUTO CATEGORISED responses for this DataClass - Output_auto <- filter(Output,Note =='AUTO CATEGORISED') - + # Print the AUTO CATEGORISED responses for this DataClass - request review + Output_auto <- subset(Output, Note == 'AUTO CATEGORISED') + cat("\n \n") + cli_alert_warning("Please check the auto categorised data elements are accurate:") cat("\n \n") - cli_alert_warning("Please check the auto categorised data elements are accurate!") - cli_alert_warning("Manually edit csv file to correct errors, if needed.") - print(Output_auto[, c("DataClass", "DataElement", "Domain_code")]) + cat("\n \n") + auto_row_str <- readline(prompt = "Enter row numbers you'd like to change (for example: 1,5,10) or press enter to accept the auto categorisations: ") + + if (auto_row_str != "") { + + auto_row <- as.integer(unlist(strsplit(auto_row_str,","))) #probably sub-optimal coding + + for (datavar_auto in auto_row) { + + # user response + cat(paste( + "\nDATA ELEMENT -----> ", selectDataClass_df$Label[datavar_auto], + "\n\nDESCRIPTION -----> ", selectDataClass_df$Description[datavar_auto], + "\n\nDATA TYPE -----> ", selectDataClass_df$Type[datavar_auto], "\n" + )) + + decision <- "" + while (decision == "") { + cat("\n \n") + decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ") + } + + decision_note <- "" + while (decision_note == "") { + cat("\n \n") + decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ") + } + + Output$DataElement[datavar_auto] <- selectDataClass_df$Label[datavar] + Output$Domain_code[datavar_auto] <- decision + Output$Note[datavar_auto] <- decision_note + + if (datavar_auto == length(auto_row)) {auto_finished == "Y"} + + } + + } - # Save final categorisations for this data class + # Save final categorisations for this DataClass Output[Output == ""] <- NA utils::write.csv(Output, output_fname, row.names = FALSE) cat("\n") From 8fee5d809edff7549b180ccd26669766e8f57230 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:44:24 +0000 Subject: [PATCH 03/16] refactor-step1 --- R/domain_mapping.R | 49 +++++++++-------------------------------- R/user_categorisation.R | 35 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 39 deletions(-) create mode 100644 R/user_categorisation.R diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 4cee766b..459067a5 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -200,29 +200,15 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { Output$Domain_code[datavar] <- "4" Output$Note[datavar] <- "AUTO CATEGORISED" } else { - # user response - cat(paste( - "\nDATA ELEMENT -----> ", selectDataClass_df$Label[datavar], - "\n\nDESCRIPTION -----> ", selectDataClass_df$Description[datavar], - "\n\nDATA TYPE -----> ", selectDataClass_df$Type[datavar], "\n" - )) - - decision <- "" - while (decision == "") { - cat("\n \n") - decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ") - } - decision_note <- "" - while (decision_note == "") { - cat("\n \n") - decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ") - } + # collect user responses + decision_output <- user_categorisation(selectDataClass_df$Label[datavar],selectDataClass_df$Description[datavar],selectDataClass_df$Type[datavar]) + # input user responses into output Output[nrow(Output) + 1, ] <- NA Output$DataElement[datavar] <- selectDataClass_df$Label[datavar] - Output$Domain_code[datavar] <- decision - Output$Note[datavar] <- decision_note + Output$Domain_code[datavar] <- decision_output$decision + Output$Note[datavar] <- decision_output$decision_note } # Fill in columns that have all rows identical @@ -253,28 +239,13 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { for (datavar_auto in auto_row) { - # user response - cat(paste( - "\nDATA ELEMENT -----> ", selectDataClass_df$Label[datavar_auto], - "\n\nDESCRIPTION -----> ", selectDataClass_df$Description[datavar_auto], - "\n\nDATA TYPE -----> ", selectDataClass_df$Type[datavar_auto], "\n" - )) - - decision <- "" - while (decision == "") { - cat("\n \n") - decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ") - } - - decision_note <- "" - while (decision_note == "") { - cat("\n \n") - decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ") - } + # collect user responses + decision_output <- user_categorisation(selectDataClass_df$Label[datavar_auto],selectDataClass_df$Description[datavar_auto],selectDataClass_df$Type[datavar_auto]) + # input user responses into output Output$DataElement[datavar_auto] <- selectDataClass_df$Label[datavar] - Output$Domain_code[datavar_auto] <- decision - Output$Note[datavar_auto] <- decision_note + Output$Domain_code[datavar_auto] <- decision_output$decision + Output$Note[datavar_auto] <- decision_output$decision_note if (datavar_auto == length(auto_row)) {auto_finished == "Y"} diff --git a/R/user_categorisation.R b/R/user_categorisation.R new file mode 100644 index 00000000..f44441fa --- /dev/null +++ b/R/user_categorisation.R @@ -0,0 +1,35 @@ +user_categorisation <- function(data_element,data_desc,data_type) { + + # print text to R console + cat(paste( + "\nDATA ELEMENT -----> ", data_element, + "\n\nDESCRIPTION -----> ", data_desc, + "\n\nDATA TYPE -----> ", data_type, "\n" + )) + + state <- "redo" + while (state == "redo") { + + # ask user for categorisation + decision <- "" + while (decision == "") { + cat("\n \n") + decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ") + } + + # ask user for note on categorisation + decision_note <- "" + while (decision_note == "") { + cat("\n \n") + decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ") + } + + # check if user wants to continue or redo + cat("\n \n") + state <- readline(prompt = "Press enter to continue or write 'redo' to correct previous answer: ") + + } + +return(list(decision = decision,decision_note = decision_note)) + +} From 2b64707b358e1a9154429399ac9a51ef51bb9ee8 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:08:16 +0000 Subject: [PATCH 04/16] refactor-step2 --- R/domain_mapping.R | 35 ++++++++++++++++++++++++++++++----- R/user_categorisation.R | 12 ++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 459067a5..81fa022a 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -203,7 +203,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { # collect user responses decision_output <- user_categorisation(selectDataClass_df$Label[datavar],selectDataClass_df$Description[datavar],selectDataClass_df$Type[datavar]) - # input user responses into output Output[nrow(Output) + 1, ] <- NA Output$DataElement[datavar] <- selectDataClass_df$Label[datavar] @@ -241,16 +240,42 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { # collect user responses decision_output <- user_categorisation(selectDataClass_df$Label[datavar_auto],selectDataClass_df$Description[datavar_auto],selectDataClass_df$Type[datavar_auto]) - # input user responses into output - Output$DataElement[datavar_auto] <- selectDataClass_df$Label[datavar] + Output$DataElement[datavar_auto] <- selectDataClass_df$Label[datavar_auto] Output$Domain_code[datavar_auto] <- decision_output$decision Output$Note[datavar_auto] <- decision_output$decision_note + } + } - if (datavar_auto == length(auto_row)) {auto_finished == "Y"} + # Ask if user wants to review their responses for this DataClass + review_cats <- "" + while (review_cats != "Y" & review_cats != "N") { + cat("\n \n") + review_cats <- readline(prompt = "Would you like to review the categorisation you made? (Y/N) ") + } - } + if (review_cats == 'Y') { + Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED') + cat("\n \n") + print(Output_not_auto[, c("DataClass", "DataElement", "Domain_code")]) + cat("\n \n") + not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to change or press enter to accept: ") + + if (not_auto_row_str != "") { + + not_auto_row <- as.integer(unlist(strsplit(not_auto_row_str,","))) #probably sub-optimal coding + + for (datavar_not_auto in not_auto_row) { + + # collect user responses + decision_output <- user_categorisation(selectDataClass_df$Label[datavar_not_auto],selectDataClass_df$Description[datavar_not_auto],selectDataClass_df$Type[datavar_not_auto]) + # input user responses into output + Output$DataElement[datavar_not_auto] <- selectDataClass_df$Label[datavar_not_auto] + Output$Domain_code[datavar_not_auto] <- decision_output$decision + Output$Note[datavar_not_auto] <- decision_output$decision_note + } + } } # Save final categorisations for this DataClass diff --git a/R/user_categorisation.R b/R/user_categorisation.R index f44441fa..f64d42f6 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -1,3 +1,15 @@ +#' user_categorisation +#' +#' This function is used within the domain_mapping function \cr \cr +#' It displays data properties to the user and requests a categorisation into a domain \cr \cr +#' An optional note can be included with the categorisation +#' +#' @param data_element Name of the variable +#' @param data_desc Description of the variable +#' @param data_type Data type of the variable +#' @return A list containing the decision and decision note +#' @export + user_categorisation <- function(data_element,data_desc,data_type) { # print text to R console From fa404715fbbbd16c173640ade573fac0fc1ddc13 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:09:33 +0000 Subject: [PATCH 05/16] format --- R/user_categorisation.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/user_categorisation.R b/R/user_categorisation.R index f64d42f6..ff39b503 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -1,8 +1,8 @@ #' user_categorisation #' -#' This function is used within the domain_mapping function \cr \cr -#' It displays data properties to the user and requests a categorisation into a domain \cr \cr -#' An optional note can be included with the categorisation +#' This function is used within the domain_mapping function. \cr \cr +#' It displays data properties to the user and requests a categorisation into a domain. \cr \cr +#' An optional note can be included with the categorisation. #' #' @param data_element Name of the variable #' @param data_desc Description of the variable From 60674e1224a5977bc6196558860caef627a1bbfd Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:09:40 +0000 Subject: [PATCH 06/16] docs --- NAMESPACE | 1 + man/user_categorisation.Rd | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 man/user_categorisation.Rd diff --git a/NAMESPACE b/NAMESPACE index d5be7486..d1fe3961 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(domain_mapping) +export(user_categorisation) import(cli) import(devtools) import(grid) diff --git a/man/user_categorisation.Rd b/man/user_categorisation.Rd new file mode 100644 index 00000000..54ec5800 --- /dev/null +++ b/man/user_categorisation.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/user_categorisation.R +\name{user_categorisation} +\alias{user_categorisation} +\title{user_categorisation} +\usage{ +user_categorisation(data_element, data_desc, data_type) +} +\arguments{ +\item{data_element}{Name of the variable} + +\item{data_desc}{Description of the variable} + +\item{data_type}{Data type of the variable} +} +\value{ +A list containing the decision and decision note +} +\description{ +This function is used within the domain_mapping function \cr \cr +It displays data properties to the user and requests a categorisation into a domain \cr \cr +An optional note can be included with the categorisation +} From 3cc8b5fdb1708cf3f9c3b0fadb7668f4268a317f Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:10:14 +0000 Subject: [PATCH 07/16] docs --- man/user_categorisation.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/user_categorisation.Rd b/man/user_categorisation.Rd index 54ec5800..7845b312 100644 --- a/man/user_categorisation.Rd +++ b/man/user_categorisation.Rd @@ -17,7 +17,7 @@ user_categorisation(data_element, data_desc, data_type) A list containing the decision and decision note } \description{ -This function is used within the domain_mapping function \cr \cr -It displays data properties to the user and requests a categorisation into a domain \cr \cr -An optional note can be included with the categorisation +This function is used within the domain_mapping function. \cr \cr +It displays data properties to the user and requests a categorisation into a domain. \cr \cr +An optional note can be included with the categorisation. } From c61f97e86a1a646f1b235f59d10e1ffdcfd6c785 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:12:00 +0000 Subject: [PATCH 08/16] docs --- R/user_categorisation.R | 2 +- man/user_categorisation.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/user_categorisation.R b/R/user_categorisation.R index ff39b503..8a35c3fc 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -7,7 +7,7 @@ #' @param data_element Name of the variable #' @param data_desc Description of the variable #' @param data_type Data type of the variable -#' @return A list containing the decision and decision note +#' @return It returns a list containing the decision and decision note #' @export user_categorisation <- function(data_element,data_desc,data_type) { diff --git a/man/user_categorisation.Rd b/man/user_categorisation.Rd index 7845b312..d7e64427 100644 --- a/man/user_categorisation.Rd +++ b/man/user_categorisation.Rd @@ -14,7 +14,7 @@ user_categorisation(data_element, data_desc, data_type) \item{data_type}{Data type of the variable} } \value{ -A list containing the decision and decision note +It returns a list containing the decision and decision note } \description{ This function is used within the domain_mapping function. \cr \cr From e3e49782bb154359a74907917f4a4ed73840e43b Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 14:02:20 +0000 Subject: [PATCH 09/16] docs --- R/domain_mapping.R | 14 +++++++------- man/domain_mapping.Rd | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 81fa022a..cf341356 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -16,8 +16,8 @@ #' # Respond 'Demo List ' for the description of domain list. #' # Respond 'Y' if you want to see the descriptions printed out. #' # Respond '1,10' to the RANGE OF VARIABLES prompt (or process the full 93 variables if you like!) -#' # Reference the plot tab and categorise each variable into a single ('1') -#' # or multiple ('1,2') domain. +#' # Reference the plot tab and categorise each variable into a single ('1') domain +#' # or multiple ('1,2') domains. #' # Write a note explaining your category choice (optional). #' @export #' @importFrom graphics plot.new @@ -54,7 +54,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { User_Initials <- "" while (User_Initials == "") { cat("\n \n") - User_Initials <- readline(prompt = "ENTER INITIALS: ") + User_Initials <- readline(prompt = "Enter Initials: ") } # Print information about Data Asset ---- @@ -135,7 +135,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { # User inputs ---- cat("\n \n") - select_vars_n <- readline(prompt = "RANGE OF VARIABLES (DATA ELEMENTS) TO PROCESS (write as 'start_var,end_var' or press Enter to process all): ") + select_vars_n <- readline(prompt = "Enter the range of variables (data elements) to process. Press Enter to process all: ") if (select_vars_n == "") { start_var <- 1 end_var <- length(thisDataClass) @@ -230,7 +230,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { cat("\n \n") print(Output_auto[, c("DataClass", "DataElement", "Domain_code")]) cat("\n \n") - auto_row_str <- readline(prompt = "Enter row numbers you'd like to change (for example: 1,5,10) or press enter to accept the auto categorisations: ") + auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept the auto categorisations: ") if (auto_row_str != "") { @@ -251,7 +251,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { review_cats <- "" while (review_cats != "Y" & review_cats != "N") { cat("\n \n") - review_cats <- readline(prompt = "Would you like to review the categorisation you made? (Y/N) ") + review_cats <- readline(prompt = "Would you like to review your categorisations? (Y/N) ") } if (review_cats == 'Y') { @@ -260,7 +260,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { cat("\n \n") print(Output_not_auto[, c("DataClass", "DataElement", "Domain_code")]) cat("\n \n") - not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to change or press enter to accept: ") + not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept: ") if (not_auto_row_str != "") { diff --git a/man/domain_mapping.Rd b/man/domain_mapping.Rd index bf4ae21a..5d7280fd 100644 --- a/man/domain_mapping.Rd +++ b/man/domain_mapping.Rd @@ -29,7 +29,7 @@ Example inputs are provided within the package data, for the user to run this fu # Respond 'Demo List ' for the description of domain list. # Respond 'Y' if you want to see the descriptions printed out. # Respond '1,10' to the RANGE OF VARIABLES prompt (or process the full 93 variables if you like!) -# Reference the plot tab and categorise each variable into a single ('1') -# or multiple ('1,2') domain. +# Reference the plot tab and categorise each variable into a single ('1') domain +# or multiple ('1,2') domains. # Write a note explaining your category choice (optional). } From df61d5ba8a50cbe83654c16a4f98aa5ca59f739f Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:01:18 +0000 Subject: [PATCH 10/16] update demo --- R/user_categorisation.R | 4 +- README.md | 119 +++++++++++++++++++++++++--------------- 2 files changed, 78 insertions(+), 45 deletions(-) diff --git a/R/user_categorisation.R b/R/user_categorisation.R index 8a35c3fc..6424ea28 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -26,14 +26,14 @@ user_categorisation <- function(data_element,data_desc,data_type) { decision <- "" while (decision == "") { cat("\n \n") - decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ") + decision <- readline(prompt = "Categorise this variable: ") } # ask user for note on categorisation decision_note <- "" while (decision_note == "") { cat("\n \n") - decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ") + decision_note <- readline(prompt = "Notes (write 'N' if no notes): ") } # check if user wants to continue or redo diff --git a/README.md b/README.md index c70e6adf..a0e1e3ba 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ The R console will show: ``` ℹ Running domain_mapping in demo mode using package data files -ENTER INITIALS: +Enter Initials: RS ``` Respond with your initials and press enter. @@ -111,19 +111,19 @@ Rachael Stickland at 2024-01-05T13:22:09.774Z ℹ Found 2 Data Classes (2 tables) in this Data Asset -Would you like to read a description of the Data Asset? (Y/N) +Would you like to read a description of the Data Asset? (Y/N) Y ``` -Press Y to read these descriptions, for the purpose of the demo. +Enter Y to read these descriptions, for the purpose of the demo. For this example, the Data Asset is called MIDS and the tables inside this Data Class are BIRTH and INITIAL_ASSESSMENT. -It will then ask which variables to process: +For each table, it will ask which variables to process: ``` -RANGE OF VARIABLES (DATA ELEMENTS) TO PROCESS (write as 'start_var,end_var' or press Enter to process all): 1,10 +Enter the range of variables (data elements) to process. Press Enter to process all: 1,10 ``` -If you press enter it will process all the variables, so use a smaller number like 10 for this demo. +If you press enter it will process all the variables, so use a smaller range like 1 to 10 for this demo. For each data element (variable) you will be shown this structure: @@ -136,19 +136,58 @@ DATA TYPE -----> CHARACTER ``` By referencing the plots tab, and other info you may have, categorise this variable with a number(s). -A variable can map to more than one domain. +A variable can map to more than one domain so a comma separated list of numbers can be given (7,8). There is an (optional) note field to explain your choice. ``` -CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): 8 +Categorise this variable: 8 -NOTES (write 'N' if no notes): N +Notes (write 'N' if no notes): N ``` +If you make a mistake, the next prompt allows you to redo. Or press enter if you are happy to continue. + +``` +Press enter to continue or write 'redo' to correct previous answer: +``` + +When you get to the end of your requested number of variables it will show you variables that have been auto categorised. + +If you want to change these auto categorisations, and do them manually, include the row number (1,9) in the list. + +``` +! Please check the auto categorised data elements are accurate: + + DataClass DataElement Domain_code +1 BIRTH AVAIL_FROM_DT 1 +9 BIRTH CHILD_ALF_E 2 +10 BIRTH CHILD_ALF_STS_CD 2 + +Enter row numbers you'd like to edit or press enter to accept the auto categorisations: +``` + +Finally, it will ask you if you want to review the categorisations you previously made. + +``` +Would you like to review your categorisations? (Y/N) +``` + +If you say yes (with Y) it will take you through the same review process you just did for auto categorisations. + +At the end of processing that Data Class (table) it will show: + +``` +ℹ Your final categorisations have been saved to LOG_MaternityIndicatorsDataset(MIDS)_BIRTH_2024-01-30_10-42-15.csv +``` + +The function will then repeat the same steps for the next Data Class (table). + +#### Understanding the domain list + For this demo, a simple list of domains are provided, see [data-raw/domain_list_demo.csv](data-raw/domain_list_demo.csv). -This list is in this plot tab: +This list shows up in the R plot tab: - [0] *NO MATCH / UNSURE* - [1] *METADATA* @@ -162,12 +201,11 @@ This list is in this plot tab: There are 5 default domains always included [0-4], appended on to any domain list given. -For a research study, your domains will likely be more specific e.g. 'Prenatal, antenatal, neonatal and birth' or 'Health behaviours and diet'. +For a research study, your domains are likely to be much more specific e.g. 'Prenatal, antenatal, neonatal and birth' or 'Health behaviours and diet'. #### Output -The output of your decisions will be pasted to the R console. -These decisions will also be saved to a csv file. +The output of your decisions will be saved to a csv file. The csv file name includes the data asset, data class, and date stamp. This csv file, in addition to what is shown on the console, contains: - user initials (from user input) @@ -179,40 +217,35 @@ The intended use case for this log file is to be loaded up, compared across users, and used as an input in later analysis steps when working out which variables can be used to represent which research domains. +Example outputs below, running with '1,10' data elements: + ``` -ℹ The below responses will be saved to LOG_MaternityIndicatorsDataset(MIDS)_BIRTH_2024-01-30_10-42-15.csv - - DataClass DataElement Domain_code Note -1 BIRTH AVAIL_FROM_DT 1 AUTO CATEGORISED -2 BIRTH BABY_BIRTH_DT 4 N -3 BIRTH BIRTH_APGAR_SCORE 8 N -4 BIRTH BIRTH_MODE_CD 8 N -5 BIRTH BIRTH_ORDER 8 N -6 BIRTH BIRTH_OUTCOME_CD 8 N -7 BIRTH BIRTH_TREAT_CD 0 No description given -8 BIRTH BIRTH_TREAT_SITE_CD 6 N -9 BIRTH CHILD_ALF_E 2 AUTO CATEGORISED -10 BIRTH CHILD_ALF_STS_CD 2 AUTO CATEGORISED + DataClass DataElement Domain_code Note +1 BIRTH AVAIL_FROM_DT 1 AUTO CATEGORISED +2 BIRTH BABY_BIRTH_DT 4 N +3 BIRTH BIRTH_APGAR_SCORE 8 N +4 BIRTH BIRTH_MODE_CD 8 N +5 BIRTH BIRTH_ORDER 8 N +6 BIRTH BIRTH_OUTCOME_CD 8 N +7 BIRTH BIRTH_TREAT_CD 0 No description given +8 BIRTH BIRTH_TREAT_SITE_CD 6 N +9 BIRTH CHILD_ALF_E 2 AUTO CATEGORISED +10 BIRTH CHILD_ALF_STS_CD 2 AUTO CATEGORISED ``` ``` -ℹ The below responses will be saved to LOG_MaternityIndicatorsDataset(MIDS)_INITIAL_ASSESSMENT_2024-01-30_10-43-05.csv - - DataClass DataElement Domain_code Note -1 INITIAL_ASSESSMENT AVAIL_FROM_DT 1 AUTO CATEGORISED -2 INITIAL_ASSESSMENT GEST_WEEKS 8 N -3 INITIAL_ASSESSMENT INITIAL_ASS_DT 8 Date of health visit -4 INITIAL_ASSESSMENT MAT_AGE_AT_ASS 4 AUTO CATEGORISED -5 INITIAL_ASSESSMENT MOTHER_ALF_E 2 AUTO CATEGORISED -6 INITIAL_ASSESSMENT MOTHER_ALF_STS_CD 2 AUTO CATEGORISED -7 INITIAL_ASSESSMENT PROV_CD 6,8 Org code for health provider -8 INITIAL_ASSESSMENT SERVICE_USER_GRAVIDA_CD 8 N -9 INITIAL_ASSESSMENT SERVICE_USER_HAS_MENTAL_HEALTH_CARE_PLAN_CD 8 N -10 INITIAL_ASSESSMENT SERVICE_USER_HAS_MENTAL_HEALTH_CONDITION_CD 8 N - -! Please check the auto categorised data elements are accurate! -! Manually edit csv file to correct errors, if needed. -``` + DataClass DataElement Domain_code Note +1 INITIAL_ASSESSMENT AVAIL_FROM_DT 1 AUTO CATEGORISED +2 INITIAL_ASSESSMENT GEST_WEEKS 8 N +3 INITIAL_ASSESSMENT INITIAL_ASS_DT 8 Date of health visit +4 INITIAL_ASSESSMENT MAT_AGE_AT_ASS 4 AUTO CATEGORISED +5 INITIAL_ASSESSMENT MOTHER_ALF_E 2 AUTO CATEGORISED +6 INITIAL_ASSESSMENT MOTHER_ALF_STS_CD 2 AUTO CATEGORISED +7 INITIAL_ASSESSMENT PROV_CD 6,8 Org code for health provider +8 INITIAL_ASSESSMENT SERVICE_USER_GRAVIDA_CD 8 N +9 INITIAL_ASSESSMENT SERVICE_USER_HAS_MENTAL_HEALTH_CARE_PLAN_CD 8 N +10 INITIAL_ASSESSMENT SERVICE_USER_HAS_MENTAL_HEALTH_CONDITION_CD 8 N + ``` ### Using your own input files From 788f9db9b255ce998c3d4c987c9257902308d8ab Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:03:29 +0000 Subject: [PATCH 11/16] clarity --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a0e1e3ba..2f2d70d0 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ For each table, it will ask which variables to process: Enter the range of variables (data elements) to process. Press Enter to process all: 1,10 ``` -If you press enter it will process all the variables, so use a smaller range like 1 to 10 for this demo. +If you press enter it will process all the variables, so use a smaller range like 1 to 10 the first time you run this demo. For each data element (variable) you will be shown this structure: From f2bb561ee6d01c9ae68e49d36a6a26a43c57226a Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:36:22 +0000 Subject: [PATCH 12/16] clarify --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f2d70d0..9ee59cb1 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ The intended use case for this log file is to be loaded up, compared across users, and used as an input in later analysis steps when working out which variables can be used to represent which research domains. -Example outputs below, running with '1,10' data elements: +A subset of columns from the csv outputs are shown below, running with '1,10' data elements: ``` DataClass DataElement Domain_code Note From fc2ddc1a549e8e90203ad182023a773989f06fc2 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Wed, 28 Feb 2024 10:22:01 +0000 Subject: [PATCH 13/16] bug fix --- R/domain_mapping.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index cf341356..40884ef4 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -241,7 +241,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { # collect user responses decision_output <- user_categorisation(selectDataClass_df$Label[datavar_auto],selectDataClass_df$Description[datavar_auto],selectDataClass_df$Type[datavar_auto]) # input user responses into output - Output$DataElement[datavar_auto] <- selectDataClass_df$Label[datavar_auto] Output$Domain_code[datavar_auto] <- decision_output$decision Output$Note[datavar_auto] <- decision_output$decision_note } @@ -271,7 +270,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { # collect user responses decision_output <- user_categorisation(selectDataClass_df$Label[datavar_not_auto],selectDataClass_df$Description[datavar_not_auto],selectDataClass_df$Type[datavar_not_auto]) # input user responses into output - Output$DataElement[datavar_not_auto] <- selectDataClass_df$Label[datavar_not_auto] Output$Domain_code[datavar_not_auto] <- decision_output$decision Output$Note[datavar_not_auto] <- decision_output$decision_note } From 7dd20c01d010f1d14e08a4a8469b797f67a1296c Mon Sep 17 00:00:00 2001 From: Mahwish Mohammad <43926907+Rainiefantasy@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:11:49 +0000 Subject: [PATCH 14/16] Update README.md adding library(devtools) line to import library, line 74 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9ee59cb1..af2cd2b5 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ Run in the R console: ``` r install.packages("devtools") +library(devtools) devtools::install_github("aim-rsf/browseMetadata") ``` From 0ebc0accbf6277f15b5ca8d3ec8d0effaf8d022d Mon Sep 17 00:00:00 2001 From: Mahwish Mohammad <43926907+Rainiefantasy@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:38:46 +0000 Subject: [PATCH 15/16] Update README.md removed line 74 --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index af2cd2b5..9ee59cb1 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,6 @@ Run in the R console: ``` r install.packages("devtools") -library(devtools) devtools::install_github("aim-rsf/browseMetadata") ``` From b1654f20b7f73b81af3e4a2fecff2da2c3e5c81c Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Mon, 4 Mar 2024 16:42:19 +0000 Subject: [PATCH 16/16] for MM --- R/domain_mapping.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 40884ef4..2292463f 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -50,6 +50,9 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) { domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ALF ID*"), c("*OTHER ID*"), c("*DEMOGRAPHICS*"), domains) gridExtra::grid.table(domains_extend[1], cols = "Domain", rows = 0:(nrow(domains_extend) - 1)) + # temp - delete later + cat("\n You are in the improve-auto branch \n") + # Get user and demo list info for log file ---- User_Initials <- "" while (User_Initials == "") {