Skip to content

Commit

Permalink
Merge pull request #67 from aim-rsf/improve-auto
Browse files Browse the repository at this point in the history
Improve manual checking of categorisations
  • Loading branch information
RayStick authored Mar 6, 2024
2 parents a177547 + b1654f2 commit 33ed942
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 87 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(domain_mapping)
export(user_categorisation)
import(cli)
import(devtools)
import(grid)
Expand Down
124 changes: 82 additions & 42 deletions R/domain_mapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#' # Respond 'Demo List ' for the description of domain list.
#' # Respond 'Y' if you want to see the descriptions printed out.
#' # Respond '1,10' to the RANGE OF VARIABLES prompt (or process the full 93 variables if you like!)
#' # Reference the plot tab and categorise each variable into a single ('1')
#' # or multiple ('1,2') domain.
#' # Reference the plot tab and categorise each variable into a single ('1') domain
#' # or multiple ('1,2') domains.
#' # Write a note explaining your category choice (optional).
#' @export
#' @importFrom graphics plot.new
Expand Down Expand Up @@ -50,11 +50,14 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ALF ID*"), c("*OTHER ID*"), c("*DEMOGRAPHICS*"), domains)
gridExtra::grid.table(domains_extend[1], cols = "Domain", rows = 0:(nrow(domains_extend) - 1))

# temp - delete later
cat("\n You are in the improve-auto branch \n")

# Get user and demo list info for log file ----
User_Initials <- ""
while (User_Initials == "") {
cat("\n \n")
User_Initials <- readline(prompt = "ENTER INITIALS: ")
User_Initials <- readline(prompt = "Enter Initials: ")
}

# Print information about Data Asset ----
Expand Down Expand Up @@ -135,7 +138,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
# User inputs ----

cat("\n \n")
select_vars_n <- readline(prompt = "RANGE OF VARIABLES (DATA ELEMENTS) TO PROCESS (write as 'start_var,end_var' or press Enter to process all): ")
select_vars_n <- readline(prompt = "Enter the range of variables (data elements) to process. Press Enter to process all: ")
if (select_vars_n == "") {
start_var <- 1
end_var <- length(thisDataClass)
Expand Down Expand Up @@ -173,7 +176,6 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
Output$Domain_code[datavar] <- "2"
Output$Note[datavar] <- "AUTO CATEGORISED"
} else if (grepl("_ID_", selectDataClass_df$Label[datavar], ignore.case = TRUE)) { # picking up generic IDs

Output[nrow(Output) + 1, ] <- NA
Output$DataElement[datavar] <- selectDataClass_df$Label[datavar]
Output$Domain_code[datavar] <- "3"
Expand Down Expand Up @@ -201,50 +203,88 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL) {
Output$Domain_code[datavar] <- "4"
Output$Note[datavar] <- "AUTO CATEGORISED"
} else {
# user response
cat(paste(
"\nDATA ELEMENT -----> ", selectDataClass_df$Label[datavar],
"\n\nDESCRIPTION -----> ", selectDataClass_df$Description[datavar],
"\n\nDATA TYPE -----> ", selectDataClass_df$Type[datavar], "\n"
))

decision <- ""
while (decision == "") {
cat("\n \n")
decision <- readline(prompt = "CATEGORISE THIS VARIABLE (input a comma separated list of domain numbers): ")
}

decision_note <- ""
while (decision_note == "") {
cat("\n \n")
decision_note <- readline(prompt = "NOTES (write 'N' if no notes): ")
}

# collect user responses
decision_output <- user_categorisation(selectDataClass_df$Label[datavar],selectDataClass_df$Description[datavar],selectDataClass_df$Type[datavar])
# input user responses into output
Output[nrow(Output) + 1, ] <- NA
Output$DataElement[datavar] <- selectDataClass_df$Label[datavar]
Output$Domain_code[datavar] <- decision
Output$Note[datavar] <- decision_note
Output$Domain_code[datavar] <- decision_output$decision
Output$Note[datavar] <- decision_output$decision_note
}

# Fill in columns that have all rows identical
Output$Initials <- User_Initials
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
Output$DomainListDesc <- DomainListDesc
Output$DataAsset <- meta_json$dataModel$label
Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label

# Save as we go in case session terminates prematurely
Output[Output == ""] <- NA
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
} # end of loop for variable

# Print the AUTO CATEGORISED responses for this DataClass - request review
Output_auto <- subset(Output, Note == 'AUTO CATEGORISED')
cat("\n \n")
cli_alert_warning("Please check the auto categorised data elements are accurate:")
cat("\n \n")
print(Output_auto[, c("DataClass", "DataElement", "Domain_code")])
cat("\n \n")
auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept the auto categorisations: ")

if (auto_row_str != "") {

auto_row <- as.integer(unlist(strsplit(auto_row_str,","))) #probably sub-optimal coding

for (datavar_auto in auto_row) {

# collect user responses
decision_output <- user_categorisation(selectDataClass_df$Label[datavar_auto],selectDataClass_df$Description[datavar_auto],selectDataClass_df$Type[datavar_auto])
# input user responses into output
Output$Domain_code[datavar_auto] <- decision_output$decision
Output$Note[datavar_auto] <- decision_output$decision_note
}
}

# Fill in columns that have all rows identical
Output$Initials <- User_Initials
Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
Output$DomainListDesc <- DomainListDesc
Output$DataAsset <- meta_json$dataModel$label
Output$DataClass <- meta_json$dataModel$childDataClasses[[dc]]$label
# Ask if user wants to review their responses for this DataClass
review_cats <- ""
while (review_cats != "Y" & review_cats != "N") {
cat("\n \n")
review_cats <- readline(prompt = "Would you like to review your categorisations? (Y/N) ")
}

if (review_cats == 'Y') {

Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED')
cat("\n \n")
print(Output_not_auto[, c("DataClass", "DataElement", "Domain_code")])
cat("\n \n")
not_auto_row_str <- readline(prompt = "Enter row numbers you'd like to edit or press enter to accept: ")

if (not_auto_row_str != "") {

not_auto_row <- as.integer(unlist(strsplit(not_auto_row_str,","))) #probably sub-optimal coding

for (datavar_not_auto in not_auto_row) {

# Save file & print the responses to be saved
# collect user responses
decision_output <- user_categorisation(selectDataClass_df$Label[datavar_not_auto],selectDataClass_df$Description[datavar_not_auto],selectDataClass_df$Type[datavar_not_auto])
# input user responses into output
Output$Domain_code[datavar_not_auto] <- decision_output$decision
Output$Note[datavar_not_auto] <- decision_output$decision_note
}
}
}

# Save final categorisations for this DataClass
Output[Output == ""] <- NA
utils::write.csv(Output, output_fname, row.names = FALSE) # save as we go in case session terminates prematurely
utils::write.csv(Output, output_fname, row.names = FALSE)
cat("\n")
cli_alert_info("The below responses will be saved to {output_fname}")
cat("\n")
print(Output[, c("DataClass", "DataElement", "Domain_code", "Note")])
}
cli_alert_info("Your final categorisations have been saved to {output_fname}")

} # end of loop for each data class

cat("\n \n")
cli_alert_warning("Please check the auto categorised data elements are accurate!")
cli_alert_warning("Manually edit csv file to correct errors, if needed.")
}
} # end of function
47 changes: 47 additions & 0 deletions R/user_categorisation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' user_categorisation
#'
#' This function is used within the domain_mapping function. \cr \cr
#' It displays data properties to the user and requests a categorisation into a domain. \cr \cr
#' An optional note can be included with the categorisation.
#'
#' @param data_element Name of the variable
#' @param data_desc Description of the variable
#' @param data_type Data type of the variable
#' @return It returns a list containing the decision and decision note
#' @export

user_categorisation <- function(data_element,data_desc,data_type) {

# print text to R console
cat(paste(
"\nDATA ELEMENT -----> ", data_element,
"\n\nDESCRIPTION -----> ", data_desc,
"\n\nDATA TYPE -----> ", data_type, "\n"
))

state <- "redo"
while (state == "redo") {

# ask user for categorisation
decision <- ""
while (decision == "") {
cat("\n \n")
decision <- readline(prompt = "Categorise this variable: ")
}

# ask user for note on categorisation
decision_note <- ""
while (decision_note == "") {
cat("\n \n")
decision_note <- readline(prompt = "Notes (write 'N' if no notes): ")
}

# check if user wants to continue or redo
cat("\n \n")
state <- readline(prompt = "Press enter to continue or write 'redo' to correct previous answer: ")

}

return(list(decision = decision,decision_note = decision_note))

}
Loading

0 comments on commit 33ed942

Please sign in to comment.