diff --git a/R/domain_mapping.R b/R/domain_mapping.R index 8952b53c..eb688b1c 100755 --- a/R/domain_mapping.R +++ b/R/domain_mapping.R @@ -60,7 +60,7 @@ domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file = ## Present domains plots panel for user's reference ---- colnames(domains)[1] = "Domain Name" graphics::plot.new() - domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ALF ID*"), c("*OTHER ID*"), c("*DEMOGRAPHICS*"), domains) + domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains) Code <- data.frame(Code = 0:(nrow(domains_extend) - 1)) Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default()) grid.arrange(Domain_table,nrow=1,ncol=1) diff --git a/R/user_categorisation.R b/R/user_categorisation.R index a1fe97b5..d275067b 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -12,25 +12,39 @@ user_categorisation <- function(data_element,data_desc,data_type) { - # print text to R console - cat(paste( - "\nDATA ELEMENT -----> ", data_element, - "\n\nDESCRIPTION -----> ", data_desc, - "\n\nDATA TYPE -----> ", data_type, "\n" - )) - - # ask user for categorisation - - decision <- "" - cat("\n \n") - while (decision == "") { - decision <- readline("Categorise this data element into one or more domains, e.g. 5 or 5,8: ") - } + first_run = TRUE + go_back = '' - # ask user for note on categorisation - cat("\n \n") - decision_note <- readline("Optional note to explain decision (or press enter to continue): ") + while (go_back == "Y" | go_back == "y" | first_run == TRUE) { - return(list(decision = decision,decision_note = decision_note)) + go_back = '' + # print text to R console + cat(paste( + "\nDATA ELEMENT -----> ", data_element, + "\n\nDESCRIPTION -----> ", data_desc, + "\n\nDATA TYPE -----> ", data_type, "\n" + )) + + # ask user for categorisation + + decision <- "" + cat("\n \n") + while (decision == "") { + decision <- readline("Categorise data element into domain(s). E.g. 3 or 3,4: ") + } + + # ask user for note on categorisation + cat("\n \n") + decision_note <- readline("Categorisation note (or press enter to continue): ") + while (go_back != "Y" & go_back != "y" & go_back != "N" & go_back != "n") { + cat("\n \n") + go_back <- readline(prompt = "Re-do last categorisation? (y/n): ") + } + + first_run = FALSE } + + return(list(decision = decision,decision_note = decision_note)) + +} diff --git a/README.md b/README.md index 69f159b6..5deb8ea8 100644 --- a/README.md +++ b/README.md @@ -88,21 +88,20 @@ Take note of the **Plots** tab in R Studio which should show a table of domains - [0] *NO MATCH / UNSURE* - [1] *METADATA* -- [2] *ALF ID* -- [3] *OTHER ID* -- [4] *DEMOGRAPHICS* -- [5] Socioeconomic info -- [6] Location info -- [7] Education info -- [8] Health info +- [2] *ID* +- [3] *DEMOGRAPHICS* +- [4] Socioeconomic info +- [5] Location info +- [6] Education info +- [7] Health info -Reference this Plots tab throughout the demo run. You will be asked to label data elements with one (or more) of these numbers [0-8]. +Reference this Plots tab throughout the demo run. You will be asked to label data elements with one (or more) of these numbers [0-7]. -Here we have very simple domains [5-8] for the demo run. +Here we have very simple domains [4-7] for the demo run. For a research study, your domains are likely to be much more specific e.g. 'Prenatal, antenatal, neonatal and birth' or 'Health behaviours and diet'. -The 5 default domains are always included [0-4], appended on to any domain list given. +The 4 default domains are always included [0-3], appended on to any domain list given. ``` ✔ Running domain_mapping in demo mode using package data files @@ -219,17 +218,26 @@ DESCRIPTION -----> APGAR 1 score. This is a measure of a baby's physical state DATA TYPE -----> CHARACTER -Categorise this data element into one or more domains, e.g. 5 or 5,8: 8 +Categorise data element into domain(s). E.g. 3 or 3,4: 7 + +Categorisation note (or press enter to continue): your note here -Optional note to explain decision (or press enter to continue): ``` -We chose to respond with '8' because that corresponds to the 'Health' domain in the table. More than one domain can be chosen. +We chose to respond with '7' because that corresponds to the 'Health info' domain in the table. More than one domain can be chosen. A note can be included to explain why a categorisation has been made. Or press enter for no note. +You have the option to re-do the categorisation you just made, by replying 'y' to the question: + +``` +Re-do last categorisation? (y/n): y +``` + After completing 20, it will then ask you to review the auto-categorisations it made. +These auto-categorisations are based on the mappings included in the [data-raw/look_up.csv](data-raw/look_up.csv). This look-up file can be changed (see the section 'Using your own input files' below). ALF refers to 'Anonymous Linking Field' - this field is used within datasets that have been anonymised and encrypted for inclusion within SAIL Databank. + ``` ! Please check the auto categorised data elements are accurate for table CHILD: @@ -238,7 +246,7 @@ After completing 20, it will then ask you to review the auto-categorisations it 2 ALF_MTCH_PCT 2 AUTO CATEGORISED 3 ALF_STS_CD 2 AUTO CATEGORISED 6 AVAIL_FROM_DT 1 AUTO CATEGORISED -19 GNDR_CD 4 AUTO CATEGORISED +19 GNDR_CD 3 AUTO CATEGORISED ℹ Press enter to accept the auto categorisations for table CHILD or enter each row you'd like to edit: @@ -251,21 +259,21 @@ Press enter for now. It will then ask you if you want to review the categorisati Would you like to review your categorisations? (y/n): y DataElement Domain_code Note -4 APGAR_1 8 -5 APGAR_2 8 -7 BIRTH_ORDER 8 10% missingness -8 BIRTH_TM 1,8 20% missingness -9 BIRTH_WEIGHT 8 -10 BIRTH_WEIGHT_DEC 8 -11 BREASTFEED_8_WKS_FLG 8 -12 BREASTFEED_BIRTH_FLG 8 -13 CHILD_ID_E 3 -14 CURR_LHB_CD_BIRTH 6,8 Place of birth -15 DEL_CD 8 -16 DOD 4,8 -17 ETHNIC_GRP_CD 4 -18 GEST_AGE 4,8 -20 HEALTH_VISITOR_CD_E 3 +4 APGAR_1 7 +5 APGAR_2 7 +7 BIRTH_ORDER 7 10% missingness +8 BIRTH_TM 1,7 20% missingness +9 BIRTH_WEIGHT 7 +10 BIRTH_WEIGHT_DEC 7 +11 BREASTFEED_8_WKS_FLG 7 +12 BREASTFEED_BIRTH_FLG 7 +13 CHILD_ID_E 2 +14 CURR_LHB_CD_BIRTH 5,7 Place of birth +15 DEL_CD 7 +16 DOD 3,7 +17 ETHNIC_GRP_CD 3 +18 GEST_AGE 3,7 +20 HEALTH_VISITOR_CD_E 2 ℹ Press enter to accept your categorisations for table CHILD, or enter each row number you'd like to edit: @@ -325,9 +333,9 @@ The json file: The domain_file: -- a csv file created by the user, with each domain listed on a separate line +- a csv file created by the user, with each domain listed on a separate line, no header - see [data-raw/domain_list_demo.csv](data-raw/domain_list_demo.csv) for a template -- the first 5 domains will be auto populated (see demo above) +- the first 4 domains will be auto populated (see demo above) The lookup file: diff --git a/data-raw/look_up.csv b/data-raw/look_up.csv index c72ac04c..da22883f 100644 --- a/data-raw/look_up.csv +++ b/data-raw/look_up.csv @@ -1,28 +1,28 @@ DataElement,DomainLabel,DomainCode NA,No Match / Unsure,0 AVAIL_FROM_DT,Metadata,1 -ALF_E,ALF ID,2 -MOTHER_ALF_E,ALF ID,2 -CHILD_ALF_E,ALF ID,2 -RALF,ALF ID,2 -ALF_STS_CD,ALF ID,2 -MOTHER_ALF_STS_CD,ALF ID,2 -CHILD_ALF_STS_CD,ALF ID,2 -ALF_MTCH_PCT,ALF ID,2 -MOTHER_ALF_MTCH_PCT,ALF ID,2 -CHILD_ALF_MTCH_PCT,ALF ID,2 -SERVICE_USER_LOCAL_ID_E,OTHER ID,3 -MAT_SERVICE_USER_LOCAL_ID_E,OTHER ID,3 -CLIENT_ID_E,OTHER ID,3 -AGE,Demographics,4 -MAT_AGE,Demographics,4 -MAT_AGE_AT_ASS,Demographics,4 -CONTACT_AGE,Demographics,4 -WOB,Demographics,4 -MAT_WOB,Demographics,4 -SEX,Demographics,4 -SERVICE_USER_SEX_CD,Demographics,4 -NENONATE_SEX_CD,Demographics,4 -GENDER,Demographics,4 -GNDR,Demographics,4 -GNDR_CD,Demographics,4 \ No newline at end of file +ALF_E,ID,2 +MOTHER_ALF_E,ID,2 +CHILD_ALF_E,ID,2 +RALF,ID,2 +ALF_STS_CD,ID,2 +MOTHER_ALF_STS_CD,ID,2 +CHILD_ALF_STS_CD,ID,2 +ALF_MTCH_PCT,ID,2 +MOTHER_ALF_MTCH_PCT,ID,2 +CHILD_ALF_MTCH_PCT,ID,2 +SERVICE_USER_LOCAL_ID_E,ID,2 +MAT_SERVICE_USER_LOCAL_ID_E,ID,2 +CLIENT_ID_E,ID,2 +AGE,Demographics,3 +MAT_AGE,Demographics,3 +MAT_AGE_AT_ASS,Demographics,3 +CONTACT_AGE,Demographics,3 +WOB,Demographics,3 +MAT_WOB,Demographics,3 +SEX,Demographics,3 +SERVICE_USER_SEX_CD,Demographics,3 +NENONATE_SEX_CD,Demographics,3 +GENDER,Demographics,3 +GNDR,Demographics,3 +GNDR_CD,Demographics,3 \ No newline at end of file diff --git a/data/look_up.rda b/data/look_up.rda index 4a3b8aa7..2b1e3d46 100644 Binary files a/data/look_up.rda and b/data/look_up.rda differ