From 6030f00caa77c30171167a9ab319097a0c3aab75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20M=C3=BCller-Schee=C3=9Fel?= Date: Sun, 7 Jul 2019 23:30:39 +0200 Subject: [PATCH 1/6] Added monotonic spline-function for dx-values. --- DESCRIPTION | 3 +- NAMESPACE | 1 + R/analytical_functions.R | 60 +++++++++++++++++++++++++++++++++++++--- man/life.table.Rd | 10 ++++++- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 56dc18f..949b1f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Imports: magrittr (>= 1.5), Rdpack (>= 0.4-20), reshape2 (>= 1.4.2), - methods (>= 3.3.3) + methods (>= 3.3.3), + demography (>= 1.22) RoxygenNote: 6.1.1 Suggests: testthat, diff --git a/NAMESPACE b/NAMESPACE index eb99f77..2481bf4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(is.mortaar_life_table_list) export(life.table) export(prep.life.table) importFrom(Rdpack,reprompt) +importFrom(demography,cm.spline) importFrom(graphics,axis) importFrom(graphics,grid) importFrom(graphics,legend) diff --git a/R/analytical_functions.R b/R/analytical_functions.R index c1973da..61544f9 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -43,6 +43,13 @@ #' #' Default setup is 1/3 for every age class <= 5 life years, and 1/2 for the others. #' +#' @param option_spline integer, optional. If > 0, values for adults will be +#' interpolated by a monotonic cubic spline. Usual options will by '10' or '20' which +#' will interpolate the values for individuals of an age of 20 or older by 10- or 20- +#' year cumulated values. To be used carefully, as diagnostic features of the life +#' table might be smoothed and essentially removed. Only available when the methods +#' 'Standard' or 'Equal5' in prep.life.table have been chosen. +#' #' @return #' An object of class mortaar_life_table or mortaar_life_table_list. #' Each mortaar_life_table contains the following variables: @@ -123,9 +130,10 @@ #' #' @importFrom magrittr "%>%" #' @importFrom Rdpack reprompt +#' @importFrom demography cm.spline #' #' @export -life.table <- function(neclist, agecor = TRUE, agecorfac = c()) { +life.table <- function(neclist, agecor = TRUE, agecorfac = c(), option_spline = NULL) { # Check if the input list is a data.frame, if so, it is # packed into a list. @@ -150,7 +158,7 @@ life.table <- function(neclist, agecor = TRUE, agecorfac = c()) { lapply(., function(necdf) { vars <- colnames(necdf)[colnames(necdf) %in% okvars] life.table.df( - necdf[,vars], agecor = agecor, agecorfac = agecorfac) + necdf[,vars], agecor = agecor, agecorfac = agecorfac, option_spline = option_spline) } ) %>% `class<-`(c("mortaar_life_table_list", class(.))) -> res @@ -159,7 +167,7 @@ life.table <- function(neclist, agecor = TRUE, agecorfac = c()) { necdf <- neclist[[1]] vars <- colnames(necdf)[colnames(necdf) %in% okvars] life.table.df( - necdf[,vars], agecor = agecor, agecorfac = agecorfac + necdf[,vars], agecor = agecor, agecorfac = agecorfac, option_spline = option_spline ) -> res } @@ -244,7 +252,7 @@ inputchecks <- function(neclist, okvars) { } -life.table.df <- function(necdf, agecor = TRUE, agecorfac = c()) { +life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = NULL) { # x: well readable rownames for age classes. limit <- necdf['a'] %>% sum @@ -269,9 +277,53 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c()) { ) } + # dx: proportion of deaths within x. necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100 + + # in case of spline-interpolation, dx-values will be replaced with interpolated once + + if (length(option_spline) > 0) { + + a_cumsum <- necdf['a'] %>% cumsum + + # add index number to resulting data.frame + a_cumsum$id <- 1:nrow(a_cumsum) + + repeat_number <- floor((limit - 20) / option_spline) + + a_cumsum_select <- a_cumsum$id[which(a_cumsum$a <= 20)] + + for (t in 1 : repeat_number) { + a_cumsum_select <- c(a_cumsum_select, (a_cumsum$id[which(a_cumsum$a == (20 + (t * option_spline)))])) + } + + if (((limit - 20) / option_spline) - floor((limit - 20) / option_spline) > 0) { + a_cumsum_select <- c(a_cumsum_select, a_cumsum$id[nrow(necdf)]) + } + + dx_cumsum <- c(cumsum(necdf[, 'dx']))[1:nrow(necdf)] + y_spline <- dx_cumsum[a_cumsum_select] + x_spline <- a_cumsum$a[a_cumsum_select] + + # interpolating the values with a monotonic cubic spline + dem <- cm.spline(x_spline, y_spline, n = (limit/5 + 1), xmin = 0, xmax = limit) + + # the first value of the interpolation has to be discarded and replaced + if (nrow(necdf) < length(dem$y)) { + dem_dx <- dem$y[c(-1)] + } + else + { + dem_dx <- c(necdf$dx[1], dem$y[c(-1)]) + } + + necdf['dx'] <- dem_dx - c(0, dem_dx[-nrow(necdf)]) + + } + + # lx: proportion of survivorship within x. necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']) )[1:nrow(necdf)] diff --git a/man/life.table.Rd b/man/life.table.Rd index 95cdd10..819b4c6 100644 --- a/man/life.table.Rd +++ b/man/life.table.Rd @@ -4,7 +4,8 @@ \alias{life.table} \title{Calculates a life table} \usage{ -life.table(neclist, agecor = TRUE, agecorfac = c()) +life.table(neclist, agecor = TRUE, agecorfac = c(), + option_spline = NULL) } \arguments{ \item{neclist}{single data.frame or list of data.frames @@ -28,6 +29,13 @@ Default setup is: TRUE.} replace the standard values from the first age interval onward. Default setup is 1/3 for every age class <= 5 life years, and 1/2 for the others.} + +\item{option_spline}{integer, optional. If > 0, values for adults will be +interpolated by a monotonic cubic spline. Usual options will by '10' or '20' which +will interpolate the values for individuals of an age of 20 or older by 10- or 20- +year cumulated values. To be used carefully, as diagnostic features of the life +table might be smoothed and essentially removed. Only available when the methods +'Standard' or 'Equal5' in prep.life.table have been chosen.} } \value{ An object of class mortaar_life_table or mortaar_life_table_list. From e622b2b4510fff233878d5a79d4f2e88fd8c6102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20M=C3=BCller-Schee=C3=9Fel?= Date: Tue, 9 Jul 2019 22:31:55 +0200 Subject: [PATCH 2/6] Removed dependency of package "demography" as suggested --- DESCRIPTION | 3 +-- R/analytical_functions.R | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 949b1f9..56dc18f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,8 +27,7 @@ Imports: magrittr (>= 1.5), Rdpack (>= 0.4-20), reshape2 (>= 1.4.2), - methods (>= 3.3.3), - demography (>= 1.22) + methods (>= 3.3.3) RoxygenNote: 6.1.1 Suggests: testthat, diff --git a/R/analytical_functions.R b/R/analytical_functions.R index 61544f9..c05a748 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -130,7 +130,6 @@ #' #' @importFrom magrittr "%>%" #' @importFrom Rdpack reprompt -#' @importFrom demography cm.spline #' #' @export life.table <- function(neclist, agecor = TRUE, agecorfac = c(), option_spline = NULL) { @@ -308,7 +307,7 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = x_spline <- a_cumsum$a[a_cumsum_select] # interpolating the values with a monotonic cubic spline - dem <- cm.spline(x_spline, y_spline, n = (limit/5 + 1), xmin = 0, xmax = limit) + dem <- stats::spline(x_spline, y_spline, n = (limit/5 + 1), xmin = 0, xmax = limit, method = "hyman") # the first value of the interpolation has to be discarded and replaced if (nrow(necdf) < length(dem$y)) { From 854c2214e6a54ff7204a4781249d3395788e64c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20M=C3=BCller-Schee=C3=9Fel?= Date: Sun, 14 Jul 2019 16:22:04 +0200 Subject: [PATCH 3/6] Spline-option now refactored and check added for making sure that only standard-options are used for creation of life table. --- NAMESPACE | 1 - R/analytical_functions.R | 82 +++++++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2481bf4..eb99f77 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,7 +17,6 @@ export(is.mortaar_life_table_list) export(life.table) export(prep.life.table) importFrom(Rdpack,reprompt) -importFrom(demography,cm.spline) importFrom(graphics,axis) importFrom(graphics,grid) importFrom(graphics,legend) diff --git a/R/analytical_functions.R b/R/analytical_functions.R index c05a748..ad78896 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -251,39 +251,14 @@ inputchecks <- function(neclist, okvars) { } -life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = NULL) { - - # x: well readable rownames for age classes. - limit <- necdf['a'] %>% sum - lower <- c(0, necdf[, 'a'] %>% cumsum)[1:nrow(necdf)] - upper <- necdf[, 'a'] %>% cumsum %>% `-`(1) - xvec <- paste0(lower, "--", upper) - - if ("x" %in% colnames(necdf) %>% `!`) { - necdf <- cbind( - x = xvec, - necdf, - stringsAsFactors = FALSE - ) - } else if ( - "x" %in% colnames(necdf) && - (necdf[, 'x'] != xvec) %>% all - ) { - necdf <- cbind( - x_auto = xvec, - necdf, - stringsAsFactors = FALSE - ) +dx_default <- function(Dx) { + Dx / sum(Dx) * 100 } - # dx: proportion of deaths within x. - necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100 +dx_spline <- function(necdf, a, Dx, limit, option_spline) { - - # in case of spline-interpolation, dx-values will be replaced with interpolated once - - if (length(option_spline) > 0) { + necdf['dx'] <- dx_default(necdf['Dx']) a_cumsum <- necdf['a'] %>% cumsum @@ -315,14 +290,61 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = } else { - dem_dx <- c(necdf$dx[1], dem$y[c(-1)]) + dem_dx <- c(necdf$dx[1], dem$y[c(-1)]) } necdf['dx'] <- dem_dx - c(0, dem_dx[-nrow(necdf)]) + return(necdf) + +} + + +life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = NULL) { + + # x: well readable rownames for age classes. + limit <- necdf['a'] %>% sum + lower <- c(0, necdf[, 'a'] %>% cumsum)[1:nrow(necdf)] + upper <- necdf[, 'a'] %>% cumsum %>% `-`(1) + xvec <- paste0(lower, "--", upper) + + if ("x" %in% colnames(necdf) %>% `!`) { + necdf <- cbind( + x = xvec, + necdf, + stringsAsFactors = FALSE + ) + } else if ( + "x" %in% colnames(necdf) && + (necdf[, 'x'] != xvec) %>% all + ) { + necdf <- cbind( + x_auto = xvec, + necdf, + stringsAsFactors = FALSE + ) } + # in case of spline-interpolation, dx-values will be replaced with interpolated once + + if (!(length(option_spline) > 0)) { + necdf['dx'] <- dx_default(necdf['Dx']) + } else { + unique_a <- c(unique(necdf['a'])) + unique_a <- unlist(unique_a$a) + if (!(((length(unique_a) == 1) & (5 %in% unique_a)) || + ((length(unique_a) == 3) & ((5 %in% unique_a[3]) + & (4 %in% unique_a[2]) + & (1 %in% unique_a[1]))) + )) { + stop("Spline-interpolation works only with 5-year-age classes (or 1- and 4-year classes + for the first 5 years). Please take a look at ?life.table to determine how your + input data should look like for the option_spline option.") + } + necdf['dx'] <- dx_spline(necdf, necdf['a'], necdf['Dx'], limit, option_spline) + } + # lx: proportion of survivorship within x. necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']) )[1:nrow(necdf)] From 545e6a99df1ddbb9d80b98609f66ee42f518a835 Mon Sep 17 00:00:00 2001 From: nevrome Date: Sun, 21 Jul 2019 21:55:10 +0200 Subject: [PATCH 4/6] some refactoring --- R/analytical_functions.R | 119 ++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/R/analytical_functions.R b/R/analytical_functions.R index ad78896..d3d4704 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -181,6 +181,8 @@ life.table <- function(neclist, agecor = TRUE, agecorfac = c(), option_spline = return(res) } +#### input checks #### + inputchecks <- function(neclist, okvars) { # Checks if the input is a list. @@ -251,54 +253,7 @@ inputchecks <- function(neclist, okvars) { } -dx_default <- function(Dx) { - Dx / sum(Dx) * 100 - } - - -dx_spline <- function(necdf, a, Dx, limit, option_spline) { - - necdf['dx'] <- dx_default(necdf['Dx']) - - a_cumsum <- necdf['a'] %>% cumsum - - # add index number to resulting data.frame - a_cumsum$id <- 1:nrow(a_cumsum) - - repeat_number <- floor((limit - 20) / option_spline) - - a_cumsum_select <- a_cumsum$id[which(a_cumsum$a <= 20)] - - for (t in 1 : repeat_number) { - a_cumsum_select <- c(a_cumsum_select, (a_cumsum$id[which(a_cumsum$a == (20 + (t * option_spline)))])) - } - - if (((limit - 20) / option_spline) - floor((limit - 20) / option_spline) > 0) { - a_cumsum_select <- c(a_cumsum_select, a_cumsum$id[nrow(necdf)]) - } - - dx_cumsum <- c(cumsum(necdf[, 'dx']))[1:nrow(necdf)] - y_spline <- dx_cumsum[a_cumsum_select] - x_spline <- a_cumsum$a[a_cumsum_select] - - # interpolating the values with a monotonic cubic spline - dem <- stats::spline(x_spline, y_spline, n = (limit/5 + 1), xmin = 0, xmax = limit, method = "hyman") - - # the first value of the interpolation has to be discarded and replaced - if (nrow(necdf) < length(dem$y)) { - dem_dx <- dem$y[c(-1)] - } - else - { - dem_dx <- c(necdf$dx[1], dem$y[c(-1)]) - } - - necdf['dx'] <- dem_dx - c(0, dem_dx[-nrow(necdf)]) - - return(necdf) - -} - +#### core algorithm #### life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = NULL) { @@ -325,11 +280,9 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = ) } - # in case of spline-interpolation, dx-values will be replaced with interpolated once - - if (!(length(option_spline) > 0)) { - necdf['dx'] <- dx_default(necdf['Dx']) + if (!(length(option_spline) > 0)) { + necdf['dx'] <- dx_default(necdf[['Dx']]) } else { unique_a <- c(unique(necdf['a'])) unique_a <- unlist(unique_a$a) @@ -337,17 +290,18 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = ((length(unique_a) == 3) & ((5 %in% unique_a[3]) & (4 %in% unique_a[2]) & (1 %in% unique_a[1]))) - )) { - stop("Spline-interpolation works only with 5-year-age classes (or 1- and 4-year classes - for the first 5 years). Please take a look at ?life.table to determine how your - input data should look like for the option_spline option.") + )) { + paste0( + "Spline-interpolation works only with 5-year-age classes (or 1- and ", + "4-year classes for the first 5 years). Please take a look at ?life.table ", + "to determine how your input data should look like for the option_spline option." + ) %>% stop } - necdf['dx'] <- dx_spline(necdf, necdf['a'], necdf['Dx'], limit, option_spline) + necdf['dx'] <- dx_spline(necdf[['a']], necdf[['Dx']], limit, option_spline) } # lx: proportion of survivorship within x. - necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']) - )[1:nrow(necdf)] + necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']))[1:nrow(necdf)] # qx: probability of death within x. necdf['qx'] <- necdf['dx'] / necdf['lx'] * 100 @@ -422,3 +376,50 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = `class<-`(c("mortaar_life_table", class(.))) %>% return() } + +#### helper functions #### + +## dx ## +dx_default <- function(Dx) { + Dx / sum(Dx) * 100 +} + +dx_spline <- function(a, Dx, limit, option_spline) { + + working_data <- data.frame( + id = 1:length(a), + a = a, + Dx = Dx, + a_cumsum = a %>% cumsum, + dx_default = Dx %>% dx_default + ) + + repeat_number <- floor((limit - 20) / option_spline) + + a_cumsum_select <- working_data$id[which(working_data$a <= 20)] + for (t in 1:repeat_number) { + a_cumsum_select <- c(a_cumsum_select, (working_data$id[which(working_data$a == (20 + (t * option_spline)))])) + } + if (((limit - 20) / option_spline) - floor((limit - 20) / option_spline) > 0) { + a_cumsum_select <- c(a_cumsum_select, working_data$id[length(a)]) + } + + x_spline <- working_data$a_cumsum[a_cumsum_select] + dx_cumsum <- c(cumsum(working_data$dx_default))[1:length(a)] + y_spline <- dx_cumsum[a_cumsum_select] + + # interpolating the values with a monotonic cubic spline + dem <- stats::spline(x_spline, y_spline, n = (limit/5 + 1), xmin = 0, xmax = limit, method = "hyman") + + # the first value of the interpolation has to be discarded and replaced + if (length(a) < length(dem$y)) { + dem_dx <- dem$y[c(-1)] + } else { + dem_dx <- c(working_data$dx_default[1], dem$y[c(-1)]) + } + + dx_approx <- dem_dx - c(0, dem_dx[-length(a)]) + + return(dx_approx) + +} From e76d717afccbe2b77d2351c5adb455ab4a3285b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20M=C3=BCller-Schee=C3=9Fel?= Date: Mon, 22 Jul 2019 00:42:10 +0200 Subject: [PATCH 5/6] Made a small change in the variable naming, and the function seems to work again. --- R/analytical_functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/analytical_functions.R b/R/analytical_functions.R index d3d4704..da7b51b 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -396,9 +396,9 @@ dx_spline <- function(a, Dx, limit, option_spline) { repeat_number <- floor((limit - 20) / option_spline) - a_cumsum_select <- working_data$id[which(working_data$a <= 20)] + a_cumsum_select <- working_data$id[which(working_data$a_cumsum <= 20)] for (t in 1:repeat_number) { - a_cumsum_select <- c(a_cumsum_select, (working_data$id[which(working_data$a == (20 + (t * option_spline)))])) + a_cumsum_select <- c(a_cumsum_select, (working_data$id[which(working_data$a_cumsum == (20 + (t * option_spline)))])) } if (((limit - 20) / option_spline) - floor((limit - 20) / option_spline) > 0) { a_cumsum_select <- c(a_cumsum_select, working_data$id[length(a)]) From b288a9806ca64fa3dcc7a8081edc8a9724bbbad6 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Sat, 27 Jul 2019 17:48:27 +0200 Subject: [PATCH 6/6] implemented option_spline test a bit more elegant --- R/analytical_functions.R | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/R/analytical_functions.R b/R/analytical_functions.R index da7b51b..b89d262 100644 --- a/R/analytical_functions.R +++ b/R/analytical_functions.R @@ -145,7 +145,7 @@ life.table <- function(neclist, agecor = TRUE, agecorfac = c(), option_spline = okvars <- c("x", "a", "Dx") # Check the input. - inputchecks(neclist, okvars) + inputchecks(neclist, okvars, option_spline) # Apply life.table.vec to every column of the input df # and create an output mortaar_life_table_list of @@ -183,7 +183,7 @@ life.table <- function(neclist, agecor = TRUE, agecorfac = c(), option_spline = #### input checks #### -inputchecks <- function(neclist, okvars) { +inputchecks <- function(neclist, okvars, option_spline) { # Checks if the input is a list. if (neclist %>% is.list %>% `!`) { @@ -251,6 +251,35 @@ inputchecks <- function(neclist, okvars) { ) %>% warning } + # Checks if the special conditions for the option_spline are met + if (length(option_spline) > 0) { + + option_spline_test <- function(necdf) { + unique_a <- c(unique(necdf['a'])) + unique_a <- unlist(unique_a$a) + return( + !(((length(unique_a) == 1) & (5 %in% unique_a)) || ( + (length(unique_a) == 3) & ( + (5 %in% unique_a[3]) & + (4 %in% unique_a[2]) & + (1 %in% unique_a[1]) + ) + ) + )) + } + + if (neclist %>% lapply(option_spline_test) %>% unlist %>% any) { + paste0( + "One of your data.frames does not fulfill the conditions for the spline ", + "creation option. ", + "Spline-interpolation works only with 5-year-age classes (or 1- and ", + "4-year classes for the first 5 years). Please take a look at ?life.table ", + "to determine how your input data should look like." + ) %>% stop + } + + } + } #### core algorithm #### @@ -283,20 +312,7 @@ life.table.df <- function(necdf, agecor = TRUE, agecorfac = c(), option_spline = # in case of spline-interpolation, dx-values will be replaced with interpolated once if (!(length(option_spline) > 0)) { necdf['dx'] <- dx_default(necdf[['Dx']]) - } else { - unique_a <- c(unique(necdf['a'])) - unique_a <- unlist(unique_a$a) - if (!(((length(unique_a) == 1) & (5 %in% unique_a)) || - ((length(unique_a) == 3) & ((5 %in% unique_a[3]) - & (4 %in% unique_a[2]) - & (1 %in% unique_a[1]))) - )) { - paste0( - "Spline-interpolation works only with 5-year-age classes (or 1- and ", - "4-year classes for the first 5 years). Please take a look at ?life.table ", - "to determine how your input data should look like for the option_spline option." - ) %>% stop - } + } else { necdf['dx'] <- dx_spline(necdf[['a']], necdf[['Dx']], limit, option_spline) }