diff --git a/NAMESPACE b/NAMESPACE
index d4c9a14..c2d8638 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(agg_lt)
+export(agg_tf)
 export(ccmpp)
 export(check_mx_ax_qx)
 export(gen_Tx)
diff --git a/R/00_globals.R b/R/00_globals.R
index 23b5c9f..f326cef 100644
--- a/R/00_globals.R
+++ b/R/00_globals.R
@@ -20,5 +20,5 @@ utils::globalVariables(c("age_int", "age_group_years_start", "age_group_years_en
                          "id_cols_no_age", "sex", "mx_inf", "has_1m0",
                          "new_ax", "max_ax_diff",
                          "year_start", "year_end", "initial_ax",
-                         "prop_female", "srb", "asfr",
+                         "prop_female", "srb", "asfr", "tf",
                          "qx_inf"))
diff --git a/R/agg_tf.R b/R/agg_tf.R
new file mode 100644
index 0000000..93cca26
--- /dev/null
+++ b/R/agg_tf.R
@@ -0,0 +1,128 @@
+#' @title Calculate total fertility aggregates
+#'
+#' @description Calculate total fertility for a specified age range using
+#' age-specific fertility rates.
+#'
+#' @param dt \[`data.table()`\]\cr
+#'   ASFR input data. Must include all columns in `id_cols`, and a column for
+#'   'asfr'.
+#' @param age_mapping \[`data.table()`\]\cr
+#'   Specification of age interval to aggregate to. Required columns are
+#'   'age_start' and 'age_end'.
+#' @inheritParams hierarchyUtils::agg
+#'
+#' @return \[`data.table()`\]\cr Aggregated total fertility with columns for all
+#'   `id_cols` and a 'tf' column. Will only return the age groups specified in
+#'   `age_mapping`.
+#'
+#' @seealso [hierarchyUtils::agg()]
+#'
+#' @inheritSection hierarchyUtils::agg Severity Arguments
+#'
+#' @details
+#' Calculate total fertility aggregate for ages within a specific age range.
+#' TFR (total fertility rate) is measured over the entire reproductive age span,
+#' typically defined as between age 15 and 49 (or 10 and 54). `agg_tf` also
+#' allows calculation of total fertility for other age spans like total
+#' fertility under 25 and total fertility over 30.
+#'
+#' Total fertility is calculated as the sum of ASFR multiplied by the number of
+#' years in an age group. This number represents the average number of children
+#' born to a woman if (1) she experiences a specific set of age specific
+#' fertility rates and (2) she survives through the end of the age span.
+#' Preston pg 95.
+#'
+#' This is different from an age-specific fertility rate (ASFR) or a crude birth
+#' rate (CBR), both of which are calculated as births/population for a
+#' particular age group or all reproductive ages, respectively.
+#'
+#' @references
+#' Preston, Samuel, Patrick Heuveline, and Michel Guillot. 2001. Demography:
+#' Measuring and Modeling Population. Wiley.
+#'
+#' @examples
+#' # calculate total fertility under 25 (ages 10 to 24)
+#' dt <- data.table::data.table(
+#'   age_start = c(10, 15, 20, 25, 30, 35, 40, 45),
+#'   age_end = c(15, 20, 25, 30, 35, 40, 45, 50),
+#'   asfr = c(0.00005, 0.02, 0.07, 0.08, 0.05, 0.02, 0.004, 0.0002)
+#' )
+#'
+#' dt <- agg_tf(
+#'   dt = dt,
+#'   id_cols = c("age_start", "age_end"),
+#'   age_mapping = data.table::data.table(age_start = 10, age_end = 25)
+#' )
+#'
+#' @export
+agg_tf <- function(dt,
+                   id_cols,
+                   age_mapping,
+                   missing_dt_severity = "stop",
+                   overlapping_dt_severity = "stop",
+                   present_agg_severity = "stop",
+                   na_value_severity = "stop",
+                   quiet = FALSE) {
+
+  # Validate arguments (before `hierarchyUtils::agg`) -----------------------
+
+  # basic checks for `id_cols` argument ('asfr' is a value column, so it is
+  # checked against `dt` below rather than required in `id_cols`)
+  assertthat::assert_that(
+    assertive::is_character(id_cols),
+    all(c("age_start", "age_end") %in% id_cols),
+    msg = paste(
+      "`id_cols` must be a character vector that includes 'age_start' &",
+      "'age_end'"
+    )
+  )
+
+  # basic checks for `dt` argument
+  assertive::assert_is_data.table(dt)
+  assertable::assert_colnames(
+    data = dt, colnames = c(id_cols, "asfr"),
+    only_colnames = FALSE, quiet = TRUE
+  )
+
+  # prep -----------------------------------------------------------------------
+
+  original_col_order <- data.table::copy(names(dt))
+  original_keys <- data.table::copy(data.table::key(dt))
+
+  dt <- data.table::copy(dt)
+  hierarchyUtils::gen_length(dt, col_stem = "age")
+
+  # calculate ------------------------------------------------------------------
+
+  # contribution of each age group: rate multiplied by years in the group
+  dt[, tf := asfr * age_length]
+
+  dt <- hierarchyUtils::agg(
+    dt[, .SD, .SDcols = c(id_cols, "tf")],
+    id_cols = id_cols,
+    value_cols = "tf",
+    col_stem = "age",
+    col_type = "interval",
+    mapping = age_mapping,
+    agg_function = sum,
+    missing_dt_severity = missing_dt_severity,
+    overlapping_dt_severity = overlapping_dt_severity,
+    present_agg_severity = present_agg_severity,
+    na_value_severity = na_value_severity,
+    quiet = quiet
+  )
+
+  # only `id_cols` and 'tf' are passed through the aggregation, so restore the
+  # original column order and keys using just the columns still present
+  final_cols <- c(intersect(original_col_order, id_cols), "tf")
+  data.table::setcolorder(dt, final_cols)
+
+  final_keys <- intersect(original_keys, id_cols)
+  if (length(final_keys) == 0) {
+    # use NULL rather than a zero-length vector to leave the result unkeyed
+    final_keys <- NULL
+  }
+  data.table::setkeyv(dt, final_keys)
+
+  return(dt)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 44db0a4..2663749 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -38,6 +38,11 @@ reference:
   - gen_nLx_from_nSx
   - gen_lx_from_nLx_ax
 
+- title: "Fertility metrics"
+  desc: "Functions for calculating fertility related metrics."
+- contents:
+  - agg_tf
+
 - title: "Other"
   desc: "Additional miscellaneous functions."
 - contents:
diff --git a/man/agg_tf.Rd b/man/agg_tf.Rd
new file mode 100644
index 0000000..d10cadc
--- /dev/null
+++ b/man/agg_tf.Rd
@@ -0,0 +1,169 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/agg_tf.R
+\name{agg_tf}
+\alias{agg_tf}
+\title{Calculate total fertility aggregates}
+\usage{
+agg_tf(
+  dt,
+  id_cols,
+  age_mapping,
+  missing_dt_severity = "stop",
+  overlapping_dt_severity = "stop",
+  present_agg_severity = "stop",
+  na_value_severity = "stop",
+  quiet = FALSE
+)
+}
+\arguments{
+\item{dt}{[\code{data.table()}]\cr
+ASFR input data. Must include all columns in \code{id_cols}, and a column for
+'asfr'.}
+
+\item{id_cols}{[\code{character()}]\cr
+ID columns that uniquely identify each row of \code{dt}.}
+
+\item{age_mapping}{[\code{data.table()}]\cr
+Specification of age interval to aggregate to. Required columns are
+'age_start' and 'age_end'.}
+
+\item{missing_dt_severity}{[\code{character(1)}]\cr
+What should happen when \code{dt} is missing levels of \code{col_stem} that
+prevent aggregation or scaling from occurring? Can be either 'skip',
+'stop', 'warning', 'message', or 'none'. Default is 'stop'. See section on
+'Severity Arguments' for more information.}
+
+\item{overlapping_dt_severity}{[\code{character(1)}]\cr
+When aggregating/scaling an interval variable or \code{collapse_interval_cols=TRUE}
+what should happen when overlapping intervals are identified? Can be either
+'skip', 'stop', 'warning', 'message', or 'none'. Default is 'stop'. See
+section on 'Severity Arguments' for more information.}
+
+\item{present_agg_severity}{[\code{logical(1)}]\cr
+What should happen when \code{dt} already has requested aggregates (from
+\code{mapping})? Can be either 'skip', 'stop', 'warning', 'message',
+or 'none'. Default is 'stop'. See section on 'Severity Arguments' for more
+information.}
+
+\item{na_value_severity}{[\code{character(1)}]\cr
+What should happen when 'NA' values are present in \code{value_cols}? Can be
+either 'skip', 'stop', 'warning', 'message', or 'none'. Default is 'stop'.
+See section on 'Severity Arguments' for more information.}
+
+\item{quiet}{[\code{logical(1)}]\cr
+Should progress messages be suppressed as the function is run? Default is
+False.}
+}
+\value{
+[\code{data.table()}]\cr Aggregated total fertility with columns for all
+\code{id_cols} and a 'tf' column. Will only return the age groups specified in
+\code{age_mapping}.
+}
+\description{
+Calculate total fertility for a specified age range using
+age-specific fertility rates.
+}
+\details{
+Calculate total fertility aggregate for ages within a specific age range.
+TFR (total fertility rate) is measured over the entire reproductive age span,
+typically defined as between age 15 and 49 (or 10 and 54). \code{agg_tf} also
+allows calculation of total fertility for other age spans like total
+fertility under 25 and total fertility over 30.
+
+Total fertility is calculated as the sum of ASFR multiplied by the number of
+years in an age group. This number represents the average number of children
+born to a woman if (1) she experiences a specific set of age specific
+fertility rates and (2) she survives through the end of the age span.
+Preston pg 95.
+
+This is different from an age-specific fertility rate (ASFR) or a crude birth
+rate (CBR), both of which are calculated as births/population for a
+particular age group or all reproductive ages, respectively.
+}
+\section{Severity Arguments}{
+
+
+\strong{\code{missing_dt_severity}}:
+
+Check for missing levels of \code{col_stem}, the variable being aggregated or
+scaled over.
+\enumerate{
+\item \code{stop}: throw error (this is the default).
+\item \code{warning} or \code{message}: throw warning/message and continue with
+aggregation/scaling for requested aggregations/scalings where expected input
+data in \code{dt} is available.
+\item \code{none}: don't throw error or warning, continue with aggregation/scaling
+for requested aggregations/scalings where expected input data in \code{dt} is
+available.
+\item \code{skip}: skip this check and continue with aggregation/scaling.
+}
+
+\strong{\code{present_agg_severity}} (\code{agg} only):
+
+Check for requested aggregates in \code{mapping} that are already present
+\enumerate{
+\item \code{stop}: throw error (this is the default).
+\item \code{warning} or \code{message}: throw warning/message, drop aggregates and continue
+with aggregation.
+\item \code{none}: don't throw error or warning, drop aggregates and continue with
+aggregation.
+\item \code{skip}: skip this check and add to the values already present for the
+aggregates.
+}
+
+\strong{\code{na_value_severity}}:
+
+Check for 'NA' values in the \code{value_cols}.
+\enumerate{
+\item \code{stop}: throw error (this is the default).
+\item \code{warning} or \code{message}: throw warning/message, drop missing values and
+continue with aggregation/scaling where possible (this likely will cause
+another error because of \code{missing_dt_severity}, consider setting
+\code{missing_dt_severity = "skip"} for functionality similiar to \code{na.rm = TRUE}).
+\item \code{none}: don't throw error or warning, drop missing values and continue
+with aggregation/scaling where possible (this likely will cause another error
+because of \code{missing_dt_severity}, consider setting
+\code{missing_dt_severity = "skip"} for functionality similiar to \code{na.rm = TRUE}).
+\item \code{skip}: skip this check and propagate \code{NA} values through
+aggregation/scaling.
+}
+
+\strong{\code{overlapping_dt_severity}}:
+Check for overlapping intervals that prevent collapsing to the most detailed
+common set of intervals. Or check for overlapping intervals in \code{col_stem}
+when aggregating/scaling.
+\enumerate{
+\item \code{stop}: throw error (this is the default).
+\item \code{warning} or \code{message}: throw warning/message, drop overlapping intervals
+and continue with aggregation/scaling where possible (this may cause another
+error because of \code{missing_dt_severity}).
+3 \code{none}: don't throw error or warning, drop overlapping intervals and
+continue with aggregation/scaling where possible (this may cause another
+error because of \code{missing_dt_severity}).
+\item \code{skip}: skip this check and continue with aggregation/scaling.
+}
+
+}
+
+\examples{
+# calculate total fertility under 25 (ages 10 to 24)
+dt <- data.table::data.table(
+  age_start = c(10, 15, 20, 25, 30, 35, 40, 45),
+  age_end = c(15, 20, 25, 30, 35, 40, 45, 50),
+  asfr = c(0.00005, 0.02, 0.07, 0.08, 0.05, 0.02, 0.004, 0.0002)
+)
+
+dt <- agg_tf(
+  dt = dt,
+  id_cols = c("age_start", "age_end"),
+  age_mapping = data.table::data.table(age_start = 10, age_end = 25)
+)
+
+}
+\references{
+Preston, Samuel, Patrick Heuveline, and Michel Guillot. 2001. Demography:
+Measuring and Modeling Population. Wiley.
+}
+\seealso{
+\code{\link[hierarchyUtils:agg_scale]{hierarchyUtils::agg()}}
+}
diff --git a/tests/testthat/test_agg_tfr.R b/tests/testthat/test_agg_tfr.R
new file mode 100644
index 0000000..0c98207
--- /dev/null
+++ b/tests/testthat/test_agg_tfr.R
@@ -0,0 +1,39 @@
+library(data.table)
+
+# test dataset from Preston pg 96 Box 5.1
+dt <- data.table::data.table(
+  location = "USA", year = 1992,
+  age_start = seq(10, 45, 5), age_end = seq(15, 50, 5),
+  asfr = c(0.0014, 0.0607, 0.1146, 0.1174, 0.0802, 0.0325, 0.0059, 0.0003)
+)
+
+expected <- data.table::data.table(
+  location = "USA", year = 1992,
+  age_start = 10, age_end = 50,
+  tf = c(2.064)
+)
+
+id_cols <- c("location", "year", "age_start", "age_end")
+mapping <- data.table(age_start = 10, age_end = 50)
+
+test_that("test that `agg_tf` gives expected output", {
+  output_dt <- agg_tf(
+    dt = dt,
+    id_cols = id_cols,
+    age_mapping = mapping
+  )
+  # `expected` (2.064) and 5 * sum(asfr) for the rates above (2.065) differ
+  # slightly (presumably rounding in the source table), so allow a small
+  # relative difference
+  testthat::expect_equivalent(output_dt, expected, tolerance = 1e-3)
+})
+
+test_that("test that `agg_tf` gives expected errors", {
+  testthat::expect_error(
+    agg_tf(
+      dt = dt,
+      id_cols = id_cols,
+      age_mapping = data.table(age_start = 10, age_end = 18)
+    ),
+    regexp = "expected input data is missing."
+  )
+})