diff --git a/DESCRIPTION b/DESCRIPTION index 95671c2..9699fbc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,6 +15,10 @@ Authors@R: c( "Elin", "Rowlands", email = "elin.rowlands@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0009-0005-5166-0417") ), + person( + "Cecilia", "Campanile", email = "cecilia.campanile@ndorms.ox.ac.uk", + role = c("aut"), comment = c(ORCID = "0009-0007-6629-4661") + ), person( "Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0000-0002-9286-1128") @@ -71,4 +75,4 @@ URL: https://OHDSI.github.io/OmopSketch/ BugReports: https://github.com/OHDSI/OmopSketch/issues VignetteBuilder: knitr Remotes: - darwin-eu-dev/omopgenerics@mah_vl + darwin-eu-dev/omopgenerics diff --git a/R/plotConceptCounts.R b/R/plotConceptCounts.R index f92326c..ae0b2b0 100644 --- a/R/plotConceptCounts.R +++ b/R/plotConceptCounts.R @@ -23,7 +23,7 @@ #' #' result |> #' filter(variable_name == "Number subjects") |> -#' plotConceptCounts(facet = "codelist_name", colour = "codelist_name") +#' plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") #' #' PatientProfiles::mockDisconnect(cdm) #' } @@ -35,9 +35,7 @@ plotConceptCounts <- function(result, # subset to results of interest result <- result |> - visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts") |> - dplyr::mutate(variable_level = gsub(" to.*","",.data$variable_level)) |> - dplyr::mutate(variable_level = gsub("-01$","",.data$variable_level)) + visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts") if (nrow(result) == 0) { cli::cli_abort(c("!" = "No records found with result_type == summarise_concept_counts")) @@ -52,12 +50,14 @@ plotConceptCounts <- function(result, )) } + result1 <- result |> visOmopResults::splitAdditional() # Detect if there are several time intervals - if(length(unique(result$variable_level)) > 1 ){ + if("time_interval" %in% colnames(result1)){ # Line plot where each concept is a different line - p <- result |> - dplyr::filter(.data$variable_level != "overall") |> - visOmopResults::scatterPlot(x = "variable_level", + p <- result1 |> + dplyr::filter(.data$time_interval != "overall") |> + visOmopResults::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |> + visOmopResults::scatterPlot(x = "time_interval", y = "count", line = TRUE, point = TRUE, @@ -66,11 +66,20 @@ plotConceptCounts <- function(result, facet = facet, colour = colour) }else{ - p <- result |> - visOmopResults::barPlot(x = "standard_concept_name", - y = "count", - facet = facet, - colour = colour) + + if("standard_concept_name" %in% colnames(result1)){ + p <- result |> + visOmopResults::barPlot(x = "standard_concept_name", + y = "count", + facet = facet, + colour = colour) + }else{ + p <- result |> + visOmopResults::barPlot(x = "codelist_name", + y = "count", + facet = facet, + colour = colour) + } + p <- p + ggplot2::labs( x = "Concept name" ) diff --git a/R/plotInObservation.R b/R/plotInObservation.R index 9270555..7910f20 100644 --- a/R/plotInObservation.R +++ b/R/plotInObservation.R @@ -52,15 +52,15 @@ plotInObservation <- function(result, } # warn - warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "variable_level")) + warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "additional_level")) # plot - if(length(unique(result$variable_level)) > 1 ){ + if(length(unique(result$additional_level)) > 1 ){ result |> - dplyr::mutate(variable_level = as.Date(gsub(" to.*","",.data$variable_level))) |> + dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |> dplyr::filter(.data$estimate_name == "count") |> visOmopResults::scatterPlot( - x = "variable_level", + x = "time_interval", y = "count", line = TRUE, point = TRUE, diff --git a/R/plotRecordCount.R b/R/plotRecordCount.R index a42e1dd..221cb4b 100644 --- a/R/plotRecordCount.R +++ b/R/plotRecordCount.R @@ -38,12 +38,12 @@ plotRecordCount <- function(result, } # Detect if there are several time intervals - if(length(unique(result$variable_level)) > 1 ){ + if(length(unique(result$additional_level)) > 1 ){ # Line plot where each concept is a different line p <- result |> - dplyr::filter(.data$variable_level != "overall") |> + dplyr::filter(.data$additional_level != "overall") |> dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::scatterPlot(x = "variable_level", + visOmopResults::scatterPlot(x = "time_interval", y = "count", line = TRUE, point = TRUE, @@ -57,13 +57,13 @@ plotRecordCount <- function(result, ) }else{ p <- result |> - visOmopResults::barPlot(x = "variable_level", + visOmopResults::barPlot(x = "variable_name", y = "count", facet = facet, colour = colour) + ggplot2::labs( y = "Count", - x = "Number records" + x = "" ) } p diff --git a/R/summariseConceptCounts.R b/R/summariseConceptCounts.R index 20a1cee..e6ea43a 100644 --- a/R/summariseConceptCounts.R +++ b/R/summariseConceptCounts.R @@ -186,7 +186,6 @@ getCodeUse <- function(x, } cc <- records |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult(strata = strata, variable = "standard_concept_name", group = group, @@ -211,7 +210,10 @@ getCodeUse <- function(x, if(interval != "overall"){ cc <- cc |> visOmopResults::splitStrata() |> - dplyr::mutate(variable_level = .data$interval_group) |> + dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |> + dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |> + dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |> + dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |> visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> dplyr::select(-"interval_group") } diff --git a/R/summariseInObservation.R b/R/summariseInObservation.R index 5f1ea6c..bc0e2a5 100644 --- a/R/summariseInObservation.R +++ b/R/summariseInObservation.R @@ -52,6 +52,7 @@ summariseInObservation <- function(observationPeriod, checkOutput(output) ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] omopgenerics::assertLogical(sex, length = 1) + original_interval <- interval x <- validateIntervals(interval) interval <- x$interval unitInterval <- x$unitInterval @@ -88,7 +89,7 @@ summariseInObservation <- function(observationPeriod, result <- addSexOverall(result, sex) # Create summarisedResult - result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, interval, unitInterval) + result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, original_interval) CDMConnector::dropTable(cdm, name = dplyr::starts_with(tablePrefix)) return(result) @@ -172,7 +173,7 @@ countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, if(output == "person-days" | output == "all"){ if(interval != "overall"){ x <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("variable_level" = "interval_group") |> + dplyr::rename("additional_level" = "interval_group") |> dplyr::cross_join( observationPeriod |> dplyr::select("start_date" = "observation_period_start_date", @@ -188,14 +189,16 @@ countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, x <- observationPeriod |> dplyr::rename("start_date" = "observation_period_start_date", "end_date" = "observation_period_end_date") |> - dplyr::mutate("variable_level" = NA) + dplyr::mutate("additional_level" = "overall", + "additional_name" = "overall") } personDays <- x %>% dplyr::mutate(estimate_value = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> - dplyr::group_by(dplyr::across(dplyr::any_of(c("variable_level", "sex", "age_group")))) |> + dplyr::group_by(dplyr::across(dplyr::any_of(c("additional_level", "sex", "age_group")))) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(variable_name = "Number person-days") |> + dplyr::mutate("variable_name" = "Number person-days", + "additional_name" = "time_interval") |> dplyr::collect() }else{ personDays <- createEmptyIntervalTable(interval) @@ -212,20 +215,22 @@ if(output == "records" | output == "all"){ dplyr::compute(temporary = FALSE, name = tablePrefix) records <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("variable_level" = "interval_group") |> + dplyr::rename("additional_level" = "interval_group") |> dplyr::cross_join(x) |> dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |> - dplyr::group_by(.data$variable_level, .data$age_group, .data$sex) |> + dplyr::group_by(.data$additional_level, .data$age_group, .data$sex) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(variable_name = "Number records in observation") |> + dplyr::mutate("variable_name" = "Number records in observation", + "additional_name" = "time_interval") |> dplyr::collect() }else{ records <- observationPeriod |> dplyr::group_by(.data$age_group, .data$sex) |> dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> dplyr::mutate("variable_name" = "Number records in observation", - "variable_level" = NA) |> + "additional_level" = "overall", + "additional_name" = "overall") |> dplyr::collect() } }else{ @@ -234,12 +239,12 @@ if(output == "records" | output == "all"){ x <- personDays |> rbind(records) |> - dplyr::arrange(dplyr::across(dplyr::any_of("variable_level"))) + dplyr::arrange(dplyr::across(dplyr::any_of("additional_level"))) return(x) } -createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, interval, unitInterval){ +createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, original_interval){ result <- result |> dplyr::mutate("estimate_value" = as.character(.data$estimate_value)) |> visOmopResults::uniteStrata(cols = c("sex", "age_group")) |> @@ -248,15 +253,14 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)), "group_name" = "omop_table", "group_level" = name, + "variable_level" = as.character(NA), "estimate_name" = "count", - "estimate_type" = "integer", - "additional_name" = "overall", - "additional_level" = "overall" + "estimate_type" = "integer" ) result <- result |> rbind(result) |> - dplyr::group_by(.data$variable_level, .data$strata_level, .data$variable_name) |> + dplyr::group_by(.data$additional_level, .data$strata_level, .data$variable_name) |> dplyr::mutate(estimate_type = dplyr::if_else(dplyr::row_number() == 2, "percentage", .data$estimate_type)) |> dplyr::inner_join(denominator, by = "variable_name") |> dplyr::mutate(estimate_value = dplyr::if_else(.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value)) |> @@ -267,8 +271,7 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n "result_type" = "summarise_in_observation", "package_name" = "OmopSketch", "package_version" = as.character(utils::packageVersion("OmopSketch")), - "interval" = .env$interval, - "unitInterval" = .env$unitInterval + "interval" = .env$original_interval )) return(result) @@ -332,9 +335,10 @@ addSexOverall <- function(result, sex){ if(sex){ result <- result |> rbind( result |> - dplyr::group_by(.data$age_group, .data$variable_level, .data$variable_name) |> + dplyr::group_by(.data$age_group, .data$additional_level, .data$variable_name) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(sex = "overall") + dplyr::mutate(sex = "overall", + additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) ) } return(result) diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index 2c19171..c5cb0f2 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -37,6 +37,7 @@ summariseRecordCount <- function(cdm, # Initial checks ---- omopgenerics::validateCdmArgument(cdm) omopgenerics::assertCharacter(omopTableName) + original_interval <- interval x <- validateIntervals(interval) interval <- x$interval unitInterval <- x$unitInterval @@ -54,6 +55,7 @@ summariseRecordCount <- function(cdm, cdm = cdm, interval = interval, unitInterval = unitInterval, + original_interval, ageGroup = ageGroup, sex = sex) } @@ -65,7 +67,7 @@ summariseRecordCount <- function(cdm, #' @noRd summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval, - ageGroup, sex) { + original_interval, ageGroup, sex) { prefix <- omopgenerics::tmpPrefix() omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() @@ -107,7 +109,7 @@ summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInter } # Create summarised result ---- - result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, interval, unitInterval) + result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, original_interval) omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(prefix)) return(result) @@ -230,12 +232,11 @@ splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){ dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date")) } -createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, interval, unitInterval){ +createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, original_interval){ result <- result |> dplyr::mutate(n = 1) |> dplyr::select(-"person_id") |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( variables = "n", strata = strata, @@ -246,18 +247,15 @@ createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTab suppressMessages() |> dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> dplyr::mutate( - "result_id" = as.integer(1), - "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), "group_name" = "omop_table", - "group_level" = omopTableName, - "additional_name" = "overall", - "additional_level" = "overall" + "group_level" = omopTableName ) - if(interval != "overall"){ + if(original_interval != "overall"){ result <- result |> visOmopResults::splitStrata() |> - dplyr::mutate(variable_level = .data$interval_group) |> + dplyr::mutate(additional_level = .data$interval_group) |> + dplyr::mutate(additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) |> visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> dplyr::select(-"interval_group") } @@ -268,7 +266,8 @@ createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTab "result_id" = 1L, "result_type" = "summarise_record_count", "package_name" = "OmopSketch", - "package_version" = as.character(utils::packageVersion("OmopSketch")) + "package_version" = as.character(utils::packageVersion("OmopSketch")), + "interval" = .env$original_interval ) ) } diff --git a/R/utilities.R b/R/utilities.R index aa23797..007e858 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,18 +1,23 @@ startDate <- function(name) { tables$start_date[tables$table_name == name] } + endDate <- function(name) { tables$end_date[tables$table_name == name] } + standardConcept <- function(name) { tables$standard_concept[tables$table_name == name] } + sourceConcept <- function(name) { tables$source_concept[tables$table_name == name] } + typeConcept <- function(name) { tables$type_concept[tables$table_name == name] } + tableId <- function(name) { tables$id[tables$table_name == name] } @@ -21,7 +26,7 @@ warnFacetColour <- function(result, cols) { colsToWarn <- result |> dplyr::select( "cdm_name", "group_name", "group_level", "strata_name", "strata_level", - "variable_name", "variable_level", "additional_name", "additional_level" + "variable_name", "variable_level" ) |> dplyr::distinct() |> visOmopResults::splitAll() |> @@ -36,12 +41,14 @@ warnFacetColour <- function(result, cols) { } invisible(NULL) } + collapseStr <- function(x, sep) { x <- x[x != ""] if (length(x) == 1) return(x) len <- length(x) paste0(paste0(x[-len], collapse = ", "), " ", sep, " ", x[len]) } + asCharacterFacet <- function(facet) { if (rlang::is_formula(facet)) { facet <- as.character(facet) diff --git a/man/OmopSketch-package.Rd b/man/OmopSketch-package.Rd index 0bc315f..459a2b9 100644 --- a/man/OmopSketch-package.Rd +++ b/man/OmopSketch-package.Rd @@ -25,6 +25,7 @@ Authors: \itemize{ \item Kim Lopez-Guell \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) \item Elin Rowlands \email{elin.rowlands@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0005-5166-0417}{ORCID}) + \item Cecilia Campanile \email{cecilia.campanile@ndorms.ox.ac.uk} \item Edward Burn \email{edward.burn@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-9286-1128}{ORCID}) \item MartĂ­ CatalĂ  \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) } diff --git a/man/plotConceptCounts.Rd b/man/plotConceptCounts.Rd index 63492a7..2492a3d 100644 --- a/man/plotConceptCounts.Rd +++ b/man/plotConceptCounts.Rd @@ -37,7 +37,7 @@ result <- cdm |> result |> filter(variable_name == "Number subjects") |> - plotConceptCounts(facet = "codelist_name", colour = "codelist_name") + plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") PatientProfiles::mockDisconnect(cdm) } diff --git a/tests/testthat/test-summariseConceptCounts.R b/tests/testthat/test-summariseConceptCounts.R index 7752849..6e63e46 100644 --- a/tests/testthat/test-summariseConceptCounts.R +++ b/tests/testthat/test-summariseConceptCounts.R @@ -10,6 +10,8 @@ test_that("summarise code use - eunomia", { results <- summariseConceptCounts(cdm = cdm, conceptId = cs, interval = "years", + countBy = c("record", "person"), + concept = TRUE, sex = TRUE, ageGroup = list(c(0,17), c(18,65), @@ -23,12 +25,15 @@ test_that("summarise code use - eunomia", { # min cell counts: expect_equal( omopgenerics::suppress(results, 5) |> + visOmopResults::splitAdditional() |> dplyr::filter( - variable_name == "overall", - strata_level == "1909-01-01 to 1909-12-31", + strata_level == "overall", + variable_name == "Number records", + standard_concept_id == "overall", + time_interval == "1909-01-01 to 1909-12-31", group_level == "acetiminophen") |> dplyr::pull("estimate_value"), - as.character() + as.character(NA) ) # check is a summarised result @@ -38,13 +43,12 @@ test_that("summarise code use - eunomia", { # overall record count expect_true(results %>% - dplyr::filter(group_name == "codelist_name" & - strata_name == "overall" & - strata_level == "overall" & - variable_level == "overall" & - group_level == "acetiminophen" & - variable_name == "Number records", - additional_name == "overall") %>% + dplyr::filter(group_name == "codelist_name", + strata_name == "overall", + strata_level == "overall", + additional_level == "overall", + group_level == "acetiminophen", + variable_name == "Number records") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -58,7 +62,6 @@ test_that("summarise code use - eunomia", { dplyr::filter(group_name == "codelist_name" & strata_name == "overall" & strata_level == "overall" & - variable_level == "overall" & group_level == "acetiminophen" & variable_name == "Number subjects", additional_name == "overall") %>% @@ -74,12 +77,13 @@ test_that("summarise code use - eunomia", { # by year # overall record count expect_true(results %>% + visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & strata_name == "overall" & - variable_level == "2008-01-01 to 2008-12-31" & + time_interval == "2008-01-01 to 2008-12-31" & group_level == "acetiminophen" & variable_name == "Number records", - additional_name == "overall") %>% + standard_concept_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -90,12 +94,13 @@ test_that("summarise code use - eunomia", { # overall person count expect_true(results %>% + visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & strata_name == "overall" & - variable_level == "2008-01-01 to 2008-12-31" & + time_interval == "2008-01-01 to 2008-12-31" & group_level == "acetiminophen" & variable_name == "Number subjects", - additional_name == "overall") %>% + standard_concept_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -112,7 +117,6 @@ test_that("summarise code use - eunomia", { dplyr::filter(group_name == "codelist_name" & strata_name == "sex" & strata_level == "Male" & - variable_level == "overall" & group_level == "acetiminophen" & variable_name == "Number records" & additional_name == "overall") %>% @@ -129,7 +133,6 @@ test_that("summarise code use - eunomia", { dplyr::filter(group_name == "codelist_name" & strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & - variable_level == "overall" & group_level == "acetiminophen" & variable_name == "Number records", additional_name == "overall") %>% @@ -150,7 +153,6 @@ test_that("summarise code use - eunomia", { dplyr::filter(group_name == "codelist_name" & strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & - variable_level == "overall" & group_level == "acetiminophen" & variable_name == "Number subjects", additional_name == "overall") %>% @@ -177,33 +179,35 @@ test_that("summarise code use - eunomia", { c(18,65), c(66, 100))) - expect_true(results1$additional_level |> unique() |> length() == 1) expect_equal( results1 |> + visOmopResults::splitAdditional() |> dplyr::filter(variable_name == "Number records") |> dplyr::arrange(dplyr::across(dplyr::everything())), results |> - dplyr::filter(variable_name == "Number records", additional_name == "overall") |> + visOmopResults::splitAdditional() |> + dplyr::filter(variable_name == "Number records", standard_concept_name == "overall") |> + dplyr::select(-c(starts_with("standard_"), starts_with("source_"), "domain_id")) |> dplyr::arrange(dplyr::across(dplyr::everything())) ) expect_true(results1 |> - dplyr::filter(variable_name == "Number subjects", - group_level == "acetiminophen", - variable_level == "1909-01-01 to 1909-12-31", - strata_level == "0 to 17") |> - dplyr::pull("estimate_value") |> - as.numeric() == - cdm$drug_exposure %>% - dplyr::filter(drug_concept_id %in% acetiminophen) %>% - PatientProfiles::addAge(indexDate = "drug_exposure_start_date") %>% - PatientProfiles::addSex() %>% - dplyr::filter(age >= "0", age <= "17", clock::get_year(drug_exposure_start_date) == 1909) |> - dplyr::select("person_id") %>% - dplyr::distinct() %>% - dplyr::tally() %>% - dplyr::pull("n")) + visOmopResults::splitAdditional() |> + dplyr::filter(variable_name == "Number subjects", + group_level == "acetiminophen", + time_interval == "1909-01-01 to 1909-12-31", + strata_level == "0 to 17") |> + dplyr::pull("estimate_value") |> + as.numeric() == + cdm$drug_exposure %>% + dplyr::filter(drug_concept_id %in% acetiminophen) %>% + PatientProfiles::addAge(indexDate = "drug_exposure_start_date") %>% + PatientProfiles::addSex() %>% + dplyr::filter(age >= "0", age <= "17", clock::get_year(drug_exposure_start_date) == 1909) |> + dplyr::select("person_id") %>% + dplyr::distinct() %>% + dplyr::tally() %>% + dplyr::pull("n")) expect_true(results1$group_level |> unique() |> length() == 2) - expect_true(results1$additional_name |> unique() |> length() == 1) results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "person", diff --git a/tests/testthat/test-summariseInObservation.R b/tests/testthat/test-summariseInObservation.R index 6ffeb97..d2c7d10 100644 --- a/tests/testthat/test-summariseInObservation.R +++ b/tests/testthat/test-summariseInObservation.R @@ -12,7 +12,7 @@ test_that("check summariseInObservation works", { # Check inputs ---- x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(variable_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> + dplyr::filter(additional_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -24,8 +24,13 @@ test_that("check summariseInObservation works", { dplyr::pull("n") |> as.numeric() expect_equal(x,y) + x <- summariseInObservation(cdm$observation_period, interval = "years") + expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") + x <- summariseInObservation(cdm$observation_period, interval = "overall") + expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") + x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(variable_level == c("1936-01-01 to 1936-12-31"), estimate_name == "count") |> + dplyr::filter(additional_level == c("1936-01-01 to 1936-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -39,7 +44,7 @@ test_that("check summariseInObservation works", { expect_equal(x,y) x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(variable_level == c("1998-01-01 to 1998-12-31"), estimate_name == "count") |> + dplyr::filter(additional_level == c("1998-01-01 to 1998-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -54,7 +59,7 @@ test_that("check summariseInObservation works", { # Check inputs ---- x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(variable_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> + dplyr::filter(additional_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -67,7 +72,7 @@ test_that("check summariseInObservation works", { x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(variable_level == "2015-09-01 to 2015-09-30", estimate_name == "count") |> + dplyr::filter(additional_level == "2015-09-01 to 2015-09-30", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -79,7 +84,7 @@ test_that("check summariseInObservation works", { expect_equal(x,y) x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(variable_level == "1982-03-01 to 1982-03-31", estimate_name == "count") |> + dplyr::filter(additional_level == "1982-03-01 to 1982-03-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -98,11 +103,16 @@ test_that("check sex argument works", { cdm <- cdmEunomia() # Check overall + x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) + expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") + x <- summariseInObservation(cdm$observation_period, interval = "overall") + expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") + x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("Male","Female"), variable_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + dplyr::filter(strata_level %in% c("Male","Female"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() |> sum() y <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("overall"), variable_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + dplyr::filter(strata_level %in% c("overall"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() expect_equal(x,y) @@ -116,7 +126,7 @@ test_that("check sex argument works", { # Check a random group x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", variable_level == "1915-01-01 to 1915-12-31", estimate_name == "count") |> + dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -129,7 +139,7 @@ test_that("check sex argument works", { expect_equal(x,y) x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", variable_level == "1915-01-01 to 1915-12-31", estimate_name == "percentage") |> + dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -152,7 +162,7 @@ test_that("check ageGroup argument works", { expect_no_error(summariseInObservation(cdm$observation_period, ageGroup = list(c(0,20), c(21, Inf)))) x <- summariseInObservation(cdm$observation_period, interval = "years", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(variable_level == "1928-01-01 to 1928-12-31", estimate_name == "count", strata_level == "<=20") |> + dplyr::filter(additional_level == "1928-01-01 to 1928-12-31", estimate_name == "count", strata_level == "<=20") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -167,7 +177,7 @@ test_that("check ageGroup argument works", { expect_equal(x,y) x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", variable_level == "1918-01-01 to 1918-12-31", estimate_name == "percentage") |> + dplyr::filter(strata_level == "Male", additional_level == "1918-01-01 to 1918-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -189,7 +199,7 @@ test_that("check output argument works", { # check value x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "1970-01-01 to 1970-12-31", estimate_type == "integer") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "1970-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -208,7 +218,7 @@ test_that("check output argument works", { dplyr::mutate(days = !!CDMConnector::datediff("observation_period_start_date","observation_period_end_date", interval = "day")+1) |> dplyr::summarise(n = sum(days, na.rm = TRUE)) |> dplyr::pull("n") x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1964-12-31", estimate_type == "percentage") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "percentage") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% @@ -223,19 +233,19 @@ test_that("check output argument works", { # Check sex stratified x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) # Check age stratified x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", variable_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index 7ab6b7e..2c0e7eb 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -22,11 +22,10 @@ test_that("summariseRecordCount() works", { "device_exposure","observation"))) expect_equal(all, dplyr::bind_rows(co,de,o)) - # Check inputs ---- expect_true( (summariseRecordCount(cdm, "observation_period", interval = "years") |> - dplyr::filter(variable_level == "1963-01-01 to 1963-12-31") |> + dplyr::filter(additional_level == "1963-01-01 to 1963-12-31") |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$observation_period |> @@ -40,7 +39,7 @@ test_that("summariseRecordCount() works", { expect_true( summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(variable_level == "1961-02-01 to 1961-02-28") |> + dplyr::filter(additional_level == "1961-02-01 to 1961-02-28") |> dplyr::pull("estimate_value") |> as.numeric() == (cdm$condition_occurrence |> @@ -55,7 +54,7 @@ test_that("summariseRecordCount() works", { expect_true( (summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(variable_level %in% c("1984-01-01 to 1984-01-31", "1984-02-01 to 1984-02-29", "1984-03-01 to 1984-03-31")) |> + dplyr::filter(additional_level %in% c("1984-01-01 to 1984-01-31", "1984-02-01 to 1984-02-29", "1984-03-01 to 1984-03-31")) |> dplyr::summarise("estimate_value" = sum(as.numeric(estimate_value), na.rm = TRUE)) |> dplyr::pull("estimate_value") |> as.numeric()) == @@ -71,7 +70,7 @@ test_that("summariseRecordCount() works", { expect_true( (summariseRecordCount(cdm, "drug_exposure", interval = "years") |> - dplyr::filter(variable_level %in% c("1981-01-01 to 1981-12-31", "1982-01-01 to 1982-12-31", "1983-01-01 to 1983-12-31", + dplyr::filter(additional_level %in% c("1981-01-01 to 1981-12-31", "1982-01-01 to 1982-12-31", "1983-01-01 to 1983-12-31", "1984-01-01 to 1984-12-31", "1985-01-01 to 1985-12-31", "1986-01-01 to 1986-12-31", "1987-01-01 to 1987-12-31", "1988-01-01 to 1988-12-31")) |> dplyr::summarise("estimate_value" = sum(as.numeric(.data$estimate_value), na.rm = TRUE)) |> @@ -124,16 +123,16 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list(">=65" = c(65, Inf), "<65" = c(0,64)))) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(variable_level) |> + dplyr::group_by(additional_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) @@ -141,16 +140,16 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(variable_level) |> + dplyr::group_by(additional_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) @@ -158,8 +157,8 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") y <- cdm$condition_occurrence |> @@ -175,8 +174,8 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60)))) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") |> as.numeric() y <- cdm$condition_occurrence |> @@ -199,40 +198,40 @@ test_that("summariseRecordCount() sex argument works", { # Check that works ---- expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(variable_level) |> + dplyr::group_by(additional_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) expect_warning(t <- summariseRecordCount(cdm, "observation_period", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(variable_level) |> + dplyr::group_by(additional_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::select("strata_level", "additional_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(variable_level) |> + dplyr::arrange(additional_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, interval = "years")) x <- t |> - dplyr::select("strata_level", "variable_level", "estimate_value") |> - dplyr::filter(strata_level == "Male", variable_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) + dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::filter(strata_level == "Male", additional_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) y <- cdm$condition_occurrence |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> @@ -261,7 +260,7 @@ test_that("summariseRecordCount() works with mockOmopSketch", { expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> dplyr::tally() |> dplyr::pull() == 0) - expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level), variable_level != "overall") |> + expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level), additional_level != "overall") |> dplyr::pull("estimate_value") |> sort() == conditionpp |> dplyr::pull("n") |> as.character() |> sort()))