Skip to content

Commit

Permalink
Merge pull request #223 from OHDSI/mah_fixIssues
Browse files Browse the repository at this point in the history
Move variable_level to additional_level
  • Loading branch information
martaalcalde authored Nov 5, 2024
2 parents dcb0d61 + edd38ae commit b5c04d5
Show file tree
Hide file tree
Showing 13 changed files with 183 additions and 144 deletions.
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Authors@R: c(
"Elin", "Rowlands", email = "[email protected]",
role = c("aut"), comment = c(ORCID = "0009-0005-5166-0417")
),
person(
"Cecilia", "Campanile", email = "[email protected]",
role = c("aut"), comment = c(ORCID = "0009-0007-6629-4661")
),
person(
"Edward", "Burn", email = "[email protected]",
role = c("aut"), comment = c(ORCID = "0000-0002-9286-1128")
Expand Down Expand Up @@ -71,4 +75,4 @@ URL: https://OHDSI.github.io/OmopSketch/
BugReports: https://github.com/OHDSI/OmopSketch/issues
VignetteBuilder: knitr
Remotes:
darwin-eu-dev/omopgenerics@mah_vl
darwin-eu-dev/omopgenerics
35 changes: 22 additions & 13 deletions R/plotConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#'
#' result |>
#' filter(variable_name == "Number subjects") |>
#' plotConceptCounts(facet = "codelist_name", colour = "codelist_name")
#' plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name")
#'
#' PatientProfiles::mockDisconnect(cdm)
#' }
Expand All @@ -35,9 +35,7 @@ plotConceptCounts <- function(result,

# subset to results of interest
result <- result |>
visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts") |>
dplyr::mutate(variable_level = gsub(" to.*","",.data$variable_level)) |>
dplyr::mutate(variable_level = gsub("-01$","",.data$variable_level))
visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts")

if (nrow(result) == 0) {
cli::cli_abort(c("!" = "No records found with result_type == summarise_concept_counts"))
Expand All @@ -52,12 +50,14 @@ plotConceptCounts <- function(result,
))
}

result1 <- result |> visOmopResults::splitAdditional()
# Detect if there are several time intervals
if(length(unique(result$variable_level)) > 1 ){
if("time_interval" %in% colnames(result1)){
# Line plot where each concept is a different line
p <- result |>
dplyr::filter(.data$variable_level != "overall") |>
visOmopResults::scatterPlot(x = "variable_level",
p <- result1 |>
dplyr::filter(.data$time_interval != "overall") |>
visOmopResults::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |>
visOmopResults::scatterPlot(x = "time_interval",
y = "count",
line = TRUE,
point = TRUE,
Expand All @@ -66,11 +66,20 @@ plotConceptCounts <- function(result,
facet = facet,
colour = colour)
}else{
p <- result |>
visOmopResults::barPlot(x = "standard_concept_name",
y = "count",
facet = facet,
colour = colour) +
if("standard_concept_name" %in% colnames(result1)){
p <- result |>
visOmopResults::barPlot(x = "standard_concept_name",
y = "count",
facet = facet,
colour = colour)
}else{
p <- result |>
visOmopResults::barPlot(x = "codelist_name",
y = "count",
facet = facet,
colour = colour)
}
p <- p +
ggplot2::labs(
x = "Concept name"
)
Expand Down
8 changes: 4 additions & 4 deletions R/plotInObservation.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ plotInObservation <- function(result,
}

# warn
warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "variable_level"))
warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "additional_level"))

# plot
if(length(unique(result$variable_level)) > 1 ){
if(length(unique(result$additional_level)) > 1 ){
result |>
dplyr::mutate(variable_level = as.Date(gsub(" to.*","",.data$variable_level))) |>
dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |>
dplyr::filter(.data$estimate_name == "count") |>
visOmopResults::scatterPlot(
x = "variable_level",
x = "time_interval",
y = "count",
line = TRUE,
point = TRUE,
Expand Down
10 changes: 5 additions & 5 deletions R/plotRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ plotRecordCount <- function(result,
}

# Detect if there are several time intervals
if(length(unique(result$variable_level)) > 1 ){
if(length(unique(result$additional_level)) > 1 ){
# Line plot where each concept is a different line
p <- result |>
dplyr::filter(.data$variable_level != "overall") |>
dplyr::filter(.data$additional_level != "overall") |>
dplyr::filter(.data$estimate_name == "count") |>
visOmopResults::scatterPlot(x = "variable_level",
visOmopResults::scatterPlot(x = "time_interval",
y = "count",
line = TRUE,
point = TRUE,
Expand All @@ -57,13 +57,13 @@ plotRecordCount <- function(result,
)
}else{
p <- result |>
visOmopResults::barPlot(x = "variable_level",
visOmopResults::barPlot(x = "variable_name",
y = "count",
facet = facet,
colour = colour) +
ggplot2::labs(
y = "Count",
x = "Number records"
x = ""
)
}
p
Expand Down
6 changes: 4 additions & 2 deletions R/summariseConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ getCodeUse <- function(x,
}

cc <- records |>
# dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706
PatientProfiles::summariseResult(strata = strata,
variable = "standard_concept_name",
group = group,
Expand All @@ -211,7 +210,10 @@ getCodeUse <- function(x,
if(interval != "overall"){
cc <- cc |>
visOmopResults::splitStrata() |>
dplyr::mutate(variable_level = .data$interval_group) |>
dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |>
dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |>
dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |>
dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |>
visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |>
dplyr::select(-"interval_group")
}
Expand Down
42 changes: 23 additions & 19 deletions R/summariseInObservation.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ summariseInObservation <- function(observationPeriod,
checkOutput(output)
ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]]
omopgenerics::assertLogical(sex, length = 1)
original_interval <- interval
x <- validateIntervals(interval)
interval <- x$interval
unitInterval <- x$unitInterval
Expand Down Expand Up @@ -88,7 +89,7 @@ summariseInObservation <- function(observationPeriod,
result <- addSexOverall(result, sex)

# Create summarisedResult
result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, interval, unitInterval)
result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, original_interval)

CDMConnector::dropTable(cdm, name = dplyr::starts_with(tablePrefix))
return(result)
Expand Down Expand Up @@ -172,7 +173,7 @@ countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name,
if(output == "person-days" | output == "all"){
if(interval != "overall"){
x <- cdm[[paste0(tablePrefix, "interval")]] |>
dplyr::rename("variable_level" = "interval_group") |>
dplyr::rename("additional_level" = "interval_group") |>
dplyr::cross_join(
observationPeriod |>
dplyr::select("start_date" = "observation_period_start_date",
Expand All @@ -188,14 +189,16 @@ countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name,
x <- observationPeriod |>
dplyr::rename("start_date" = "observation_period_start_date",
"end_date" = "observation_period_end_date") |>
dplyr::mutate("variable_level" = NA)
dplyr::mutate("additional_level" = "overall",
"additional_name" = "overall")
}

personDays <- x %>%
dplyr::mutate(estimate_value = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |>
dplyr::group_by(dplyr::across(dplyr::any_of(c("variable_level", "sex", "age_group")))) |>
dplyr::group_by(dplyr::across(dplyr::any_of(c("additional_level", "sex", "age_group")))) |>
dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(variable_name = "Number person-days") |>
dplyr::mutate("variable_name" = "Number person-days",
"additional_name" = "time_interval") |>
dplyr::collect()
}else{
personDays <- createEmptyIntervalTable(interval)
Expand All @@ -212,20 +215,22 @@ if(output == "records" | output == "all"){
dplyr::compute(temporary = FALSE, name = tablePrefix)

records <- cdm[[paste0(tablePrefix, "interval")]] |>
dplyr::rename("variable_level" = "interval_group") |>
dplyr::rename("additional_level" = "interval_group") |>
dplyr::cross_join(x) |>
dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) |
(.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |>
dplyr::group_by(.data$variable_level, .data$age_group, .data$sex) |>
dplyr::group_by(.data$additional_level, .data$age_group, .data$sex) |>
dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(variable_name = "Number records in observation") |>
dplyr::mutate("variable_name" = "Number records in observation",
"additional_name" = "time_interval") |>
dplyr::collect()
}else{
records <- observationPeriod |>
dplyr::group_by(.data$age_group, .data$sex) |>
dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |>
dplyr::mutate("variable_name" = "Number records in observation",
"variable_level" = NA) |>
"additional_level" = "overall",
"additional_name" = "overall") |>
dplyr::collect()
}
}else{
Expand All @@ -234,12 +239,12 @@ if(output == "records" | output == "all"){

x <- personDays |>
rbind(records) |>
dplyr::arrange(dplyr::across(dplyr::any_of("variable_level")))
dplyr::arrange(dplyr::across(dplyr::any_of("additional_level")))

return(x)
}

createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, interval, unitInterval){
createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, original_interval){
result <- result |>
dplyr::mutate("estimate_value" = as.character(.data$estimate_value)) |>
visOmopResults::uniteStrata(cols = c("sex", "age_group")) |>
Expand All @@ -248,15 +253,14 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n
"cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)),
"group_name" = "omop_table",
"group_level" = name,
"variable_level" = as.character(NA),
"estimate_name" = "count",
"estimate_type" = "integer",
"additional_name" = "overall",
"additional_level" = "overall"
"estimate_type" = "integer"
)

result <- result |>
rbind(result) |>
dplyr::group_by(.data$variable_level, .data$strata_level, .data$variable_name) |>
dplyr::group_by(.data$additional_level, .data$strata_level, .data$variable_name) |>
dplyr::mutate(estimate_type = dplyr::if_else(dplyr::row_number() == 2, "percentage", .data$estimate_type)) |>
dplyr::inner_join(denominator, by = "variable_name") |>
dplyr::mutate(estimate_value = dplyr::if_else(.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value)) |>
Expand All @@ -267,8 +271,7 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n
"result_type" = "summarise_in_observation",
"package_name" = "OmopSketch",
"package_version" = as.character(utils::packageVersion("OmopSketch")),
"interval" = .env$interval,
"unitInterval" = .env$unitInterval
"interval" = .env$original_interval
))

return(result)
Expand Down Expand Up @@ -332,9 +335,10 @@ addSexOverall <- function(result, sex){
if(sex){
result <- result |> rbind(
result |>
dplyr::group_by(.data$age_group, .data$variable_level, .data$variable_name) |>
dplyr::group_by(.data$age_group, .data$additional_level, .data$variable_name) |>
dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(sex = "overall")
dplyr::mutate(sex = "overall",
additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval"))
)
}
return(result)
Expand Down
23 changes: 11 additions & 12 deletions R/summariseRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ summariseRecordCount <- function(cdm,
# Initial checks ----
omopgenerics::validateCdmArgument(cdm)
omopgenerics::assertCharacter(omopTableName)
original_interval <- interval
x <- validateIntervals(interval)
interval <- x$interval
unitInterval <- x$unitInterval
Expand All @@ -54,6 +55,7 @@ summariseRecordCount <- function(cdm,
cdm = cdm,
interval = interval,
unitInterval = unitInterval,
original_interval,
ageGroup = ageGroup,
sex = sex)
}
Expand All @@ -65,7 +67,7 @@ summariseRecordCount <- function(cdm,

#' @noRd
summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval,
ageGroup, sex) {
original_interval, ageGroup, sex) {

prefix <- omopgenerics::tmpPrefix()
omopTable <- cdm[[omopTableName]] |> dplyr::ungroup()
Expand Down Expand Up @@ -107,7 +109,7 @@ summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInter
}

# Create summarised result ----
result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, interval, unitInterval)
result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, original_interval)
omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(prefix))

return(result)
Expand Down Expand Up @@ -230,12 +232,11 @@ splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){
dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date"))
}

createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, interval, unitInterval){
createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, original_interval){

result <- result |>
dplyr::mutate(n = 1) |>
dplyr::select(-"person_id") |>
# dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706
PatientProfiles::summariseResult(
variables = "n",
strata = strata,
Expand All @@ -246,18 +247,15 @@ createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTab
suppressMessages() |>
dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |>
dplyr::mutate(
"result_id" = as.integer(1),
"cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)),
"group_name" = "omop_table",
"group_level" = omopTableName,
"additional_name" = "overall",
"additional_level" = "overall"
"group_level" = omopTableName
)

if(interval != "overall"){
if(original_interval != "overall"){
result <- result |>
visOmopResults::splitStrata() |>
dplyr::mutate(variable_level = .data$interval_group) |>
dplyr::mutate(additional_level = .data$interval_group) |>
dplyr::mutate(additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) |>
visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |>
dplyr::select(-"interval_group")
}
Expand All @@ -268,7 +266,8 @@ createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTab
"result_id" = 1L,
"result_type" = "summarise_record_count",
"package_name" = "OmopSketch",
"package_version" = as.character(utils::packageVersion("OmopSketch"))
"package_version" = as.character(utils::packageVersion("OmopSketch")),
"interval" = .env$original_interval
)
)
}
Loading

0 comments on commit b5c04d5

Please sign in to comment.