Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix 4 #243

Merged
merged 4 commits into from
Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
.Rproj.user/*
.Rhistory
.RData
*.RData
*.rds
.Ruserdata
inst/doc
tests/testthat/_snaps/*
Expand All @@ -17,4 +19,4 @@ src-i386
functional_tests/supplements/*
*zip_data.RData
*log*
*.rds
*a.rds
2 changes: 0 additions & 2 deletions R/estimate_erf.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ estimate_erf <- function(.data,
result_data_prediction <- data.frame(w_vals = w_vals,
y_pred = y_pred)

print(erf_np)

} else {
stop("The code should never get here. Double check.")
}
Expand Down
8 changes: 6 additions & 2 deletions R/estimate_gps.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#'
#' @return
#' The function returns a S3 object. Including the following:
#' - `.data `: `id`, `w`, `gps`, `e_gps_pred`, `e_gps_std_pred`, `w_resid`
#' - `.data `: `id`, `exposure_var`, `gps`, `e_gps_pred`, `e_gps_std_pred`,
#' `w_resid`
#' - `params`: Including the following fields:
#' - gps_mx (min and max of gps)
#' - w_mx (min and max of w).
Expand Down Expand Up @@ -113,7 +114,10 @@ estimate_gps <- function(.data,
gps_mx <- compute_min_max(gps)

# create new data.frame for output
dataset <- data.frame(id = .data$id, w = response_data, gps = gps)
dataset <- stats::setNames(data.frame(.data$id,
response_data,
gps),
c("id", response_var, "gps"))
dataset$e_gps_pred <- e_gps_pred
if (length(e_gps_std_pred) == 1){
e_gps_std_pred <- rep(e_gps_std_pred, nrow(dataset))
Expand Down
2 changes: 1 addition & 1 deletion R/matching_fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ matching_fn <- function(w,
std_w <- (w - w_min) / (w_max - w_min)
std_p_w <- (p_w - gps_min) / (gps_max - gps_min)

dataset_subset <- dataset[abs(dataset[["w"]] - w) <= (delta_n / 2), ]
dataset_subset <- dataset[abs(dataset[[exposure_col_name]] - w) <= (delta_n / 2), ]

if (nrow(dataset_subset) < 1){
logger:: log_warn(paste("There is no data to match with ", w, "in ",
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
70 changes: 70 additions & 0 deletions functional_tests/paper_1_estimating_gps_normal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
## author: Naeem Khoshnevis
## created: September 2023
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)
library(data.table)

# Load data --------------------------------------------------------------------
data_file <- "zip_data.RData"
if (!file.exists(data_file)) {
stop(paste0("Download the study data file from the following link:\n",
"https://drive.google.com/file/d/",
"1QFdbVU8Qir1gWf96c5h_ZhT-aPjhHpqn/view?usp=share_link"))
} else {
load(data_file)
}

data.table::setDF(zip_data)
data <- zip_data

# Add id to the data
data$id <- 1:nrow(data)
data$w <- data$pm25
data$pm25 <- NULL

save(data, file = "study_data.RData")

# Estimate GPS -----------------------------------------------------------------

## Super learner wrapper
m_xgboost <- function(nthread = 6,
ntrees = 50,
shrinkage = 0.3,
max_depth = 6,
minobspernode = 1,
verbose = 1,
...) {SuperLearner::SL.xgboost(
nthread = nthread,
ntrees = ntrees,
shrinkage=shrinkage,
max_depth=max_depth,
mibobspernode=minobspernode,
verbose=verbose,
...)}

exposure <- "w"
confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")

formula_str <- paste(exposure, " ~ ", paste(confounders, collapse = " + "))


data_with_gps_normal <- estimate_gps(.data = data,
.formula = as.formula(formula_str),
gps_density = "normal",
sl_lib = c("m_xgboost")
)


pdf("figure_paper_1_estimating_gps_normal.pdf")
plot(data_with_gps_normal)
dev.off()

save(data_with_gps_normal, file = "data_with_gps_normal.RData")
66 changes: 66 additions & 0 deletions functional_tests/paper_2_estimating_gps_kernel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)
library(data.table)

# Load data --------------------------------------------------------------------
data_file <- "zip_data.RData"
if (!file.exists(data_file)) {
stop(paste0("Download the study data file from the following link:\n",
"https://drive.google.com/file/d/",
"1QFdbVU8Qir1gWf96c5h_ZhT-aPjhHpqn/view?usp=share_link"))
} else {
load(data_file)
}

data.table::setDF(zip_data)
data <- zip_data

# Add id to the data
data$id <- 1:nrow(data)

# Estimate GPS -----------------------------------------------------------------

## Super learner wrapper
m_xgboost <- function(nthread = 6,
ntrees = 50,
shrinkage = 0.3,
max_depth = 6,
minobspernode = 1,
verbose = 1,
...) {SuperLearner::SL.xgboost(
nthread = nthread,
ntrees = ntrees,
shrinkage=shrinkage,
max_depth=max_depth,
mibobspernode=minobspernode,
verbose=verbose,
...)}

exposure <- "pm25"
confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")

formula_str <- paste(exposure, " ~ ", paste(confounders, collapse = " + "))


data_with_gps_kernel <- estimate_gps(.data = data,
.formula = as.formula(formula_str),
gps_density = "kernel",
sl_lib = c("m_xgboost")
)


pdf("figure_paper_2_estimating_gps_kernel.pdf")
plot(data_with_gps_kernel)
dev.off()

save(data_with_gps_kernel, file = "data_with_gps_kernel.RData")
26 changes: 26 additions & 0 deletions functional_tests/paper_3_compute_weight_counter_weighting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load gps object
load("data_with_gps_normal.RData")


cw_weighting_object <- compute_counter_weight(gps_obj = data_with_gps_normal,
ci_appr = "weighting",
bin_seq = NULL,
nthread = 6,
delta_n = 0.1,
dist_measure = "l1",
scale = 0.5)


save(cw_weighting_object, file = "cw_weighting_object.RData")


24 changes: 24 additions & 0 deletions functional_tests/paper_4_compute_weight_counter_matching.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load gps object
load("data_with_gps_normal.RData")


cw_matching_object <- compute_counter_weight(gps_obj = data_with_gps_normal,
ci_appr = "matching",
bin_seq = NULL,
nthread = 6,
delta_n = 0.1,
dist_measure = "l1",
scale = 0.5)


save(cw_matching_object, file = "cw_matching_object.RData")
36 changes: 36 additions & 0 deletions functional_tests/paper_5_compute_pseudo_pop_weighting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load cw object and data
load("cw_weighting_object.RData")
load("study_data.RData")

confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")


pseudo_pop_weighting_object <- generate_pseudo_pop(
.data = data,
cw_obj = cw_weighting_object,
covariate_col_names = confounders,
covar_bl_trs = 0.1,
covar_bl_trs_type = "maximal",
covar_bl_method = "absolute")


save(pseudo_pop_weighting_object, file = "pseudo_pop_weighting_object.RData")


pdf("figure_paper_5_pseudo_pop_weighting_object.pdf")
plot(pseudo_pop_weighting_object)
dev.off()
35 changes: 35 additions & 0 deletions functional_tests/paper_6_compute_pseudo_pop_matching.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load cw object and data
load("cw_matching_object.RData")
load("study_data.RData")

confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")


pseudo_pop_weighting_object <- generate_pseudo_pop(
.data = data,
cw_obj = cw_matching_object,
covariate_col_names = confounders,
covar_bl_trs = 0.1,
covar_bl_trs_type = "maximal",
covar_bl_method = "absolute")


save(pseudo_pop_matching_object, file = "pseudo_pop_matching_object.RData")

pdf("figure_paper_6_pseudo_pop_matching_object.pdf")
plot(pseudo_pop_matching_object)
dev.off()
Loading
Loading