Skip to content

Commit

Permalink
Merge pull request #85 from ImperialCollegeLondon/version3
Browse files Browse the repository at this point in the history
nature submission mirror with refactoring
  • Loading branch information
s-mishra authored Apr 24, 2020
2 parents 742ddd0 + d41c601 commit 865806f
Show file tree
Hide file tree
Showing 12 changed files with 13,082 additions and 10,155 deletions.
219 changes: 29 additions & 190 deletions base.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,8 @@ library(tidyr)
library(EnvStats)
library(optparse)

countries <- c(
"Denmark",
"Italy",
"Germany",
"Spain",
"United_Kingdom",
"France",
"Norway",
"Belgium",
"Austria",
"Sweden",
"Switzerland",
"Greece",
"Portugal",
"Netherlands"
)
source('utils/read-data.r')
source('utils/process-covariates.r')

# Commandline options and parsing
parser <- OptionParser()
Expand Down Expand Up @@ -54,198 +40,44 @@ if(length(cmdoptions$args) == 0) {
} else {
StanModel = cmdoptions$args[1]
}

print(sprintf("Running %s",StanModel))
if(DEBUG) {
print("Running in DEBUG mode")
} else if (FULL) {
print("Running in FULL mode")
}

## Reading all data
d=readRDS('data/COVID-19-up-to-date.rds')

## Ensure that output directories exist
dir.create("results/", showWarnings = FALSE, recursive = TRUE)
dir.create("figures/", showWarnings = FALSE, recursive = TRUE)
dir.create("web/", showWarnings = FALSE, recursive = TRUE)

## get IFR and population from same file
ifr.by.country = read.csv("data/popt_ifr.csv")
ifr.by.country$country = as.character(ifr.by.country[,2])
ifr.by.country$country[ifr.by.country$country == "United Kingdom"] = "United_Kingdom"

serial.interval = read.csv("data/serial_interval.csv")
covariates = read.csv('data/interventions.csv', stringsAsFactors = FALSE)
names_covariates = c('Schools + Universities','Self-isolating if ill', 'Public events', 'Lockdown', 'Social distancing encouraged')
covariates <- covariates %>%
filter((Type %in% names_covariates))
covariates <- covariates[,c(1,2,4)]
covariates <- spread(covariates, Type, Date.effective)
names(covariates) <- c('Country','lockdown', 'public_events', 'schools_universities','self_isolating_if_ill', 'social_distancing_encouraged')
covariates <- covariates[c('Country','schools_universities', 'self_isolating_if_ill', 'public_events', 'lockdown', 'social_distancing_encouraged')]
covariates$schools_universities <- as.Date(covariates$schools_universities, format = "%d.%m.%Y")
covariates$lockdown <- as.Date(covariates$lockdown, format = "%d.%m.%Y")
covariates$public_events <- as.Date(covariates$public_events, format = "%d.%m.%Y")
covariates$self_isolating_if_ill <- as.Date(covariates$self_isolating_if_ill, format = "%d.%m.%Y")
covariates$social_distancing_encouraged <- as.Date(covariates$social_distancing_encouraged, format = "%d.%m.%Y")
## using covariates as dates we want
covariates$schools_universities[covariates$schools_universities > covariates$lockdown] <- covariates$lockdown[covariates$schools_universities > covariates$lockdown]
covariates$public_events[covariates$public_events > covariates$lockdown] <- covariates$lockdown[covariates$public_events > covariates$lockdown]
covariates$social_distancing_encouraged[covariates$social_distancing_encouraged > covariates$lockdown] <- covariates$lockdown[covariates$social_distancing_encouraged > covariates$lockdown]
covariates$self_isolating_if_ill[covariates$self_isolating_if_ill > covariates$lockdown] <- covariates$lockdown[covariates$self_isolating_if_ill > covariates$lockdown]

forecast = 0

N2 = 120 # increase if you need more forecast

dates = list()
reported_cases = list()
# Pads serial interval with 0 if N2 is greater than the length of the serial
# interval array
if (N2 > length(serial.interval$fit)) {
pad_serial.interval <- data.frame(
"X"=(length(serial.interval$fit)+1):N2,
"fit"=rep(0.0, max(N2-length(serial.interval$fit), 0 ))
)
serial.interval = rbind(serial.interval, pad_serial.interval)
}
stan_data = list(M=length(countries),N=NULL,covariate1=NULL,covariate2=NULL,covariate3=NULL,covariate4=NULL,covariate5=NULL,covariate6=NULL,deaths=NULL,f=NULL,
N0=6,cases=NULL,SI=serial.interval$fit[1:N2],
EpidemicStart = NULL, pop = NULL) # N0 = 6 to make it consistent with Rayleigh
deaths_by_country = list()

# various distributions required for modeling
mean1 = 5.1; cv1 = 0.86; # infection to onset
mean2 = 18.8; cv2 = 0.45 # onset to death
x1 = rgammaAlt(1e7,mean1,cv1) # infection-to-onset distribution
x2 = rgammaAlt(1e7,mean2,cv2) # onset-to-death distribution

ecdf.saved = ecdf(x1+x2)

for(Country in countries) {
IFR=ifr.by.country$ifr[ifr.by.country$country == Country]

covariates1 <- covariates[covariates$Country == Country, c(2,3,4,5,6)]

d1_pop = ifr.by.country[ifr.by.country$country==Country,]
d1=d[d$Countries.and.territories==Country,c(1,5,6,7)]
d1$date = as.Date(d1$DateRep,format='%d/%m/%Y')
d1$t = decimal_date(d1$date)
d1=d1[order(d1$t),]

date_min <- dmy('31/12/2019')
if (as.Date(d1$DateRep[1], format='%d/%m/%Y') > as.Date(date_min, format='%d/%m/%Y')){
print(paste(Country,'In padding'))
pad_days <- as.Date(d1$DateRep[1], format='%d/%m/%Y') - date_min
pad_dates <- date_min + days(1:pad_days[[1]]-1)
padded_data <- data.frame("Countries.and.territories" = rep(Country, pad_days),
"DateRep" = format(pad_dates, '%d/%m/%Y'),
"t" = decimal_date(as.Date(pad_dates,format='%d/%m/%Y')),
"date" = as.Date(pad_dates,format='%d/%m/%Y'),
"Cases" = as.integer(rep(0, pad_days)),
"Deaths" = as.integer(rep(0, pad_days)),
stringsAsFactors=F)

d1 <- bind_rows(padded_data, d1)
}
index = which(d1$Cases>0)[1]
index1 = which(cumsum(d1$Deaths)>=10)[1] # also 5
index2 = index1-30

print(sprintf("First non-zero cases is on day %d, and 30 days before 10 deaths is day %d",index,index2))
d1=d1[index2:nrow(d1),]
stan_data$EpidemicStart = c(stan_data$EpidemicStart,index1+1-index2)
stan_data$pop = c(stan_data$pop, d1_pop$popt)


for (ii in 1:ncol(covariates1)) {
covariate = names(covariates1)[ii]
d1[covariate] <- (as.Date(d1$DateRep, format='%d/%m/%Y') >= as.Date(covariates1[1,covariate]))*1 # should this be > or >=?
}

dates[[Country]] = d1$date
# hazard estimation
N = length(d1$Cases)
print(sprintf("%s has %d days of data",Country,N))
forecast = N2 - N
if(forecast < 0) {
print(sprintf("%s: %d", Country, N))
print("ERROR!!!! increasing N2")
N2 = N
forecast = N2 - N
}

# IFR is the overall probability of dying given infection
convolution = function(u) (IFR * ecdf.saved(u))
# Read which countires to use
countries <- read.csv('data/regions.csv', stringsAsFactors = FALSE)
# Read deaths data for regions
d <- read_obs_data(countries)
# Read ifr
ifr.by.country <- read_ifr_data()

f = rep(0,N2) # f is the probability of dying on day i given infection
f[1] = (convolution(1.5) - convolution(0))
for(i in 2:N2) {
f[i] = (convolution(i+.5) - convolution(i-.5))
}

reported_cases[[Country]] = as.vector(as.numeric(d1$Cases))
deaths=c(as.vector(as.numeric(d1$Deaths)),rep(-1,forecast))
cases=c(as.vector(as.numeric(d1$Cases)),rep(-1,forecast))
deaths_by_country[[Country]] = as.vector(as.numeric(d1$Deaths))
covariates2 <- as.data.frame(d1[, colnames(covariates1)])
# x=1:(N+forecast)
covariates2[N:(N+forecast),] <- covariates2[N,]

## append data
stan_data$N = c(stan_data$N,N)
# stan_data$x = cbind(stan_data$x,x)
stan_data$covariate1 = cbind(stan_data$covariate1,covariates2[,1])
stan_data$covariate2 = cbind(stan_data$covariate2,covariates2[,2])
stan_data$covariate3 = cbind(stan_data$covariate3,covariates2[,3])
stan_data$covariate4 = cbind(stan_data$covariate4,covariates2[,4])
stan_data$covariate5 = cbind(stan_data$covariate5,covariates2[,4])
stan_data$covariate6 = cbind(stan_data$covariate6,covariates2[,5])
stan_data$f = cbind(stan_data$f,f)
stan_data$deaths = cbind(stan_data$deaths,deaths)
stan_data$cases = cbind(stan_data$cases,cases)

stan_data$N2=N2
stan_data$x=1:N2
if(length(stan_data$N) == 1) {
stan_data$N = as.array(stan_data$N)
}
}
# Read interventions
interventions <- read_interventions(countries)

# create the `any intervention` covariate
stan_data$covariate4 = 1*as.data.frame((stan_data$covariate1+
stan_data$covariate2+
stan_data$covariate3+
stan_data$covariate5+
stan_data$covariate6) >= 1)
N2 <- 100 # increase if you need more forecast

if(DEBUG) {
for(i in 1:length(countries)) {
write.csv(
data.frame(date=dates[[i]],
`school closure`=stan_data$covariate1[1:stan_data$N[i],i],
`self isolating if ill`=stan_data$covariate2[1:stan_data$N[i],i],
`public events`=stan_data$covariate3[1:stan_data$N[i],i],
`government makes any intervention`=stan_data$covariate4[1:stan_data$N[i],i],
`lockdown`=stan_data$covariate5[1:stan_data$N[i],i],
`social distancing encouraged`=stan_data$covariate6[1:stan_data$N[i],i]),
file=sprintf("results/%s-check-dates.csv",countries[i]),row.names=F)
}
}
processed_data <- process_covariates(countries = countries, interventions = interventions,
d = d , ifr.by.country = ifr.by.country, N2 = N2)
stan_data = processed_data$stan_data
dates = processed_data$dates
deaths_by_country = processed_data$deaths_by_country
reported_cases = processed_data$reported_cases

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
m = stan_model(paste0('stan-models/',StanModel,'.stan'))


if(DEBUG) {
fit = sampling(m,data=stan_data,iter=40,warmup=20,chains=2)
} else if (FULL) {
fit = sampling(m,data=stan_data,iter=4000,warmup=2000,chains=4,thin=4,control = list(adapt_delta = 0.95, max_treedepth = 10))
fit = sampling(m,data=stan_data,iter=1800,warmup=1000,chains=5,thin=1,control = list(adapt_delta = 0.95, max_treedepth = 15))
} else {
fit = sampling(m,data=stan_data,iter=200,warmup=100,chains=4,thin=4,control = list(adapt_delta = 0.95, max_treedepth = 10))
}
fit = sampling(m,data=stan_data,iter=1000,warmup=500,chains=4,thin=1,control = list(adapt_delta = 0.95, max_treedepth = 10))
}

out = rstan::extract(fit)
prediction = out$prediction
Expand All @@ -257,9 +89,15 @@ if(JOBID == "")
JOBID = as.character(abs(round(rnorm(1) * 1000000)))
print(sprintf("Jobid = %s",JOBID))

countries <- countries$Regions
save.image(paste0('results/',StanModel,'-',JOBID,'.Rdata'))
save(fit,prediction,dates,reported_cases,deaths_by_country,countries,estimated.deaths,estimated.deaths.cf,out,file=paste0('results/',StanModel,'-',JOBID,'-stanfit.Rdata'))

save(fit,prediction,dates,reported_cases,deaths_by_country,countries,estimated.deaths,estimated.deaths.cf,out,covariates,file=paste0('results/',StanModel,'-',JOBID,'-stanfit.Rdata'))
## Ensure that output directories exist
dir.create("results/", showWarnings = FALSE, recursive = TRUE)
dir.create("figures/", showWarnings = FALSE, recursive = TRUE)
dir.create("web/", showWarnings = FALSE, recursive = TRUE)
dir.create("web/data", showWarnings = FALSE, recursive = TRUE)

library(bayesplot)
filename <- paste0(StanModel,'-',JOBID)
Expand Down Expand Up @@ -298,6 +136,7 @@ if(make_table_error != 0){
stop(sprintf("Generation of alpha covar table failed! Code: %d", make_table_error))
}


verify_result_error <- system(paste0("Rscript web-verify-output.r ", filename,'.Rdata'),intern=FALSE)
if(verify_result_error != 0){
stop(sprintf("Verification of web output failed! Code: %d", verify_result_error))
Expand Down
2 changes: 1 addition & 1 deletion covariate-size-effects.r
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ plot_labels <- c("School Closure",
"Self Isolation",
"Public Events",
"First Intervention",
"Lockdown", 'Social distancing')
"Lockdown", 'Social distancing \n encouraged')
colnames(alpha) = plot_labels
first.intervention = alpha[,c(1,2,3,5,6)] + alpha[,4]
data1 = mcmc_intervals_data(first.intervention,prob=.95,transformation=function(x) 1-exp(-x),point_est="mean")
Expand Down
Loading

0 comments on commit 865806f

Please sign in to comment.