-
Notifications
You must be signed in to change notification settings - Fork 0
/
P5s2258945.R
165 lines (127 loc) · 6.46 KB
/
P5s2258945.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
## Zheyi SHEN, s2258945
## ------ Overview ------
## Taking advantages of the high reliability of
## death tolls and the fatal disease duration information,
## this project aims at making inference on new fatal/infections each day.
## More specifically, we would:
## *** Sample the hospital deaths with Covid-19 in England for 100 days since Mar.2nd, 2020
## *** Catch the posterior mean along the trajectory of "observed" new deaths n[i]
## and also the expected new deaths m[i].
## *** Diagnostic analysis would be conducted before the final plot.
##
## Model assumption:
## The number if new infections per day changes fairly smoothly from day to day and
## the measured dist'n for fatal disease duration is correct.
##
## Model details is given in model.jags file.
library(rjags)
library(coda)
######################################################################
###### Part I: Data reading and sampling ################
######################################################################
## Data on daily hospital deaths with COVID-19 in England for the first 100 days from March 2nd 2020
## (Source: NHS England)
y <-c(1,2,0,2,2,0,4,4,1,9,14,20,22,27,40,46,66,63,105,103,149,159,204,263,326,353,360,437,498,576,645,647,
700,778,743,727,813,900,792,740,779,718,699,646,686,639,610,571,524,566,486,502,451,438,386,380,345,341,
325,313,306,268,251,259,252,266,256,215,203,196,166,183,162,180,172,167,138,160,144,153,150,122,128,116,
133,139,122,124,117,92,83,94,109,111,83,86,83,80,73,69)
## As there's no Covid-19 deaths prior to this date,
## we could extend the data by appending 20 days of zero deaths to the start of the data
## New data then begins on February 11, 2020.
y_new <- c(rep(0,20),y)
N <- length(y_new) # 120
## Generate tmp matrix B based on the log normal density of fatal disease duration D
B <- matrix(0,N,N) ## Initialize matrix B according to data length
## For each element B[i,j]
for (i in 1:N){ ## row i of B
for (j in 1:N){ ## at the j-th column
if (j > i){
B[i,j] <- 0 ## 0 means no change.
}
else{ ## Choose the (i-j)th element in the marginal probability of log(D)
B[i,j] <- dlnorm(i-j, meanlog = 3.235, sdlog = 0.4147, log = FALSE)
}
}
} ## End of B matrix construction
## Call JAGS to perform Gibbs sampling for posterior dist'n
mod <- jags.model("model.jags", data = list(y = y_new, N = N, B = B))
ns <- 10000 ## select a sample size of 10000
sam.coda <- coda.samples(mod, c("m","n"), n.iter = ns)
## Please refer to model.jags file for model specification details
##########################################################################
###### Part II: Diagnostics #################################
#########################################################################
## Trace plots:
par(mfrow = c(2,4), mar = c(2,2,2,2))
traceplot(sam.coda[[1]][,c("n[3]","n[40]","n[60]","n[100]", "m[3]","m[40]","m[60]","m[120]")])
## Good mixing,
## "rapid" apparent movement about the state space could be seen.
## ACF plots:
acfplot(sam.coda[[1]][,c("n[3]","n[40]","n[60]","n[100]", "m[3]","m[40]","m[60]","m[120]")],
main = "Autocorrelation Function Plots",
aspect=2, type="l")
## The auto-correlations reduce faster around the middle part.
## For Gibbs sampling, high posterior correlation tends to slow mixing, and
## usually the effective sample sizes are low relative to the simulation length.
## However, based on the full nodes of plot(sam.coda), the burn-in period has not ended
## (MC not yet converge) until 4000.
## effective size of sampling calculation:
effect <- max(effectiveSize(sam.coda[[1]])); effect
## roughly 600 (it varies)
#######################################################################
############## Part III: Summary statistics and Final Plot ##########
#######################################################################
## Sampling for expected deaths and new infections:
death_ind <- grep("m", colnames(sam.coda[[1]])) ## index in sequential date
infection_ind <- grep("n", colnames(sam.coda[[1]]))[1:length(y)] ## using 100 here
## Credible interval for new infections:
CI <- apply(sam.coda[[1]][,infection_index], 2, quantile, prob=(c(0.025,0.975)))
upper_bound <- CI[2,]; lower_bound <- CI[1,]
## Posterior mean of new infections and expected deaths number:
expect_death <- colMeans(sam.coda[[1]][,death_ind])
new_infect <- colMeans(sam.coda[[1]][,infection_ind])
max_vertical <- max(max(new_infect), max(expect_death),
max(upper_bound), max(lower_bound),
max(y_new)
)
lockdown <- julian(as.Date("2020-3-24"), origin=as.Date("2019-12-31"))
date_vector <- seq(as.Date("2020-2-11"), by = "days", length.out = 120)
julian_day <- julian(date_vector,origin=as.Date("2019-12-31"))
infection_day <- julian_day[1:length(y)]
## The final single summary plot:
par(mfrow=c(1,1),mar=c(5.1,4.1,4.1,2.1))
plot(x=julian_day, y=y_new,
xlab = "Day of the year",
ylab = "Number of Individuals",
ylim = c(0,max_vertical),
col='grey',pch=20)
## Shade the credible interval band
polygon(x=c(rev(infection_day), infection_day),
y=c(rev(lower_bound), upper_bound),
col = "yellow", border = NA)
lines(x=infection_day,y=lower_bound,col="black")
lines(x=infection_day,y=upper_bound,col="black")
lines(x=infection_day,y=new_infect,col="red")
lines(x=julian_day,y=expect_death,col="blue")
## Highlight Mar.24th 2020, the the first date of lock-down in the UK
abline(v=lockdown)
text(x=lockdown + 4, y=1100,
label="lockdown", pos=3, cex=0.6, srt=90)
title("Daily Deaths and New Infections from Covid-19 at England (2020)", cex.main=0.8)
legend(x="topright", legend=c("Mean of New Infections",
"Mean of Expected Deaths",
"95% Credible Interval (New Infections)",
"Observed Daily Deaths"),
col = c("red", "blue", "yellow", "grey"),
lty = c(1,1,NA,NA),
fill = c(NA,NA, "yellow", NA),
border = c(NA,NA,"yellow", NA),
pch=c(NA,NA,NA,20),
cex = 0.55, bty = "n", x.intersp = c(2,2,1.5,2.2)
)
#######################################
### Conclusion:
### The posterior of the new infections fit the NHS England data well.
### As the red line is all bounded by the confidence interval.
### The tricky thing is that the peak of deaths appear after the first lockdown.
#######################################################################