240109.Rmd

---
title: "SEIR modelling"
output:
  html_document: default
  pdf_document: default
date: "2024-01-08"
---

```{r setup, include=FALSE}
# Echo the source of every chunk in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)

library("ggplot2") # plotting (ggplot(), geom_line(), ...)
library("sdprisk") # presumably the source of dhypoexp() used further down

# Project-local scripts. NOTE(review): simulate_seir(), plot_SEIR() and
# plot_SEIR_incidences() are not defined in this document, so they are
# presumably provided by these files; the order is kept as-is in case the
# scripts depend on each other.
source("solver.R")
source("model.R")
source("simulation.R")
source("plots.R")

# Fix the RNG seed so that the stochastic simulations below give the same
# figures and tables on every knit. Set after the source() calls so that
# nothing executed while sourcing shifts the random stream used by the report.
# Assumes the simulation draws from R's built-in RNG -- TODO confirm.
set.seed(20240108)
```

## Modelling COVID-19 using a stochastic differential equation

### Equations

The SEIR model arises from the following assumptions:

$$
\begin{gathered}
  S(t) + E(t) + I(t) + R(t) = N \quad \forall t \\
  S \xrightarrow{\text{exposure rate }\beta} E \xrightarrow{\text{infection rate }\sigma} I \xrightarrow{\text{removal rate }\gamma} R
\end{gathered}
$$

Consequently, we can write down the rate of each transition and model the number of transitions using a Poisson distribution:

$$
\begin{aligned}
  S \rightarrow E &\sim \text{Pois}\left( \frac{\beta}{N}S(t)I(t) \right) \\
  E \rightarrow I &\sim \text{Pois}\left( \sigma E(t)\right) \\
  I \rightarrow R &\sim \text{Pois}\left( \gamma I(t)\right)
\end{aligned}
$$

#### Estimating parameters for COVID-19

I approximated the values for $\beta$, $\sigma$ and $\gamma$ using the following:

-   Latent period of 2.5 days
-   Infectious period of 5.6 days
-   $R_0$ of 2.68

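Taking $\sigma$ as the reciprocal of the latent period, $\gamma$ as the reciprocal of the infectious period, and $\beta = R_0 \gamma$, this resulted in the following parameters:

$$
\begin{aligned}
\beta &= 2.68 \times 0.17857 = 0.47857 \\
\sigma &= 1 / 2.5 = 0.4 \\
\gamma &= 1 / 5.6 = 0.17857
\end{aligned}
$$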

### Solving the equations

```{r covid_default_model}

# Model parameters: population size N plus the three transition rates
# estimated in the section above.
parameters <- c(
  N     = 10000,
  beta  = 0.47857,
  sigma = 0.4,
  gamma = 0.17857
)

# Initial compartment sizes: one exposed individual, everyone else susceptible.
state <- c(
  S = 9999,
  E = 1,
  I = 0,
  R = 0
)

# Run the model from t = 0 to t = 100 with simulate_seir()'s default settings.
out.default_solution <- simulate_seir(initial_value = state, params = parameters, start = 0, end = 100)

# Incidence view of the run.
plot_SEIR(out.default_solution, parameters, style = "incidence")
```

### Generating multiple solutions (and a deterministic one) and comparing

```{r multi-plot}
# One deterministic run over the same window, used as the reference curve.
out.deterministic <- simulate_seir(initial_value = state, params = parameters, start = 0, end = 100, stochastic = FALSE)

# 100 independent stochastic runs, collected in a plain (unnamed) list.
out.stochastic <- lapply(seq_len(100), function(run) {
  simulate_seir(
    initial_value = state,
    params        = parameters,
    start         = 0,
    end           = 100,
    stochastic    = TRUE,
    simulate      = FALSE
  )
})

# Overlay the stochastic runs and the deterministic run.
plot_SEIR_incidences(out.deterministic, out.stochastic, parameters)
```

### Simulating using a real population

```{r}
# Same rates as before, but with a population ten times larger.
parameters <- c(
  N     = 100000,
  beta  = 0.47857,
  sigma = 0.4,
  gamma = 0.17857
)

# Again start from a single exposed individual.
state <- c(
  S = 99999,
  E = 1,
  I = 0,
  R = 0
)

# Longer horizon (t = 0 to t = 200) for the larger population.
out <- simulate_seir(initial_value = state, params = parameters, start = 0, end = 200)

# Two views of the same run.
plot_SEIR(out, parameters, "combined")
plot_SEIR(out, parameters, "exploded")

# Overlay a histogram of the raw event times (one bin per unit of time) on the
# incidence plot: the histogram of events should exactly match the line plot.
plot_SEIR(out, parameters, "incidence") +
  geom_histogram(
    data     = out$events,
    aes(x = time),
    binwidth = 1,
    alpha    = 0.5
  )
```

#### Sanity check our results to verify things are working properly

```{r}
# Tally the values found in the population's `.state` column
# (presumably each individual's final compartment -- confirm against the
# simulation code).
results <- table(unlist(out$pop$.state))
print(results)

# Last row of the overview, flattened to a named vector, for comparison with
# the tally above.
print(unlist(tail(out$overview, n = 1)))
```

#### Generation time distribution

```{r}
# Generation time of an event = time of the event minus the time at which
# the infector was themselves exposed.
events <- out$events

# Look up each infector's exposure time in the population table.
# NOTE(review): assumes events$infector holds row indices (or row names) of
# out$pop and that column E stores the exposure time -- confirm against the
# simulation code.
events$infector_exposure_time <- out$pop[events$infector, ]$E
events$gt <- events$time - events$infector_exposure_time

# normalise the generation time frequencies, to graph a histogram
gt.freq <- data.frame(table(events$gt)) # frequency table
colnames(gt.freq) <- c("time", "freq")  # fix column names
# table() stores the observed values as factor levels. Calling as.numeric()
# directly on a factor returns the internal level codes (1, 2, 3, ...) rather
# than the generation times, so convert via character to recover the values.
gt.freq$time <- as.numeric(as.character(gt.freq$time))
gt.freq$norm <- gt.freq$freq / sum(gt.freq$freq) # normalise

# Expected curve: hypoexponential density with rates sigma and gamma,
# evaluated on a fine grid.
hypoexp <- data.frame(time = seq(1, 60, by = 0.1))
hypoexp$density <- dhypoexp(
  hypoexp$time,
  rate = c(parameters[["sigma"]], parameters[["gamma"]])
)

# plot simulated proportions against the expected density
ggplot() +
  geom_line(data = gt.freq, aes(x = time, y = norm, colour = "simulation")) +
  geom_line(data = hypoexp, aes(x = time, y = density, colour = "expected")) +
  #geom_line(data=out, aes(x=time, y=g, colour="de")) +
  labs(
    title = "generation time frequency",
    x = "generation time",
    y = "proportion",
    colour = "type"
  )
```

#### Forwards and backwards generation time

The backwards generation interval (GI) is also known as the "period GI", and the forwards GI is also known as the "cohort GI".

```{r}
# Backwards generation time: summarise the generation times of all events
# that share the same event time.
gt.back <- do.call(
  data.frame, # flattens aggregate()'s matrix column into one column per stat
  aggregate(
    gt ~ time,
    data = events,
    FUN = function(gts) {
      quartiles <- quantile(gts, probs = c(0.25, 0.75))
      c(
        mean   = mean(gts),
        q1     = quartiles[[1]],
        q3     = quartiles[[2]],
        median = median(gts),
        sd     = sd(gts),
        size   = length(gts)
      )
    }
  )
)
# Replace the auto-generated column names with short ones (same order as the
# statistics computed above).
colnames(gt.back) <- c("time", "mean", "q1", "q3", "median", "sd", "size")
```

And the forwards generation time, grouped by the time at which the infector was exposed:

```{r}
# Forwards generation time: summarise the generation times of all events
# whose infector was exposed at the same time.
gt.fwd <- do.call(
  data.frame, # flattens aggregate()'s matrix column into one column per stat
  aggregate(
    gt ~ infector_exposure_time,
    data = events,
    FUN = function(gts) {
      quartiles <- quantile(gts, probs = c(0.25, 0.75))
      c(
        mean   = mean(gts),
        q1     = quartiles[[1]],
        q3     = quartiles[[2]],
        median = median(gts),
        sd     = sd(gts),
        size   = length(gts)
      )
    }
  )
)
# The grouping column is renamed to "time" so that gt.fwd and gt.back share
# the same layout.
colnames(gt.fwd) <- c("time", "mean", "q1", "q3", "median", "sd", "size")
```

Graphing the results

```{r}
# Simulated mean backwards generation time, with error bars of +/- one
# standard deviation at each time.
ggplot(data = gt.back, mapping = aes(x = time, y = mean)) +
  geom_line() +
  geom_errorbar(
    mapping  = aes(ymin = mean - sd, ymax = mean + sd),
    width    = .1,
    position = position_dodge()
  ) +
  labs(title = "mean backwards generation time")


# Simulation against the estimate.
# NOTE(review): gt_c is not defined anywhere in this document; it must be
# created by one of the sourced scripts (with `time` and `back` columns) or
# this chunk will fail when knitting -- confirm.
ggplot() +
  geom_line(
    data    = gt.back,
    mapping = aes(x = time, y = mean, colour = "simulation")
  ) +
  geom_line(
    data    = gt_c,
    mapping = aes(x = time, y = back, colour = "estimate")
  ) +
  labs(title = "mean backwards generation time")
```

```{r}
# Simulated mean forwards generation time (error bars currently disabled).
ggplot(data = gt.fwd, mapping = aes(x = time, y = mean)) +
  geom_line() +
  #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1,
  #               position=position_dodge()) +
  labs(title = "mean forwards generation time")

# Estimate against the simulation.
# NOTE(review): gt_c is not defined anywhere in this document; it must be
# created by one of the sourced scripts (with `time` and `fwd` columns) or
# this chunk will fail when knitting -- confirm.
ggplot() +
  geom_line(
    data    = gt_c,
    mapping = aes(x = time, y = fwd, colour = "estimate")
  ) +
  geom_line(
    data    = gt.fwd,
    mapping = aes(x = time, y = mean, colour = "simulation")
  ) +
  labs(title = "mean forwards generation time")
```