diff --git a/EpiAware/src/EpiAware.jl b/EpiAware/src/EpiAware.jl index 26d479afc..1d3c4162d 100644 --- a/EpiAware/src/EpiAware.jl +++ b/EpiAware/src/EpiAware.jl @@ -30,13 +30,18 @@ using Distributions, Parameters, QuadGK +# Exported utilities export scan, create_discrete_pmf, growth_rate_to_reproductive_ratio, generate_observation_kernel, - EpiModel, - log_infections, - random_walk + default_rw_priors + +# Exported types +export EpiData, Renewal, ExpGrowthRate, DirectInfections + +# Exported Turing model constructors +export make_epi_inference_model, random_walk include("utilities.jl") include("epimodel.jl") diff --git a/EpiAware/src/epimodel.jl b/EpiAware/src/epimodel.jl index d8ad2334d..4e099248c 100644 --- a/EpiAware/src/epimodel.jl +++ b/EpiAware/src/epimodel.jl @@ -1,37 +1,23 @@ abstract type AbstractEpiModel end - - -""" - struct EpiModel{T<:Real} <: AbstractEpiModel - -EpiModel represents an epidemiological model with generation intervals, delay intervals, and observation delay kernel. - -# Fields -- `gen_int::Vector{T}`: Discrete generation inteval, runs from 1, 2, ... to the end of the vector. -- `delay_int::Vector{T}`: Discrete delay distribution runs from 0, 1, ... to the end of the vector less 1. -- `delay_kernel::SparseMatrixCSC{T,Integer}`: Sparse matrix representing the observation delay kernel. -- `cluster_coeff::T`: Cluster coefficient for negative binomial observations. -- `len_gen_int::Integer`: Length of `gen_int`. -- `len_delay_int::Integer`: Length of `delay_int`. -- `time_horizon::Integer`: Length of the generated data. - -# Constructors -- `EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer)`: Constructs an EpiModel object with given generation intervals, delay intervals, cluster coefficient, and time horizon. -- `EpiModel(gen_distribution::ContinuousDistribution, delay_distribution::ContinuousDistribution, cluster_coeff, time_horizon::Integer; Δd = 1.0, D_gen, D_delay)`: Constructs an EpiModel object with generation and delay distributions, cluster coefficient, time horizon, and optional parameters. - -""" -struct EpiModel{T<:Real} <: AbstractEpiModel +struct EpiData{T<:Real,F<:Function} gen_int::Vector{T} delay_int::Vector{T} delay_kernel::SparseMatrixCSC{T,Integer} cluster_coeff::T - len_gen_int::Integer #length(gen_int) just to save recalc - len_delay_int::Integer #length(delay_int) just to save recalc + len_gen_int::Integer + len_delay_int::Integer time_horizon::Integer + transformation::F - #Inner constructors for EpiModel object - function EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer) + #Inner constructors for EpiData object + function EpiData( + gen_int, + delay_int, + cluster_coeff, + time_horizon::Integer, + transformation::Function, + ) @assert all(gen_int .>= 0) "Generation interval must be non-negative" @assert all(delay_int .>= 0) "Delay interval must be non-negative" @assert sum(gen_int) ≈ 1 "Generation interval must sum to 1" @@ -39,7 +25,7 @@ struct EpiModel{T<:Real} <: AbstractEpiModel K = generate_observation_kernel(delay_int, time_horizon) - new{eltype(gen_int)}( + new{eltype(gen_int),typeof(transformation)}( gen_int, delay_int, K, @@ -47,50 +33,51 @@ struct EpiModel{T<:Real} <: AbstractEpiModel length(gen_int), length(delay_int), time_horizon, + transformation, ) end - function EpiModel( + function EpiData( gen_distribution::ContinuousDistribution, delay_distribution::ContinuousDistribution, cluster_coeff, time_horizon::Integer; - Δd = 1.0, D_gen, D_delay, + Δd = 1.0, + transformation::Function = exp, ) gen_int = create_discrete_pmf(gen_distribution, Δd = Δd, D = D_gen) |> p -> p[2:end] ./ sum(p[2:end]) delay_int = create_discrete_pmf(delay_distribution, Δd = Δd, D = D_delay) - K = generate_observation_kernel(delay_int, time_horizon) - - new{eltype(gen_int)}( - gen_int, - delay_int, - K, - cluster_coeff, - length(gen_int), - length(delay_int), - time_horizon, - ) + return EpiData(gen_int, delay_int, cluster_coeff, time_horizon, transformation) end end -""" - (epi_model::EpiModel)(recent_incidence, Rt) +struct DirectInfections <: AbstractEpiModel + data::EpiData +end -Apply the EpiModel to calculate new incidence based on recent incidence and Rt. +function (epi_model::DirectInfections)(recent_incidence, unc_I_t) + nothing, epi_model.data.transformation(unc_I_t) +end + +struct ExpGrowthRate <: AbstractEpiModel + data::EpiData +end -# Arguments -- `recent_incidence`: Array of recent incidence values. -- `Rt`: Reproduction number. +function (epi_model::ExpGrowthRate)(unc_recent_incidence, rt) + new_unc_recent_incidence = unc_recent_incidence + rt + new_unc_recent_incidence, epi_model.data.transformation(new_unc_recent_incidence) +end + +struct Renewal <: AbstractEpiModel + data::EpiData +end -# Returns -- `new_incidence`: Array of new incidence values. -""" -function (epi_model::EpiModel)(recent_incidence, Rt) - new_incidence = Rt * dot(recent_incidence, epi_model.gen_int) - [new_incidence; recent_incidence[1:(epi_model.len_gen_int-1)]], new_incidence +function (epi_model::Renewal)(recent_incidence, Rt) + new_incidence = Rt * dot(recent_incidence, epi_model.data.gen_int) + [new_incidence; recent_incidence[1:(epi_model.data.len_gen_int-1)]], new_incidence end diff --git a/EpiAware/src/latent-processes.jl b/EpiAware/src/latent-processes.jl index 16361a587..828dd9eff 100644 --- a/EpiAware/src/latent-processes.jl +++ b/EpiAware/src/latent-processes.jl @@ -1,32 +1,25 @@ -const STANDARD_RW_PRIORS = - (var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf), init_rw_value_dist = Normal()) - - -""" - random_walk(n, ϵ_t = missing; latent_process_priors = (var_RW_dist = truncated(Normal(0., 0.05), 0., Inf),), ::Type{T} = Float64) where {T <: Real} - -Constructs a random walk model. - -# Arguments -- `n`: The number of time steps. -- `ϵ_t`: The random noise vector. Defaults to `missing`, in which case it is sampled from the standard multivariate normal distribution. -- `latent_process_priors`: The prior distribution for the latent process parameters. Defaults to `(var_RW_dist = truncated(Normal(0., 0.05), 0., Inf),)`. +function default_rw_priors() + return ( + var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf), + init_rw_value_dist = Normal(), + ) +end -# Returns -- `rw`: The random walk process. -- `σ_RW`: The standard deviation of the random walk process. -""" @model function random_walk( n, ϵ_t = missing, ::Type{T} = Float64; - latent_process_priors = STANDARD_RW_PRIORS, -) where {T<:Real} + latent_process_priors = default_rw_priors(), +) where {T<:AbstractFloat} rw = Vector{T}(undef, n) ϵ_t ~ MvNormal(ones(n)) σ²_RW ~ latent_process_priors.var_RW_dist init_rw_value ~ latent_process_priors.init_rw_value_dist σ_RW = sqrt(σ²_RW) - rw .= init_rw_value .+ cumsum(σ_RW * ϵ_t) - return rw, (; σ_RW, init_rw_value) + + rw[1] = init_rw_value + σ_RW * ϵ_t[1] + for t = 2:n + rw[t] = rw[t-1] + σ_RW * ϵ_t[t] + end + return rw, (; σ_RW, init_rw_value, init = rw[1]) end diff --git a/EpiAware/src/models.jl b/EpiAware/src/models.jl index 5f00d933d..948e50351 100644 --- a/EpiAware/src/models.jl +++ b/EpiAware/src/models.jl @@ -1,39 +1,8 @@ -""" - log_infections(y_t, epimodel::EpiModel, latent_process; - latent_process_priors, - transform_function = exp, - n_generate_ahead = 0, - pos_shift = 1e-6, - neg_bin_cluster_factor = missing, - neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3)) - -A Turing model for Log-infections undelying observed epidemiological data. - -This function defines a log-infections model for epidemiological data. -It takes the observed data `y_t`, an `EpiModel` object `epimodel`, and a `latent_process` -model. It also accepts optional arguments for the `latent_process_priors`, `transform_function`, -`n_generate_ahead`, `pos_shift`, `neg_bin_cluster_factor`, and `neg_bin_cluster_factor_prior`. - -## Arguments -- `y_t`: Observed data. -- `epimodel`: Epidemiological model. -- `latent_process`: Latent process model. -- `latent_process_priors`: Priors for the latent process model. -- `transform_function`: Function to transform the latent process into infections. Default is `exp`. -- `n_generate_ahead`: Number of time steps to generate ahead. Default is `0`. -- `pos_shift`: Positive shift to avoid zero values. Default is `1e-6`. -- `neg_bin_cluster_factor`: Missing value for the negative binomial cluster factor. Default is `missing`. -- `neg_bin_cluster_factor_prior`: Prior distribution for the negative binomial cluster factor. Default is `Gamma(3, 0.05 / 3)`. - -## Returns -A named tuple containing the generated quantities `I_t` and `latent_process_parameters`. -""" -@model function log_infections( +@model function make_epi_inference_model( y_t, - epimodel::EpiModel, + epimodel::AbstractEpiModel, latent_process; latent_process_priors, - transform_function = exp, pos_shift = 1e-6, neg_bin_cluster_factor = missing, neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3), @@ -42,16 +11,16 @@ A named tuple containing the generated quantities `I_t` and `latent_process_para neg_bin_cluster_factor ~ neg_bin_cluster_factor_prior #Latent process - time_steps = epimodel.time_horizon - @submodel _I_t, latent_process_parameters = + time_steps = epimodel.data.time_horizon + @submodel latent_process, latent_process_parameters = latent_process(time_steps; latent_process_priors = latent_process_priors) #Transform into infections - I_t = transform_function.(_I_t) + I_t, _ = scan(epimodel, latent_process_parameters.init, latent_process) #Predictive distribution case_pred_dists = - (epimodel.delay_kernel * I_t) .+ pos_shift .|> + (epimodel.data.delay_kernel * I_t) .+ pos_shift .|> μ -> mean_cc_neg_bin(μ, neg_bin_cluster_factor) #Likelihood diff --git a/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl b/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl index 259adfc4c..2a3dfebd1 100644 --- a/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl +++ b/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl @@ -1,4 +1,3 @@ -using Pkg: generate #= # Toy model for running analysis: @@ -47,18 +46,16 @@ r &\sim \text{Gamma}(3, 0.05/3). \end{align} ``` -## Load dependencies in `TestEnv` +## Load dependencies + +This script should be run from the root folder of `EpiAware` and with the active environment. =# -split(pwd(), "/")[end] != "EpiAware" && begin - cd("./EpiAware") - using Pkg - Pkg.activate(".") - using TestEnv - TestEnv.activate() -end + +using TestEnv # Run in Test environment mode +TestEnv.activate() using EpiAware using Turing @@ -70,16 +67,18 @@ Random.seed!(0) #= ## Create an `EpiModel` struct -Somewhat randomly chosen parameters for the `EpiModel` struct. +- Medium length generation interval distribution. +- Median 2 day, std 4.3 day delay distribution. +- 100 days of simulations =# -truth_GI = Gamma(1, 2) -truth_delay = Uniform(0.0, 5.0) +truth_GI = Gamma(2, 5) +truth_delay = LogNormal(2.0, 1.0) neg_bin_cluster_factor = 0.05 time_horizon = 100 -epimodel = EpiModel( +model_data = EpiData( truth_GI, truth_delay, neg_bin_cluster_factor, @@ -89,29 +88,41 @@ epimodel = EpiModel( ) #= -## Define a log-infections model -The log-infections model is defined by a Turing model `log_infections`. +## Define the data generating process -In this case we don't have observed data, so we use `missing` value for `y_t`. +In this case we use the `DirectInfections` model. =# -toy_log_infs = log_infections( + +toy_log_infs = DirectInfections(model_data) + +#= +## Generate a `Turing` `Model` +We don't have observed data, so we use `missing` value for `y_t`. +=# + +log_infs_model = make_epi_inference_model( missing, - epimodel, - random_walk; - latent_process_priors = EpiAware.STANDARD_RW_PRIORS, + toy_log_infs, + random_walk, + latent_process_priors = default_rw_priors(), + pos_shift = 1e-6, + neg_bin_cluster_factor = 0.5, + neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3), ) + + #= ## Sample from the model I define a fixed version of the model with initial infections set to 10 and variance of the random walk process set to 0.1. We can sample from the model using the `rand` function, and plot the generated infections against generated cases. =# -cond_toy = fix(toy_log_infs, (init_rw_value = log(10.0), σ²_RW = 0.1)) -random_epidemic = rand(cond_toy) - # We can get the generated infections using `generated_quantities` function. Because the observed # cases are "defined" with a `~` operator they can be accessed directly from the randomly sampled # process. + +cond_toy = fix(log_infs_model, (init_rw_value = log(10.0), σ²_RW = 0.1)) +random_epidemic = rand(cond_toy) gen = generated_quantities(cond_toy, random_epidemic) plot( gen.I_t, @@ -120,4 +131,4 @@ plot( ylabel = "Infections", title = "Generated Infections", ) -scatter!(X.y_t, lab = "generated cases") +scatter!(random_epidemic.y_t, lab = "generated cases") diff --git a/EpiAware/test/test_epimodel.jl b/EpiAware/test/test_epimodel.jl index 4280fcad1..82e2c703e 100644 --- a/EpiAware/test/test_epimodel.jl +++ b/EpiAware/test/test_epimodel.jl @@ -1,26 +1,68 @@ -@testitem "EpiModel constructor" begin +@testitem "EpiData constructor" begin gen_int = [0.2, 0.3, 0.5] delay_int = [0.1, 0.4, 0.5] cluster_coeff = 0.8 time_horizon = 10 + transformation = exp - model = EpiModel(gen_int, delay_int, cluster_coeff, time_horizon) + data = EpiData(gen_int, delay_int, cluster_coeff, time_horizon, transformation) - @test length(model.gen_int) == 3 - @test length(model.delay_int) == 3 - @test model.cluster_coeff == 0.8 - @test model.len_gen_int == 3 - @test model.len_delay_int == 3 + @test length(data.gen_int) == 3 + @test length(data.delay_int) == 3 + @test data.cluster_coeff == 0.8 + @test data.len_gen_int == 3 + @test data.len_delay_int == 3 - @test sum(model.gen_int) ≈ 1 - @test sum(model.delay_int) ≈ 1 + @test sum(data.gen_int) ≈ 1 + @test sum(data.delay_int) ≈ 1 - @test size(model.delay_kernel) == (time_horizon, time_horizon) + @test size(data.delay_kernel) == (time_horizon, time_horizon) + @test data.transformation(0.0) == 1.0 end -@testitem "EpiModel function" begin +@testitem "EpiData constructor with distributions" begin + using Distributions + + gen_distribution = Uniform(0.0, 10.0) + delay_distribution = Exponential(1.0) + cluster_coeff = 0.8 + time_horizon = 10 + D_gen = 10.0 + D_delay = 10.0 + Δd = 1.0 + + data = EpiData( + gen_distribution, + delay_distribution, + cluster_coeff, + time_horizon; + D_gen = 10.0, + D_delay = 10.0, + ) + + @test data.cluster_coeff == 0.8 + @test data.len_gen_int == Int64(D_gen / Δd) - 1 + @test data.len_delay_int == Int64(D_delay / Δd) + + @test sum(data.gen_int) ≈ 1 + @test sum(data.delay_int) ≈ 1 + + @test size(data.delay_kernel) == (time_horizon, time_horizon) +end + + +@testitem "Renewal function" begin using LinearAlgebra + gen_int = [0.2, 0.3, 0.5] + delay_int = [0.1, 0.4, 0.5] + cluster_coeff = 0.8 + time_horizon = 10 + transformation = exp + + data = EpiData(gen_int, delay_int, cluster_coeff, time_horizon, transformation) + renewal_model = Renewal(data) + recent_incidence = [10, 20, 30] Rt = 1.5 @@ -28,70 +70,46 @@ end expected_output = [expected_new_incidence; recent_incidence[1:2]], expected_new_incidence - model = EpiModel([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10) - @test model(recent_incidence, Rt) == expected_output + @test renewal_model(recent_incidence, Rt) == expected_output end -@testitem "EpiModel constructor" begin + +@testitem "ExpGrowthRate function" begin gen_int = [0.2, 0.3, 0.5] delay_int = [0.1, 0.4, 0.5] cluster_coeff = 0.8 time_horizon = 10 + transformation = exp - model = EpiModel(gen_int, delay_int, cluster_coeff, time_horizon) - - @test length(model.gen_int) == 3 - @test length(model.delay_int) == 3 - @test model.cluster_coeff == 0.8 - @test model.len_gen_int == 3 - @test model.len_delay_int == 3 - - @test sum(model.gen_int) ≈ 1 - @test sum(model.delay_int) ≈ 1 + data = EpiData(gen_int, delay_int, cluster_coeff, time_horizon, transformation) + rt_model = ExpGrowthRate(data) - @test size(model.delay_kernel) == (time_horizon, time_horizon) -end - -@testitem "EpiModel function" begin - using LinearAlgebra recent_incidence = [10, 20, 30] - Rt = 1.5 + rt = log(2) / 7.0 # doubling time of 7 days - expected_new_incidence = Rt * dot(recent_incidence, [0.2, 0.3, 0.5]) - expected_output = - [expected_new_incidence; recent_incidence[1:2]], expected_new_incidence + expected_new_incidence = recent_incidence[end] * exp(rt) + expected_output = log(expected_new_incidence), expected_new_incidence - model = EpiModel([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10) - @test model(recent_incidence, Rt) == expected_output + @test rt_model(log(recent_incidence[end]), rt)[1] ≈ expected_output[1] + @test rt_model(log(recent_incidence[end]), rt)[2] ≈ expected_output[2] end -@testitem "EpiModel constructor with distributions" begin - using Distributions - - gen_distribution = Uniform(0.0, 10.0) - delay_distribution = Exponential(1.0) +@testitem "DirectInfections function" begin + gen_int = [0.2, 0.3, 0.5] + delay_int = [0.1, 0.4, 0.5] cluster_coeff = 0.8 time_horizon = 10 - D_gen = 10.0 - D_delay = 10.0 - Δd = 1.0 + transformation = exp - model = EpiModel( - gen_distribution, - delay_distribution, - cluster_coeff, - time_horizon; - D_gen = 10.0, - D_delay = 10.0, - ) + data = EpiData(gen_int, delay_int, cluster_coeff, time_horizon, transformation) + direct_inf_model = DirectInfections(data) + + recent_log_incidence = [10, 20, 30] .|> log - @test model.cluster_coeff == 0.8 - @test model.len_gen_int == Int64(D_gen / Δd) - 1 - @test model.len_delay_int == Int64(D_delay / Δd) + expected_new_incidence = exp(recent_log_incidence[end]) + expected_output = nothing, expected_new_incidence - @test sum(model.gen_int) ≈ 1 - @test sum(model.delay_int) ≈ 1 - @test size(model.delay_kernel) == (time_horizon, time_horizon) + @test direct_inf_model(nothing, recent_log_incidence[end]) == expected_output end diff --git a/EpiAware/test/test_latent-processes.jl b/EpiAware/test/test_latent-processes.jl index 856a26243..9fb85a236 100644 --- a/EpiAware/test/test_latent-processes.jl +++ b/EpiAware/test/test_latent-processes.jl @@ -18,10 +18,24 @@ #Theoretically, after 5 steps distribution is N(0, var = 5) - theoretical_std_of_empiral_var = std(Chisq(5)) / sqrt(n_samples) + theoretical_std_of_empiral_var = std(Chisq(5)) / sqrt(n_samples - 1) @info "var = $(var(samples_day_5)); theoretical_std_of_empiral_var = $(theoretical_std_of_empiral_var)" @test (var(samples_day_5) - 5) < 5 * theoretical_std_of_empiral_var && (var(samples_day_5) - 5) > -5 * theoretical_std_of_empiral_var end +@testitem "Testing default_rw_priors" begin + + @testset "var_RW_dist" begin + priors = default_rw_priors() + var_RW = rand(priors.var_RW_dist) + @test var_RW >= 0.0 + end + + @testset "init_rw_value_dist" begin + priors = default_rw_priors() + init_rw_value = rand(priors.init_rw_value_dist) + @test typeof(init_rw_value) == Float64 + end +end diff --git a/EpiAware/test/test_models.jl b/EpiAware/test/test_models.jl index ed4fc11bd..8aa75bc4c 100644 --- a/EpiAware/test/test_models.jl +++ b/EpiAware/test/test_models.jl @@ -1,23 +1,24 @@ -@testitem "`log_infections` with RW latent process" begin +@testitem "direct infections with RW latent process" begin using Distributions, Turing, DynamicPPL # Define test inputs y_t = missing # Data will be generated from the model - epimodel = EpiModel([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10) - latent_process_priors = EpiAware.STANDARD_RW_PRIORS + data = EpiData([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10, exp) + latent_process_priors = default_rw_priors() transform_function = exp n_generate_ahead = 0 pos_shift = 1e-6 neg_bin_cluster_factor = 0.5 neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3) + epimodel = DirectInfections(data) + # Call the function - test_mdl = log_infections( + test_mdl = make_epi_inference_model( y_t, epimodel, random_walk; latent_process_priors, - transform_function, pos_shift, neg_bin_cluster_factor, neg_bin_cluster_factor_prior, @@ -32,7 +33,7 @@ (init_rw_value = log(1.0), σ²_RW = 0.0, neg_bin_cluster_factor = 0.05), ) X = rand(fixed_test_mdl) - expected_I_t = [1.0 for _ = 1:epimodel.time_horizon] + expected_I_t = [1.0 for _ = 1:epimodel.data.time_horizon] gen = generated_quantities(fixed_test_mdl, rand(fixed_test_mdl)) # Perform tests