diff --git a/EpiAware/src/EpiAware.jl b/EpiAware/src/EpiAware.jl
index fdf21fe48..378ae7fff 100644
--- a/EpiAware/src/EpiAware.jl
+++ b/EpiAware/src/EpiAware.jl
@@ -48,7 +48,7 @@ export spread_draws, scan, create_discrete_pmf
 
 include("EpiLatentModels/EpiLatentModels.jl")
 using .EpiLatentModels
-export RandomWalk, AR
+export RandomWalk, AR, DiffLatentModel
 
 include("EpiInfModels/EpiInfModels.jl")
 using .EpiInfModels
diff --git a/EpiAware/src/EpiLatentModels/EpiLatentModels.jl b/EpiAware/src/EpiLatentModels/EpiLatentModels.jl
index 71b317641..9b95b3296 100644
--- a/EpiAware/src/EpiLatentModels/EpiLatentModels.jl
+++ b/EpiAware/src/EpiLatentModels/EpiLatentModels.jl
@@ -8,10 +8,12 @@ using ..EpiAwareBase
 
 using Turing, Distributions, DocStringExtensions
 
-export RandomWalk, AR
+export RandomWalk, AR, DiffLatentModel
 
 include("docstrings.jl")
 include("randomwalk.jl")
 include("autoregressive.jl")
+include("difflatentmodel.jl")
 include("utils.jl")
+
 end
diff --git a/EpiAware/src/EpiLatentModels/difflatentmodel.jl b/EpiAware/src/EpiLatentModels/difflatentmodel.jl
new file mode 100644
index 000000000..f2e896265
--- /dev/null
+++ b/EpiAware/src/EpiLatentModels/difflatentmodel.jl
@@ -0,0 +1,191 @@
+
+@doc raw"""
+Model the latent process as a `d`-fold differenced version of another process.
+
+## Mathematical specification
+
+Let ``\Delta`` be the differencing operator. If ``\tilde{Z}_t`` is a realisation of
+undifferenced latent model supplied to `DiffLatentModel`, then the differenced process is
+given by,
+
+```math
+\Delta^{(d)} Z_t = \tilde{Z}_t, \quad t = d+1, \ldots.
+```
+
+We can recover ``Z_t`` by applying the inverse differencing operator ``\Delta^{-1}``, which
+corresponds to the cumulative sum operator `cumsum` in Julia, `d`-times. The `d` initial
+terms ``Z_1, \ldots, Z_d`` are inferred.
+
+## Constructors
+
+- `DiffLatentModel(latentmodel, init_prior_distribution::Distribution; d::Int)`
+    Constructs a `DiffLatentModel` for `d`-fold differencing with `latentmodel` as the
+    undifferenced latent process. All initial terms have common prior
+    `init_prior_distribution`.
+- `DiffLatentModel(;model, init_priors::Vector{D} where {D <: Distribution})`
+    Constructs a `DiffLatentModel` for `d`-fold differencing with `model` as the
+    undifferenced latent process. The `d` initial terms have priors given by the vector
+    `init_priors`, therefore `length(init_priors)` sets `d`.
+
+## Example usage with `generate_latent`
+
+`generate_latent` can be used to construct a `Turing` model for the differenced latent
+process. In this example, the underlying undifferenced process is a `RandomWalk` model.
+
+
+First, we construct a `RandomWalk` struct with an initial value prior and a step size
+standard deviation prior.
+
+```julia
+using Distributions, EpiAware
+rw = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
+```
+
+Then, we can use `DiffLatentModel` to construct a `DiffLatentModel` for `d`-fold
+differenced process with `rw` as the undifferenced latent process.
+
+We have two constructor options for `DiffLatentModel`. The first option is to supply a
+common prior distribution for the initial terms and specify `d` as follows:
+
+```julia
+diff_model = DiffLatentModel(rw, Normal(); d = 2)
+```
+
+Or we can supply a vector of priors for the initial terms and `d` is inferred as follows:
+```julia
+diff_model2 = DiffLatentModel(;model = rw, init_priors = [Normal(), Normal()])
+```
+
+Then, we can use `generate_latent` to construct a Turing model for the differenced latent
+process generating a length `n` process,
+
+```julia
+# Construct a Turing model
+n = 100
+difference_mdl = generate_latent(diff_model, n)
+```
+
+Now we can use the `Turing` PPL API to sample underlying parameters and generate the
+unobserved latent process.
+
+```julia
+#Sample random parameters from prior
+θ = rand(difference_mdl)
+#Get a sampled latent process as a generated quantity from the model
+(Z_t, _) = generated_quantities(difference_mdl, θ)
+Z_t
+```
+
+"""
+struct DiffLatentModel{M <: AbstractLatentModel, P} <: AbstractLatentModel
+    "Underlying latent model for the differenced process"
+    model::M
+    "The prior distribution for the initial latent variables."
+    init_prior::P
+    "Number of times differenced."
+    d::Int
+
+    function DiffLatentModel(
+            model::AbstractLatentModel, init_prior::Distribution; d::Int)
+        init_priors = fill(init_prior, d)
+        return DiffLatentModel(; model = model, init_priors = init_priors)
+    end
+
+    function DiffLatentModel(; model::AbstractLatentModel,
+            init_priors::Vector{D} where {D <: Distribution} = [Normal()])
+        d = length(init_priors)
+        init_prior = _expand_dist(init_priors)
+        return DiffLatentModel(model, init_prior, d)
+    end
+
+    function DiffLatentModel(model::AbstractLatentModel, init_prior::Distribution, d::Int)
+        @assert d>0 "d must be greater than 0"
+        @assert d==length(init_prior) "d must be equal to the length of init_prior"
+        new{typeof(model), typeof(init_prior)}(model, init_prior, d)
+    end
+end
+
+"""
+Generate a `Turing` model for `n`-step latent process ``Z_t`` using a differenced
+latent model defined by `latent_model`.
+
+# Arguments
+- `latent_model::DiffLatentModel`: The differenced latent model.
+- `n`: The length of the latent variables.
+
+## Turing model specifications
+
+### Sampled random variables
+- `latent_init`: The initial latent process variables.
+- Other random variables defined by `model<:AbstractLatentModel` field of the undifferenced
+    model.
+
+### Generated quantities
+- A tuple containing the generated latent process as its first argument
+    and a `NamedTuple` of sampled auxiliary variables as second argument.
+
+# Example usage with `DiffLatentModel` model constructor
+
+`generate_latent` can be used to construct a `Turing` model for the differenced latent
+process. In this example, the underlying undifferenced process is a `RandomWalk` model.
+
+First, we construct a `RandomWalk` struct with an initial value prior and a step size
+standard deviation prior.
+
+```julia
+using Distributions, EpiAware
+rw = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
+```
+
+Then, we can use `DiffLatentModel` to construct a `DiffLatentModel` for `d`-fold
+differenced process with `rw` as the undifferenced latent process.
+
+We have two constructor options for `DiffLatentModel`. The first option is to supply a
+common prior distribution for the initial terms and specify `d` as follows:
+
+```julia
+diff_model = DiffLatentModel(rw, Normal(); d = 2)
+```
+
+Or we can supply a vector of priors for the initial terms and `d` is inferred as follows:
+```julia
+diff_model2 = DiffLatentModel(;model = rw, init_priors = [Normal(), Normal()])
+```
+
+Then, we can use `generate_latent` to construct a Turing model for the differenced latent
+process generating a length `n` process,
+
+```julia
+# Construct a Turing model
+n = 100
+difference_mdl = generate_latent(diff_model, n)
+```
+
+Now we can use the `Turing` PPL API to sample underlying parameters and generate the
+unobserved latent process.
+
+```julia
+#Sample random parameters from prior
+θ = rand(difference_mdl)
+#Get a sampled latent process as a generated quantity from the model
+(Z_t, _) = generated_quantities(difference_mdl, θ)
+Z_t
+```
+"""
+@model function EpiAwareBase.generate_latent(latent_model::DiffLatentModel, n)
+    d = latent_model.d
+    @assert n>d "n must be longer than d"
+    latent_init ~ latent_model.init_prior
+
+    @submodel diff_latent, diff_latent_aux = generate_latent(latent_model.model, n - d)
+
+    return _combine_diff(latent_init, diff_latent, d), (; latent_init, diff_latent_aux...)
+end
+
+function _combine_diff(init, diff, d)
+    combined = vcat(init, diff)
+    for _ in 1:d
+        combined = cumsum(combined)
+    end
+    return combined
+end
diff --git a/EpiAware/test/test_difflatentmodel.jl b/EpiAware/test/test_difflatentmodel.jl
new file mode 100644
index 000000000..650900f43
--- /dev/null
+++ b/EpiAware/test/test_difflatentmodel.jl
@@ -0,0 +1,90 @@
+@testitem "Testing DiffLatentModel constructor" begin
+    using Distributions, Turing
+
+    model = RandomWalk()
+    @testset "Testing DiffLatentModel with vector of priors" begin
+        init_priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
+        diff_model = DiffLatentModel(; model = model, init_priors = init_priors)
+
+        @test diff_model.model == model
+        @test diff_model.init_prior == arraydist(init_priors)
+        @test diff_model.d == 2
+    end
+
+    @testset "Testing DiffLatentModel with single prior and d" begin
+        d = 3
+        init_prior = Normal()
+        diff_model = DiffLatentModel(model, init_prior; d = d)
+
+        @test diff_model.model == model
+        @test diff_model.init_prior == filldist(init_prior, d)
+        @test diff_model.d == d
+    end
+end
+
+@testitem "Testing DiffLatentModel process" begin
+    using DynamicPPL, Turing
+    using Distributions
+    using HypothesisTests: ExactOneSampleKSTest, pvalue
+
+    n = 100
+    d = 2
+    model = RandomWalk(Normal(0.0, 1.0), truncated(Normal(0.0, 0.05), 0.0, Inf))
+    init_priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
+    diff_model = DiffLatentModel(model = model, init_priors = init_priors)
+
+    latent_model = generate_latent(diff_model, n)
+    fixed_model = fix(
+        latent_model, (σ_RW = 0, rw_init = 0.0))
+
+    n_samples = 2000
+    samples = sample(fixed_model, Prior(), n_samples) |>
+              chn -> mapreduce(hcat, generated_quantities(fixed_model, chn)) do gen
+        gen[1]
+    end
+
+    #Because of the recursive d-times cumsum to undifference the process,
+    #The distribution of the second day should be d lots of first day init distribution
+    """
+    Add two normal distributions `x` and `y` together.
+
+    # Returns
+    - `Normal`: The sum of `x` and `y` as a normal distribution.
+    """
+    function _add_normals(x::Normal, y::Normal)
+        return Normal(x.μ + y.μ, sqrt(x.σ^2 + y.σ^2))
+    end
+
+    #Plus day two distribution
+    day2_dist = foldl((x, y) -> _add_normals(x, init_priors[1]), 1:d, init = init_priors[2])
+
+    ks_test_pval_day1 = ExactOneSampleKSTest(samples[1, :], init_priors[1]) |> pvalue
+    ks_test_pval_day2 = ExactOneSampleKSTest(samples[2, :], day2_dist) |> pvalue
+
+    @test size(samples) == (n, n_samples)
+    @test ks_test_pval_day1 > 1e-6 #Very unlikely to fail if the model is correctly implemented
+    @test ks_test_pval_day2 > 1e-6 #Very unlikely to fail if the model is correctly implemented
+end
+
+@testitem "Testing DiffLatentModel runs with AR process" begin
+    using DynamicPPL, Turing
+    using Distributions
+
+    n = 100
+    d = 2
+    model = AR()
+    init_priors = [Normal(0.0, 1.0), Normal(1.0, 2.0)]
+    diff_model = DiffLatentModel(model = model, init_priors = init_priors)
+
+    latent_model = generate_latent(diff_model, n)
+    fixed_model = fix(latent_model,
+        (latent_init = [0.0, 1.0], σ_AR = 1.0, damp_AR = [0.8], ar_init = [0.0]))
+
+    n_samples = 100
+    samples = sample(fixed_model, Prior(), n_samples) |>
+              chn -> mapreduce(hcat, generated_quantities(fixed_model, chn)) do gen
+        gen[1]
+    end
+
+    @test size(samples) == (n, n_samples)
+end