Skip to content

Commit

Permalink
Issue 537: Fix making analysis dataframes and record priors (#535)
Browse files Browse the repository at this point in the history
* Update changelog.md

* Update make_model_priors.jl

* Make `define_epiprob` more modular

_make_epidata can also be reused elsewhere

* restructure analysis dataframe func

* fix make_prediction_dataframe_from_output

And add simple unit test with committed test data

* Revert "fix make_prediction_dataframe_from_output"

This reverts commit 7e6238b.

* Reapply "fix make_prediction_dataframe_from_output"

This reverts commit 14d29b4.
  • Loading branch information
SamuelBrand1 authored Dec 9, 2024
1 parent 032457f commit 8c05d03
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 52 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,6 @@ EpiAware/docs/src/getting-started/tutorials/censored-obs.md
EpiAware/docs/Manifest*.toml

!benchmark/Manifest.toml

# Test data
!pipeline/test/analysis/test_data.jld2
34 changes: 34 additions & 0 deletions manuscript/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,37 @@

## 16th Aug 2024
Changed the proposed latent processes to daily rather than weekly incremental changes.

## 5th Dec 2024
Changed priors to

```julia
"""
Constructs and returns a dictionary of prior distributions for the latent model
parameters. This is the default method.
# Arguments
- `pipeline`: An instance of the `AbstractEpiAwarePipeline` type.
# Returns
A dictionary containing the following prior distributions:
- `"transformed_process_init_prior"`: A normal distribution with mean 0.0 and
standard deviation 0.25.
- `"std_prior"`: A half-normal distribution with standard deviation 0.25.
- `"damp_param_prior"`: A beta distribution with shape parameters 0.5 and 0.5.
"""
function make_model_priors(pipeline::AbstractEpiAwarePipeline)
transformed_process_init_prior = Normal(0.0, 0.25)
std_prior = HalfNormal(0.025)
damp_param_prior = Beta(1, 9)
log_I0_prior = Normal(log(100.0), 1e-1)

return Dict(
"transformed_process_init_prior" => transformed_process_init_prior,
"std_prior" => std_prior,
"damp_param_prior" => damp_param_prior,
"log_I0_prior" => log_I0_prior
)
end
```
86 changes: 46 additions & 40 deletions pipeline/src/analysis/make_prediction_dataframe_from_output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ Create a dataframe containing prediction results based on the given output and i
# Arguments
- `filename`: The name of the file.
- `output`: The output data containing inference configuration, IGP model, and other information.
- `output`: The output data containing inference configuration, IGP model, and other
using Base: infer_effects
information.
- `epi_datas`: The input data for the epidemiological model.
- `qs`: An optional array of quantiles to calculate. Default is `[0.025, 0.5, 0.975]`.
Expand All @@ -12,53 +14,57 @@ A dataframe containing the prediction results.
"""
function make_prediction_dataframe_from_output(
filename, output, epi_datas, pipelines; qs = [0.025, 0.5, 0.975])
#Get the scenario, IGP model, latent model and true mean GI
output, true_mean_gi; qs = [0.025, 0.25, 0.5, 0.75, 0.975],
transformation = oneexpy)
#Unpack the output
inference_config = output["inference_config"]
igp_model = output["inference_config"].igp |> string
scenario = EpiAwarePipeline._get_scenario_from_filename(filename, pipelines)
latent_model = EpiAwarePipeline._get_latent_model_from_filename(filename)
true_mean_gi = EpiAwarePipeline._get_true_gi_mean_from_filename(filename)
forecasts = output["forecast_results"]
#Get the scenario, IGP model, latent model and true mean GI
igp_model = inference_config["igp"] |> igp_name -> split(igp_name, ".")[end]
scenario = inference_config["scenario"]
latent_model = inference_config["latent_model"]
used_gi_mean = inference_config["gi_mean"]
used_gi_std = inference_config["gi_std"]
(start_time, reference_time) = inference_config["tspan"] |>
tspan -> split(tspan, "_") |>
tspan -> (
parse(Int, tspan[1]), parse(Int, tspan[2]))

#Get the quantiles for the targets across the gi mean scenarios
#if Renewal model, then we use the underlying epi model
#otherwise we use the epi datas to loop over different gi mean implications
used_epi_datas = igp_model == "Renewal" ? [output["epiprob"].epi_model.data] : epi_datas
used_gi_means = igp_model == "Renewal" ?
[used_gi_mean] :
make_gi_params(EpiAwareExamplePipeline())["gi_means"]

preds = nothing
try
preds = map(used_epi_datas) do epi_data
generate_quantiles_for_targets(output, epi_data, qs)
end
used_gi_means = igp_model == "Renewal" ?
[EpiAwarePipeline._get_used_gi_mean_from_filename(filename)] :
make_gi_params(EpiAwareExamplePipeline())["gi_means"]
used_epidatas = map(used_gi_means) do
_make_epidata(ḡ, used_gi_std; transformation = transformation)
end

preds = map(used_epidatas) do epi_data
generate_quantiles_for_targets(forecasts, epi_data, qs)
end

#Create the dataframe columnwise
df = mapreduce(vcat, preds, used_gi_means) do pred, used_gi_mean
mapreduce(vcat, keys(pred)) do target
target_mat = pred[target]
target_times = collect(1:size(target_mat, 1)) .+
(inference_config.tspan[1] - 1)
_df = DataFrame(target_times = target_times)
_df[!, "Scenario"] .= scenario
_df[!, "IGP_Model"] .= igp_model
_df[!, "Latent_Model"] .= latent_model
_df[!, "True_GI_Mean"] .= true_mean_gi
_df[!, "Used_GI_Mean"] .= used_gi_mean
_df[!, "Reference_Time"] .= inference_config.tspan[2]
_df[!, "Target"] .= string(target)
# quantile predictions
for (j, q) in enumerate(qs)
q_str = split(string(q), ".")[end]
_df[!, "q_$(q_str)"] = target_mat[:, j]
end
return _df
#Create the dataframe columnwise
df = mapreduce(vcat, preds, used_gi_means) do pred, used_gi_mean
mapreduce(vcat, keys(pred)) do target
target_mat = pred[target]
target_times = collect(1:size(target_mat, 1)) .+ (start_time - 1)
_df = DataFrame(target_times = target_times)
_df[!, "Scenario"] .= scenario
_df[!, "IGP_Model"] .= igp_model
_df[!, "Latent_Model"] .= latent_model
_df[!, "True_GI_Mean"] .= true_mean_gi
_df[!, "Used_GI_Mean"] .= used_gi_mean
_df[!, "Reference_Time"] .= reference_time
_df[!, "Target"] .= string(target)
# quantile predictions
for (j, q) in enumerate(qs)
q_str = split(string(q), ".")[end]
_df[!, "q_$(q_str)"] = target_mat[:, j]
end
return _df
end
return df
catch
@warn "Error in generating quantiles for targets in file $filename"
return nothing
end
return df
end
14 changes: 8 additions & 6 deletions pipeline/src/constructors/make_model_priors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,18 @@ parameters. This is the default method.
# Returns
A dictionary containing the following prior distributions:
- `"transformed_process_init_prior"`: A normal distribution with mean 0.0 and
standard deviation 0.25.
- `"std_prior"`: A half-normal distribution with standard deviation 0.25.
- `"damp_param_prior"`: A beta distribution with shape parameters 0.5 and 0.5.
standard deviation 0.025.
- `"std_prior"`: A half-normal distribution with standard deviation 0.025.
- `"damp_param_prior"`: A beta distribution with shape parameters 1 and 9.
- `"log_I0_prior"`: A normal distribution with mean log(100.0) and standard
deviation 1e-1.
"""
function make_model_priors(pipeline::AbstractEpiAwarePipeline)
transformed_process_init_prior = Normal(0.0, 0.25)
std_prior = HalfNormal(0.25)
damp_param_prior = Beta(0.5, 0.5)
log_I0_prior = Normal(log(100.0), 1e-5)
std_prior = HalfNormal(0.025)
damp_param_prior = Beta(1, 9)
log_I0_prior = Normal(log(100.0), 1e-1)

return Dict(
"transformed_process_init_prior" => transformed_process_init_prior,
Expand Down
33 changes: 27 additions & 6 deletions pipeline/src/infer/define_epiprob.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
"""
Create an `EpiData` object based on the provided generation interval mean and standard
deviation by assuming a gamma distribution.
# Arguments
- `gi_mean::Float64`: Mean of the generation interval.
- `gi_std::Float64`: Standard deviation of the generation interval.
- `transformation`: A transformation function to be applied
(default is `EpiAwarePipeline.oneexpy` a custom implementation of `exp`).
# Returns
- `model_data::EpiData`: An `EpiData` object containing the generation interval distribution
and transformation.
"""
function _make_epidata(gi_mean, gi_std; transformation = EpiAwarePipeline.oneexpy)
shape = (gi_mean / gi_std)^2
scale = gi_std^2 / gi_mean
gen_distribution = Gamma(shape, scale)

model_data = EpiData(
gen_distribution = gen_distribution, transformation = transformation)
return model_data
end

"""
Create an `EpiProblem` object based on the provided `InferenceConfig`.
Expand All @@ -9,12 +34,8 @@ Create an `EpiProblem` object based on the provided `InferenceConfig`.
"""
function define_epiprob(config::InferenceConfig)
shape = (config.gi_mean / config.gi_std)^2
scale = config.gi_std^2 / config.gi_mean
gen_distribution = Gamma(shape, scale)

model_data = EpiData(
gen_distribution = gen_distribution, transformation = config.transformation)
model_data = _make_epidata(
config.gi_mean, config.gi_std; transformation = config.transformation)
#Build the epidemiological model
epi = config.igp(data = model_data, initialisation_prior = config.log_I0_prior)

Expand Down
2 changes: 2 additions & 0 deletions pipeline/test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
EpiAware = "b2eeebe4-5992-4301-9193-7ebc9f62c855"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
Expand Down
20 changes: 20 additions & 0 deletions pipeline/test/analysis/make_prediction_dataframe_from_output.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
@testset "test dataframe construct for one dataset" begin
using JLD2, DataFramesMeta
output = load(joinpath(@__DIR__(), "test_data.jld2"))
true_mean_gi = 10.0

df = make_prediction_dataframe_from_output(output, true_mean_gi)
@test !isempty(df)
@test "Scenario" in names(df)
@test "IGP_Model" in names(df)
@test "Latent_Model" in names(df)
@test "True_GI_Mean" in names(df)
@test "Used_GI_Mean" in names(df)
@test "Reference_Time" in names(df)
@test "Target" in names(df)
@test "q_025" in names(df)
@test "q_25" in names(df)
@test "q_5" in names(df)
@test "q_75" in names(df)
@test "q_975" in names(df)
end
1 change: 1 addition & 0 deletions pipeline/test/analysis/test_analysis.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include("make_prediction_dataframe_from_output.jl")
Binary file added pipeline/test/analysis/test_data.jld2
Binary file not shown.

0 comments on commit 8c05d03

Please sign in to comment.