Skip to content
Snippets Groups Projects
Commit 7a01b27c authored by Peter Jentsch's avatar Peter Jentsch
Browse files

documentation!

parent e8a16b48
No related branches found
No related tags found
No related merge requests found
No preview for this file type
No preview for this file type
...@@ -22,10 +22,14 @@ using Plots ...@@ -22,10 +22,14 @@ using Plots
const rng = Xoroshiro128Plus() const rng = Xoroshiro128Plus()
#lets us use nicer names for the indices #consts that let give us nicer names for the indices
const YOUNG, MIDDLE,OLD = 1,2,3 const YOUNG, MIDDLE,OLD = 1,2,3
const durmax = 144 const durmax = 144
const comparison_samples = 100
"""
Number of start times to sample for a given set of distribution parameters.
"""
const comparison_samples = 70
const sparam = [60, 12] const sparam = [60, 12]
# Swap age brackets for numbers # Swap age brackets for numbers
const swap_dict = OrderedDict("Y" => YOUNG, "M" => MIDDLE, "O" => OLD) const swap_dict = OrderedDict("Y" => YOUNG, "M" => MIDDLE, "O" => OLD)
...@@ -98,6 +102,8 @@ function bayesian_estimation(fname, err_func, priors_list, dists, particles; alp ...@@ -98,6 +102,8 @@ function bayesian_estimation(fname, err_func, priors_list, dists, particles; alp
end end
""" """
do_hh(particles::Integer)
Fit HH durations. This function specifies a list of distributions to fit, and loads the priors for "hh", and then calls `bayesian_estimation` with `err_hh`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the HH data. Fit HH durations. This function specifies a list of distributions to fit, and loads the priors for "hh", and then calls `bayesian_estimation` with `err_hh`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the HH data.
""" """
function do_hh(particles) function do_hh(particles)
...@@ -115,6 +121,9 @@ end ...@@ -115,6 +121,9 @@ end
""" """
do_ws(particles::Integer)
Fit WS durations. This function specifies a list of distributions to fit, and loads the priors for "workschool", and then calls `bayesian_estimation` with `err_ws`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the WS data. Fit WS durations. This function specifies a list of distributions to fit, and loads the priors for "workschool", and then calls `bayesian_estimation` with `err_ws`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the WS data.
""" """
function do_ws(particles) function do_ws(particles)
...@@ -130,6 +139,9 @@ function do_ws(particles) ...@@ -130,6 +139,9 @@ function do_ws(particles)
end end
""" """
do_rest(particles::Integer)
Fit rest durations. This function specifies a list of distributions to fit, and loads the priors for "rest", and then calls `bayesian_estimation` with `err_rest`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the Rest data. Fit rest durations. This function specifies a list of distributions to fit, and loads the priors for "rest", and then calls `bayesian_estimation` with `err_rest`. This function passes parameters to `bayesian_estimation` depending on what produces the best fit for the Rest data.
""" """
function do_rest(particles) function do_rest(particles)
......
const HHYMO = DataFrame(CSV.File("$PACKAGE_FOLDER/network-data/Timeuse/HH/HHYMO.csv")) const HHYMO = DataFrame(CSV.File("$PACKAGE_FOLDER/network-data/Timeuse/HH/HHYMO.csv"))
const cnst_hh = ( const Wghttotal = sum(HHYMO[:,"WGHT_PER"])
subsize = size(HHYMO)[1],
Wghttotal = sum(HHYMO[:,"WGHT_PER"]),
)
function make_dat_array() function make_dat_array()
durs = hcat( durs = hcat(
...@@ -27,28 +24,34 @@ function make_dat_array() ...@@ -27,28 +24,34 @@ function make_dat_array()
end end
const dat = make_dat_array() #assign a constant data array const dat = make_dat_array() #assign a constant data array
"""
#error function err_hh(p,dist)
Calculate error between observed total contact durations in HH data and simulated total contact durations based on `dist(p[i]...)`. This one works a bit differently than the WS or rest error function, since we do not have a distribution of total durations to compare to, but rather one average total duration, and we do have the explicit samples of numbers of contacts.
"""
function err_hh(p,dist) function err_hh(p,dist)
p_matrix = zeros(3,3) p_matrix = zeros(3,3)
for i in 1:9 for i in 1:9
p_matrix[i] = p[i] p_matrix[i] = p[i]
end end
age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD] age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD]
duration_subarray = dat.durs duration_subarray = dat.durs #durations, these are call subarray because we used to only sample a subset
num_contacts_subarray = dat.nums num_contacts_subarray = dat.nums #numbers of contacts for each duration
# display(p_matrix) # display(p_matrix)
AGERESP = dat.AGERESP AGERESP = dat.AGERESP #age of the respondent
errsum = 0 errsum = 0
@inbounds for i = 1:cnst_hh.subsize @inbounds for i = 1:length(duration_subarray) #loop through entire file
age_sample = AGERESP[i] age_sample = AGERESP[i]
@inbounds for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages @inbounds for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages
#get num_contacts from the HH data
durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],num_contacts_subarray[i,age_j])) .% durmax durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],num_contacts_subarray[i,age_j])) .% durmax
#this returns the MEAN of the distribution of total durations, given we have durations given by durs, and we sample comparison_samples from that distribution.
expdur = tot_dur_sample(comparison_samples,durs) expdur = tot_dur_sample(comparison_samples,durs)
errsum += (expdur/comparison_samples - duration_subarray[i,age_j])^2 #compute total errsum += (expdur - duration_subarray[i,age_j])^2 #compute total
end end
end end
# display("2")
return errsum return errsum
end end
\ No newline at end of file
# Set the underlying parameters for the intervals model # Set the underlying parameters for the intervals model
# This is the distribution of start_times
const startdist = Normal(sparam...) const startdist = Normal(sparam...)
...@@ -10,7 +11,14 @@ function coverage!(cov,S_j,E_j) ...@@ -10,7 +11,14 @@ function coverage!(cov,S_j,E_j)
push!(cov,Interval(S_j,E_j)) push!(cov,Interval(S_j,E_j))
end end
end end
#compute the total duration of a sample of intervals
"""
tot_dur_sample(n::Integer,durlist::Vector{Integer})
Returns the mean of the distribution of total durations, given a set of contact durations `durlist`, and a number of samples `n`.
Note that this is different from `tot_dur_sample!` in that it returns only a mean and does not mutate it's arguments. This could be reducible to that one but not without losing some performance I think. Used in `err_hh` where we only need a mean.
"""
function tot_dur_sample(n,durlist) function tot_dur_sample(n,durlist)
if isempty(durlist) if isempty(durlist)
return 0 return 0
...@@ -20,7 +28,6 @@ function tot_dur_sample(n,durlist) ...@@ -20,7 +28,6 @@ function tot_dur_sample(n,durlist)
int_list = Vector{Interval{Int,Closed,Closed}}() int_list = Vector{Interval{Int,Closed,Closed}}()
sizehint!(int_list,numcontact*2) sizehint!(int_list,numcontact*2)
start_matrix = trunc.(Int,rand(rng,startdist,(numcontact,n))) start_matrix = trunc.(Int,rand(rng,startdist,(numcontact,n)))
@inbounds for i in 1:n @inbounds for i in 1:n
empty!(int_list) empty!(int_list)
...@@ -32,10 +39,18 @@ function tot_dur_sample(n,durlist) ...@@ -32,10 +39,18 @@ function tot_dur_sample(n,durlist)
union!(int_list) union!(int_list)
total_dur += mapreduce(Intervals.span,+,int_list) total_dur += mapreduce(Intervals.span,+,int_list)
end end
return total_dur return total_dur/n
end end
"""
tot_dur_sample(sample_list::Vector{Integer},durlist::Vector{Integer})
Mutates `sample_list` to contain samples from the distribution of total durations, given a set of contact durations `durlist`.
This is different from `tot_dur_sample` because it modifies it's arguments. Used in `err_ws` and `err_rest` where we need a distribution.
"""
function tot_dur_sample!(sample_list,durlist) function tot_dur_sample!(sample_list,durlist)
if isempty(durlist) if isempty(durlist)
sample_list .= 0.0 sample_list .= 0.0
...@@ -45,7 +60,6 @@ function tot_dur_sample!(sample_list,durlist) ...@@ -45,7 +60,6 @@ function tot_dur_sample!(sample_list,durlist)
n = length(sample_list) n = length(sample_list)
int_list = Vector{Interval{Int,Closed,Closed}}() int_list = Vector{Interval{Int,Closed,Closed}}()
sizehint!(int_list,numcontact*2) sizehint!(int_list,numcontact*2)
# @show rand(rng,startdist,(numcontact,n))
start_matrix = trunc.(Int,rand(rng,startdist,(numcontact,n))) start_matrix = trunc.(Int,rand(rng,startdist,(numcontact,n)))
for i in 1:n for i in 1:n
empty!(int_list) empty!(int_list)
...@@ -58,12 +72,3 @@ function tot_dur_sample!(sample_list,durlist) ...@@ -58,12 +72,3 @@ function tot_dur_sample!(sample_list,durlist)
sample_list[i] = mapreduce(Intervals.span,+,int_list) sample_list[i] = mapreduce(Intervals.span,+,int_list)
end end
end end
function as_symmetric_matrix(l) #turn a vector of length 6, l, into a symmetric 3x3 matrix, probably a nicer way to do this exists
return [
l[1] l[2] l[3]
l[2] l[4] l[5]
l[3] l[5] l[6]
]
end
\ No newline at end of file
...@@ -6,25 +6,42 @@ const kerneldensity_data_rest = ( ...@@ -6,25 +6,42 @@ const kerneldensity_data_rest = (
M = kde(rest_data.M.durs,weights = rest_data.M.weights), M = kde(rest_data.M.durs,weights = rest_data.M.weights),
O = kde(rest_data.O.durs,weights = rest_data.O.weights), O = kde(rest_data.O.durs,weights = rest_data.O.weights),
) )
"""
err_rest(p::Vector{Number},dist::Distribution)
#error function for rest Calculate error between observed contact durations in rest data and simulated contact durations based on `dist(p[i]...)`.
"""
function err_rest(p,dist) function err_rest(p,dist)
#we get the vector p as a 9 element vector from KissABC, but more convenient as a 3x3 matrix
p_matrix = zeros(3,3) p_matrix = zeros(3,3)
for i in 1:9 for i in 1:9
p_matrix[i] = p[i] p_matrix[i] = p[i]
end end
#sample a matrix of neighourhood sizes for distributions in rest_distributions
#this is actually not correct? neighbourhood sizes should be symmetric!
neighourhoods = rand.(rng,rest_distributions) neighourhoods = rand.(rng,rest_distributions)
#create a matrix of distributions of type dist, from the parameters in p_matrix
age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD] age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD]
#initialize the list of samples and the total error
sample_list = zeros(comparison_samples) sample_list = zeros(comparison_samples)
errsum = 0 errsum = 0
for age_sample in YOUNG:OLD for age_sample in YOUNG:OLD
for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages
if neighourhoods[age_sample,age_j] > 0 if neighourhoods[age_sample,age_j] > 0
#get durations from our candidate distribtions for each of the contacts in neighbourhood
durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],neighourhoods[age_sample,age_j])) .% durmax durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],neighourhoods[age_sample,age_j])) .% durmax
#this MODIFIES sample_list to contain samples from the distribution of total_durations, given intervals of length dur.
tot_dur_sample!(sample_list,durs) tot_dur_sample!(sample_list,durs)
#compute a kernel density from the list of samples
kde_est = kde(sample_list) kde_est = kde(sample_list)
#compute L1 distance between observed distribution of total durations from survey, and observed distribution from simulation
errsum += mapreduce(+,0:1:durmax) do i errsum += mapreduce(+,0:1:durmax) do i
return abs(pdf(kde_est,i) - pdf(kerneldensity_data_ws[age_sample],i)) return abs(pdf(kde_est,i) - pdf(kerneldensity_data_ws[age_sample],i))
end end
......
import Pandas: read_csv import Pandas: read_csv
using DataFrames using DataFrames
"""
get_params(params::Vector{Number})
This function reorganizes a Vector of 6n parameters into a 3x3 symmetric matrix of n-tuples of parameters.
"""
function get_params(params) function get_params(params)
p_list = [as_symmetric_matrix(params[i:i+5]) for i = 1:6:length(params)] p_list = [as_symmetric_matrix(params[i:i+5]) for i = 1:6:length(params)]
return zip(p_list...) |> collect return zip(p_list...) |> collect
end end
"""
Turn a vector of length 6, l, into a symmetric 3x3 matrix, probably a nicer way to do this exists but meh.
"""
function as_symmetric_matrix(l)
return [
l[1] l[2] l[3]
l[2] l[4] l[5]
l[3] l[5] l[6]
]
end
"""
Load rest data from `network-data/Timeuse/Rest/RData`.
"""
function get_rest_data() function get_rest_data()
path = "$PACKAGE_FOLDER/network-data/Timeuse/Rest/RData" path = "$PACKAGE_FOLDER/network-data/Timeuse/Rest/RData"
data_table_by_age = map(collect(keys(swap_dict))) do age data_table_by_age = map(collect(keys(swap_dict))) do age
...@@ -18,6 +38,9 @@ function get_rest_data() ...@@ -18,6 +38,9 @@ function get_rest_data()
return (;data_table_by_age...) return (;data_table_by_age...)
end end
"""
Load WS data from `network-data/Timeuse/WS/WorkschoolData`.
"""
function get_ws_data() function get_ws_data()
path = "$PACKAGE_FOLDER/network-data/Timeuse/WS/WorkschoolData" path = "$PACKAGE_FOLDER/network-data/Timeuse/WS/WorkschoolData"
data_table_by_age = map(collect(keys(swap_dict))) do age data_table_by_age = map(collect(keys(swap_dict))) do age
...@@ -29,6 +52,9 @@ function get_ws_data() ...@@ -29,6 +52,9 @@ function get_ws_data()
return (;data_table_by_age...) return (;data_table_by_age...)
end end
"""
Load priors data from `IntervalsModel/network-data/POLYMOD/AALPoisPriors.csv` and format into a DataFrame with some additional columns for the index of the age class (per `swap_dict`).
"""
function get_priors() function get_priors()
df = DataFrame(read_csv("IntervalsModel/network-data/POLYMOD/AALPoisPriors.csv")) df = DataFrame(read_csv("IntervalsModel/network-data/POLYMOD/AALPoisPriors.csv"))
df_w_indicies = transform( df_w_indicies = transform(
...@@ -41,12 +67,21 @@ function get_priors() ...@@ -41,12 +67,21 @@ function get_priors()
return df_w_indicies return df_w_indicies
end end
function filter_priors(key)
"""
filter_priors(location_key::String)
Filter priors dataframe based on location_key, which should be one of "rest", "workschool", or "home". Returns an array of Categorical distributions.
"""
function filter_priors(location_key)
priors = get_priors() priors = get_priors()
if !(key in priors[!,:location])
if !(location_key in priors[!,:location])
throw(ArgumentError("location key not in priors file!")) throw(ArgumentError("location key not in priors file!"))
end end
priors_probs = filter(row-> row["location"] == key, eachrow(priors)) |>
priors_probs = filter(row-> row["location"] == location_key, eachrow(priors)) |>
df -> map(r -> (r[:index_out],r[:index_in]) => convert(Vector,r[string.(0:143)]),df) |> df -> map(r -> (r[:index_out],r[:index_in]) => convert(Vector,r[string.(0:143)]),df) |>
Dict |> Dict |>
df -> Dict(map(v -> v => Categorical(df[v]), collect(keys(df)))) df -> Dict(map(v -> v => Categorical(df[v]), collect(keys(df))))
......
const ws_distributions = CovidAlertVaccinationModel.initial_workschool_mixing_matrix
const ws_data = get_ws_data() const ws_data = get_ws_data()
"""
Kernel density functions based on the durs in ws_data and their corresponding weights. This defines an interpolated density function that we can compare to the density obtained from the simulation.
"""
const kerneldensity_data_ws = ( const kerneldensity_data_ws = (
Y = kde(ws_data.Y.durs;weights = ws_data.Y.weights), Y = kde(ws_data.Y.durs;weights = ws_data.Y.weights),
M = kde(ws_data.M.durs;weights = ws_data.M.weights), M = kde(ws_data.M.durs;weights = ws_data.M.weights),
O = kde(ws_data.O.durs;weights = ws_data.O.weights), O = kde(ws_data.O.durs;weights = ws_data.O.weights),
) )
"""
err_ws(p::Vector{Number},dist::Distribution)
#error function for ws Calculate error between observed contact durations in workschool data and simulated contact durations based on `dist(p[i]...)`. For these workschool simulations, we use a different sampling procedure for neighbourhood sizes based on `ws_sample` in CovidAlertVaccinationModel.
"""
function err_ws(p,dist) function err_ws(p,dist)
p_matrix = zeros(3,3) #we get the vector p as a 9 element vector from KissABC, but more convenient as a 3x3 matrix
p_matrix = zeros(3,3)
for i in 1:9 for i in 1:9
p_matrix[i] = p[i] p_matrix[i] = p[i] #this will fill p_matrix with the elements of p in column-major order
end end
age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD]
#create a matrix of distributions of type dist, from the parameters in p_matrix
age_dists = [dist(p_matrix[i,j]...) for i in YOUNG:OLD, j in YOUNG:OLD]
#initialize the list of samples and the total error
sample_list = zeros(comparison_samples) sample_list = zeros(comparison_samples)
errsum = 0 errsum = 0
for age_sample in YOUNG:OLD for age_sample in YOUNG:OLD
#Sample neighbourhood sizes. Note that this includes the weekend/weekday coinflip, implemented in CovidAlertVaccinationModel.
neighourhoods = CovidAlertVaccinationModel.ws_sample(age_sample) neighourhoods = CovidAlertVaccinationModel.ws_sample(age_sample)
for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages
for age_j in YOUNG:OLD #for a given age_sample loop over possible contact ages
if neighourhoods[age_j] > 0 if neighourhoods[age_j] > 0
#get durations from our candidate distribtions for each of the contacts in neighbourhood
durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],neighourhoods[age_j])) .% durmax durs = trunc.(Int,rand(rng,age_dists[age_sample,age_j],neighourhoods[age_j])) .% durmax
#this MODIFIES sample_list to contain samples from the distribution of total_durations, given intervals of length dur.
tot_dur_sample!(sample_list,durs) tot_dur_sample!(sample_list,durs)
#compute a kernel density from the list of samples
kde_est = kde(sample_list) kde_est = kde(sample_list)
#compute L1 distance between observed distribution of total durations from survey, and observed distribution from simulation
errsum += mapreduce(+,0:1:durmax) do i errsum += mapreduce(+,0:1:durmax) do i
return abs(pdf(kde_est,i) - pdf(kerneldensity_data_ws[age_sample],i)) return abs(pdf(kde_est,i) - pdf(kerneldensity_data_ws[age_sample],i))
end end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment