# Data parsing and import functions.
#
# These functions specify output types so the compiler is able to do type inference
# throughout the rest of the code. It is probably not necessary in all cases, but it is
# good practice when importing data from files, because type instability makes things
# really slow.

# Read the comma-delimited file at `relative_path` (relative to `PACKAGE_FOLDER`) and
# build a DataFrame whose column names come from the first row of the file.
function _read_csv_as_dataframe(relative_path)
    raw = readdlm(joinpath(PACKAGE_FOLDER, relative_path), ',')
    return DataFrame([raw[1, j] => raw[2:end, j] for j in axes(raw, 2)])
end

"""
    get_canada_demographic_distribution()

Read `data/csv/demographic_data.csv` and aggregate its `demographic_data` column into
three groups: entries 1-5, entries 6-13, and entries 14 to the end.
"""
function get_canada_demographic_distribution()::Vector{Float64}
    df = _read_csv_as_dataframe("data/csv/demographic_data.csv")
    binned_data = df[:, :demographic_data]
    return [sum(binned_data[1:5]), sum(binned_data[6:13]), sum(binned_data[14:end])]
end

"""
    get_canada_case_fatality()

Read `data/csv/case_fatality_data.csv` and return a tuple of the `case_fatality_bins`
column parsed with `parse_string_as_float_pair`, and the `case_fatality_data` column.
"""
function get_canada_case_fatality()::Tuple{Vector{Tuple{Float64,Float64}},Vector{Float64}}
    # https://www.publichealthontario.ca/-/media/documents/ncov/epi/covid-19-severe-outcomes-ontario-epi-summary.pdf?la=en
    df = _read_csv_as_dataframe("data/csv/case_fatality_data.csv")
    bins = map(parse_string_as_float_pair, df[:, :case_fatality_bins])
    return bins, df[:, :case_fatality_data]
end

"""
    find_household_composition(df_row)

Count the members of the household described by `df_row` in each of three bins and
return the counts as an `SVector{3}`. The respondent, whose bin is given by the
`:AGERESP` field ("Y", "M" or "O"), is added to the corresponding count.
"""
function find_household_composition(df_row)
    age_resp_to_bin = Dict(
        "Y" => 1,
        "M" => 2,
        "O" => 3,
    )
    u25_bins = [:U15CHILD, :O15CHILD, :YSPOUSE]
    m_bins = [:MPAR, :MCHILD, :MHHADULT]
    o_bins = [:OPAR, :OSPOUSE, :OHHADULT]
    age_distribution = [
        sum(Int(df_row[field]) for field in u25_bins),
        sum(Int(df_row[field]) for field in m_bins),
        sum(Int(df_row[field]) for field in o_bins),
    ]
    # The respondent is not included in the per-field counts above.
    age_distribution[age_resp_to_bin[df_row[:AGERESP]]] += 1
    return SVector{3}(age_distribution)
end

"""
    read_household_data()

Read `data/csv/home_compositions.csv` and return a named tuple with `households` (one
composition per row, see `find_household_composition`) and `weight_vector` (the
`WGHT_PER` column normalized to sum to one).
"""
function read_household_data()
    df = _read_csv_as_dataframe("data/csv/home_compositions.csv")
    weight_vector::Vector{Float64} = df[!, :WGHT_PER] / sum(df[!, :WGHT_PER])
    households = map(find_household_composition, eachrow(df))
    return (; households, weight_vector)
end

"""
    sample_household_data(n)

Draw `n` household compositions from `household_data.households`, weighted by
`household_data.weight_vector`.
"""
function sample_household_data(n)
    # `default_rng()` selects the RNG of the running thread/task itself; indexing by
    # `Threads.threadid()` is unnecessary and not robust to task migration.
    return sample(
        Random.default_rng(),
        household_data.households,
        Weights(household_data.weight_vector),
        n,
    )
end

"""
    get_household_data_proportions()

Return, for each of the three demographic bins, the weighted sum of household members
in that bin, normalized so the three values sum to one.
"""
function get_household_data_proportions()
    # https://www.publichealthontario.ca/-/media/documents/ncov/epi/covid-19-severe-outcomes-ontario-epi-summary.pdf?la=en
    weighted_households = zip(household_data.households, household_data.weight_vector)
    households_by_demographic_sum = [
        sum(map(((l, w),) -> l[i] * w, weighted_households)) for i in 1:3
    ]
    return households_by_demographic_sum ./ sum(households_by_demographic_sum)
end

function make_workschool_mixing_matrix() # this is actually used in IntervalsModel only now
    # All entries are geometric distributions, specified by their means.
    # NOTE(review): the 3x3 row layout below is reconstructed; confirm against the
    # original formatting of this literal.
    ws_mixing = map(t -> from_mean(t...), [
        (Geometric{Float64}, 4.104848) (Geometric{Float64}, 2.568782) (Geometric{Float64}, 0.017729)
        (Geometric{Float64}, 0.975688) (Geometric{Float64}, 5.057572) (Geometric{Float64}, 0.021307)
        (Geometric{Float64}, 0.001937) (Geometric{Float64}, 0.00722) (Geometric{Float64}, 0.022134)
    ])
    # Symmetrize WS mixing with respect to the population proportions in the data.
    ws_mixing_w_unemployment_symmetrized = symmetrize_means(
        get_household_data_proportions(), ws_mixing
    )
    # Success probability of the adjusted geometric distributions, computed from the
    # means of W and the unemployment_matrix.
    ws_adjust_mean(W) =
        (5 / 7) .* (1 .- unemployment_matrix) ./
        (mean.(W) + (5 / 7) .* (1 .- unemployment_matrix))
    # Create a zero-weighted distribution where the zero-weight is given by the
    # unemployment_matrix, and the non-zero weight is given by ws_mixing, symmetrized,
    # with means adjusted upwards.
    ws_mixing_w_unemployment_symmetrized_weekday_adjusted = ZWDist.(
        unemployment_matrix,
        Geometric.(ws_adjust_mean(ws_mixing_w_unemployment_symmetrized)),
    )
    return ws_mixing_w_unemployment_symmetrized_weekday_adjusted
end

"""
    load_mixing_matrices()

Read `data/csv/mixing_loc-freq.csv` and return a tuple `(workschool, rest)`. Each
element is a named tuple with fields `daily`, `twice_a_week` and `otherwise`, holding a
3x3 array of `Geometric{Float64}` distributions built with `from_mean` from the file's
values.

Throws an `ArgumentError` if a row has an unrecognized `location` or `frequency` label.
"""
function load_mixing_matrices()
    df = CSV.File(joinpath(PACKAGE_FOLDER, "data/csv/mixing_loc-freq.csv")) |> DataFrame
    workschool_mixing = (
        daily = zeros(3, 3),
        twice_a_week = zeros(3, 3),
        otherwise = zeros(3, 3),
    )
    rest_mixing = (
        daily = zeros(3, 3),
        twice_a_week = zeros(3, 3),
        otherwise = zeros(3, 3),
    )
    mixing = [workschool_mixing, rest_mixing]
    locations_labels = ["workschool", "rest"]
    # Positions match the field order of the named tuples above.
    frequency_labels = ["daily", "3xweekly", "justonce"]
    for r in eachrow(df)
        location_ind = findfirst(==(r["location"]), locations_labels)
        if isnothing(location_ind)
            throw(ArgumentError("unknown location label: $(r["location"])"))
        end
        frequency_ind = findfirst(==(r["frequency"]), frequency_labels)
        if isnothing(frequency_ind)
            throw(ArgumentError("unknown frequency label: $(r["frequency"])"))
        end
        target = mixing[location_ind][frequency_ind]
        # Columns "0".."8" hold the 3x3 matrix in row-major order.
        for i in 0:8
            row, col = divrem(i, 3)
            target[row + 1, col + 1] = r[string(i)]
        end
    end
    return map(t -> from_mean.(Geometric{Float64}, t), workschool_mixing),
    map(t -> from_mean.(Geometric{Float64}, t), rest_mixing)
end

# function make_sampler(λ)
#     return Distributions.PoissonADSampler(λ)#Distributions.DiscreteNonParametricSampler(0:durmax,[pdf(Poisson(λ),x) for x in 0:durmax])
# end

"""
    load_contact_time_distributions()

Deserialize `hh.dat`, `ws.dat` and `rest.dat` from
`intervals_model_output/simulation_output` and return a named tuple `(hh, ws, rest)`.
Each field is `as_symmetric_matrix` of the stored "Distributions.Poisson" parameters,
mapped to `Poisson` distributions whose rate is the mode of each entry's particles.
"""
function load_contact_time_distributions()
    distkey = "Distributions.Poisson"
    fnames = (
        hh = "hh",
        ws = "ws",
        rest = "rest",
    )
    contact_distributions_tuple = map(fnames) do fname
        dat = deserialize(
            joinpath(
                PACKAGE_FOLDER, "intervals_model_output", "simulation_output", "$fname.dat"
            ),
        )
        return map(p -> Poisson(mode(p.particles)), as_symmetric_matrix(dat[distkey].P))
    end
    return contact_distributions_tuple
end

# For every key `age` of `swap_dict`, read `<path_prefix><age>.csv` and collect column 3
# (`durs`) and column 2 (`weights`, wrapped in `Weights`) into a named tuple keyed by
# `Symbol(age)`.
function _load_timeuse_tables(path_prefix)
    tables_by_age = map(collect(keys(swap_dict))) do age
        data_table = CSV.File(path_prefix * "$age.csv") |> Tables.matrix
        weights = Weights(data_table[:, 2])
        durs = data_table[:, 3]
        return Symbol(age) => (; durs, weights)
    end
    return (; tables_by_age...)
end

"""
Load rest data from `data/canada-network-data/Timeuse/Rest/RData`.
"""
function get_rest_data()
    return _load_timeuse_tables("$PACKAGE_FOLDER/data/canada-network-data/Timeuse/Rest/RData")
end

"""
Load WS data from `data/canada-network-data/Timeuse/WS/WorkschoolData`.
"""
function get_ws_data()
    return _load_timeuse_tables(
        "$PACKAGE_FOLDER/data/canada-network-data/Timeuse/WS/WorkschoolData"
    )
end

"""
Load priors data from `data/canada-network-data/POLYMOD/AALPoisPriors.csv` and format into a DataFrame with some additional columns for the index of the age class (per `swap_dict`).
"""
function get_priors()
    df = DataFrame(
        CSV.File("$PACKAGE_FOLDER/data/canada-network-data/POLYMOD/AALPoisPriors.csv")
    )
    # NOTE(review): the `display` calls print the full tables on every call; consider
    # replacing them with `@debug` if that output is not relied upon.
    display(df)
    df_w_indicies = transform(
        df,
        :Age_in => e -> map(x -> swap_dict[x], e),
        :Age_out => e -> map(x -> swap_dict[x], e),
        :Age_in,
        :Age_out,
    )
    rename!(
        df_w_indicies,
        Dict(:Age_out_function => "index_out", :Age_in_function => "index_in"),
    )
    display(df_w_indicies)
    return df_w_indicies
end

"""
    filter_priors(location_key::String)

Filter priors dataframe based on location_key, which should be one of "rest", "workschool", or "home".

Builds a 3x3 array of `DiscreteNonParametric` distributions over `distribution_support`,
filled symmetrically from the matching rows, and returns
`symmetric_matrix_as_vector` of that array.

Throws an `ArgumentError` if `location_key` does not appear in the priors file.
"""
function filter_priors(location_key)
    priors = get_priors()
    if !(location_key in priors[!, :location])
        throw(ArgumentError("location key not in priors file!"))
    end
    matching_rows = filter(row -> row["location"] == location_key, eachrow(priors))
    # This is pretty inflexible to changing support, unfortunately: the support values
    # double as column names in the priors file.
    index_to_probs = map(matching_rows) do r
        return (r[:index_out], r[:index_in]) =>
            Vector{Float64}(r[string.(distribution_support)])
    end
    priors_dict = Dict(index_to_probs)
    display(priors_dict)
    priors_probs = Dict(
        map(
            v -> v => DiscreteNonParametric(distribution_support, priors_dict[v]),
            collect(keys(priors_dict)),
        ),
    )
    output_array = Array{eltype(values(priors_probs)),2}(undef, (3, 3))
    for (key, value) in priors_probs
        output_array[key...] = value
        # Mirror the entry to get symmetry, assuming key is a 2-tuple.
        output_array[reverse(key)...] = value
    end
    return symmetric_matrix_as_vector(output_array)
end