Commit 512f2118 authored by Mark Penney's avatar Mark Penney
Browse files

Update...

Update CovidAlertVaccinationModel/data/canada-network-data/POLYMOD/Duration-priors.py, CovidAlertVaccinationModel/data/canada-network-data/POLYMOD/AALPoisPriors.csv files
parent ce2ac31e
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2021-03-11
Extracts the duration distributions stratified by age-age-location from the
POLYMOD dataset.
Locations: Home, work/school, rest, and community.
Community is the combined work/school and rest
Additionally, uses this duration data to deduce priors on the poisson
random variables controlling individual contact durations
@author: mark
"""
import numpy as np
import pandas as pd
import math
from itertools import product
# To start, let's load the two files: The contact and participant data
Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv")
Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv")
# Delete contacts missing age data
Contact = Contact.dropna(subset=['cnt_age_exact'])
# Delete contacts missing location data
def Loctest(row):
emptycheck = row['cnt_home'] or row['cnt_work'] or row['cnt_school'] or row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
if emptycheck == False:
return 1
else:
return 0
Contact['Loctest'] = Contact.apply(Loctest, axis=1)
Contact = Contact[Contact['Loctest']==0]
# Next, we aggregate the locations
# Define the function computing WorkSchool value from remaining entries
def WorkSchool(row):
return row['cnt_work'] or row['cnt_school']
# Create the new column
Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1)
# Define the function computing Rest value from remaining entries
def Rest(row):
return row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
# And then create the new column
Contact['cnt_rest'] = Contact.apply(Rest, axis=1)
# Define the function computing Community value from remaining entries
def Community(row):
return row['cnt_work'] or row['cnt_school'] or row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
# And then create the new column
Contact['cnt_community'] = Contact.apply(Community, axis=1)
# We also add a column with the source age bracket. First step is to define age bracket value
def Agesource(row):
ID = row['part_id']
year = float(Part[Part['part_id'] ==ID]['part_age'])
if year < 25:
return 'Y'
elif year < 65:
return 'M'
else:
return 'O'
# Then create the new source column
Contact['age_source'] = Contact.apply(Agesource, axis=1)
# Columns function for target age bracket
def Agetarget(row):
ID = row['cont_id']
year = float(Contact[Contact['cont_id'] ==ID]['cnt_age_exact'])
if year < 25:
return 'Y'
elif year < 65:
return 'M'
else:
return 'O'
# Add the new target column
Contact['age_target'] = Contact.apply(Agetarget, axis=1)
# Specify AAL categories
Ages = ['Y', 'M', 'O']
Locales = ['home', 'workschool', 'rest', 'community']
# Dictionary separating contact data by AAL
ContAAL = {}
for x, y, z in list(product(Ages,Ages,Locales)):
ContAAL[(x,y,z)] = Contact[Contact['cnt_'+z]==True][Contact['age_source']== x][Contact['age_target']==y]
# Duration distributions stratified by AAL
DurFreqAAL = {}
for x, y, z in list(product(Ages,Ages, Locales)):
C = ContAAL[(x,y,z)]
#D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
# Save to csv
#with open('AALDur_data.csv', 'w') as csv_file:
# writer = csv.writer(csv_file)
# for key, value in DurFreqAAL.items():
# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
# Define error functions for poisson random variables
def PoisArray(lam):
arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)]
return [math.exp(-lam)] + arr
def PoisBin(lam):
Arr = PoisArray(lam)
out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])]
return out/np.sum(out)
def PoisErr(lam, label):
err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2)
return err
PoisErr = np.vectorize(PoisErr, excluded=[1])
# Define priors. Each lambda value is weighted inversely to the error
PoisPrior = {}
for x, y, z in list(product(Ages,Ages, Locales)):
AAL = (x,y,z)
arr= [1/PoisErr(i,AAL) for i in range(1,145)]
PoisPrior[AAL] = arr/np.sum(arr)
# Save them to csv
Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index")
Priordf.to_csv("AALPoisPriors.csv")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment