Commit c1fec531 authored by Peter Jentsch's avatar Peter Jentsch
Browse files
parents 56242722 c5b2285c
......@@ -3,9 +3,14 @@
"""
Created on 2021-03-11
Uses this POLYMOD duration data to deduce priors on the poisson
random variables controlling individual contact durations
Extracts the duration distributions stratified by age-age-location from the
POLYMOD dataset.
Locations: home, work/school, rest, and community.
Community is the union of the work/school and rest locations.
Additionally, uses this duration data to deduce priors on the Poisson
random variables controlling individual contact durations.
@author: mark
"""
......@@ -14,79 +19,118 @@ import pandas as pd
import math
from itertools import product
# Read the cleaned survey as a dataframe
# NOTE(review): `Contact` is rebound by the next read_csv below, so this load
# looks like a leftover from an earlier revision -- confirm before removing.
Contact = pd.read_csv("AALContact_data.csv")
# To start, let's load the two files: The contact and participant data
Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv")
Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv")
# Delete contacts missing age data
# (rows whose 'cnt_age_exact' is NaN are dropped)
Contact = Contact.dropna(subset=['cnt_age_exact'])
# Delete contacts missing location data
def Loctest(row):
    """Flag a contact row that carries no location information.

    Parameters
    ----------
    row : mapping
        A contact record exposing the six location indicators
        'cnt_home', 'cnt_work', 'cnt_school', 'cnt_transport',
        'cnt_leisure' and 'cnt_otherplace'.

    Returns
    -------
    int
        1 when no indicator is set (the row lacks a location),
        0 when at least one indicator is set.
    """
    location_columns = (
        'cnt_home', 'cnt_work', 'cnt_school',
        'cnt_transport', 'cnt_leisure', 'cnt_otherplace',
    )
    # NaN is truthy in Python, so a plain `or` chain would report a row whose
    # indicators are all missing as "has a location".  Treat missing values
    # as unset instead.
    has_location = any(
        not pd.isna(row[col]) and bool(row[col]) for col in location_columns
    )
    return 0 if has_location else 1
# Evaluate Loctest on every row (axis=1) and store the 0/1 flag in a new column
Contact['Loctest'] = Contact.apply(Loctest, axis=1)
# Keep only the rows flagged 0, i.e. those Loctest did not mark as lacking a location
Contact = Contact[Contact['Loctest']==0]
# Next, we aggregate the locations
# Define the function computing WorkSchool value from remaining entries
def WorkSchool(row):
    """Return whether the contact happened at work or at school.

    Parameters
    ----------
    row : mapping
        A contact record exposing 'cnt_work' and 'cnt_school'.

    Returns
    -------
    bool
        True when either indicator is set, False otherwise.  Missing (NaN)
        indicators count as unset.
    """
    # A bare `a or b` returns NaN when `a` is NaN (NaN is truthy), and NaN
    # then fails any later `== True` filter even if `b` is True.
    return any(
        not pd.isna(row[col]) and bool(row[col])
        for col in ('cnt_work', 'cnt_school')
    )
# Create the new column 'cnt_workschool' by applying WorkSchool to every row (axis=1)
Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1)
# Define the function computing Rest value from remaining entries
def Rest(row):
    """Return whether the contact happened in transport, leisure or another place.

    Parameters
    ----------
    row : mapping
        A contact record exposing 'cnt_transport', 'cnt_leisure' and
        'cnt_otherplace'.

    Returns
    -------
    bool
        True when any of the three indicators is set, False otherwise.
        Missing (NaN) indicators count as unset.
    """
    # A bare `or` chain would return NaN as soon as it meets a NaN indicator
    # (NaN is truthy), hiding a later True value from `== True` filters.
    return any(
        not pd.isna(row[col]) and bool(row[col])
        for col in ('cnt_transport', 'cnt_leisure', 'cnt_otherplace')
    )
# And then create the new column 'cnt_rest' by applying Rest to every row (axis=1)
Contact['cnt_rest'] = Contact.apply(Rest, axis=1)
# Define the function computing Community value from remaining entries
def Community(row):
    """Return whether the contact happened anywhere other than home.

    Parameters
    ----------
    row : mapping
        A contact record exposing 'cnt_work', 'cnt_school',
        'cnt_transport', 'cnt_leisure' and 'cnt_otherplace'.

    Returns
    -------
    bool
        True when any of the five indicators is set, False otherwise.
        Missing (NaN) indicators count as unset.
    """
    # A bare `or` chain would return NaN as soon as it meets a NaN indicator
    # (NaN is truthy), hiding a later True value from `== True` filters.
    return any(
        not pd.isna(row[col]) and bool(row[col])
        for col in ('cnt_work', 'cnt_school', 'cnt_transport',
                    'cnt_leisure', 'cnt_otherplace')
    )
# And then create the new column 'cnt_community' by applying Community to every row (axis=1)
Contact['cnt_community'] = Contact.apply(Community, axis=1)
# We also add a column with the source age bracket. First step is to define age bracket value
def Agesource(row):
    """Return the age bracket of the participant who reported this contact.

    The participant's age is looked up in the module-level ``Part`` frame
    by the row's 'part_id'.

    Parameters
    ----------
    row : mapping
        A contact record exposing 'part_id'.

    Returns
    -------
    str or None
        'Y' for ages below 25, 'M' for ages below 65, 'O' otherwise.
        None when the participant's age is missing.

    Raises
    ------
    TypeError
        If ``Part`` does not hold exactly one row for the participant.
    """
    ID = row['part_id']
    ages = Part.loc[Part['part_id'] == ID, 'part_age']
    # float(Series) is deprecated in recent pandas; extract the single value
    # explicitly, keeping the original failure mode for 0 or several matches.
    if len(ages) != 1:
        raise TypeError(
            "expected exactly one participant with part_id %r, found %d"
            % (ID, len(ages))
        )
    year = float(ages.iloc[0])
    # A missing age compares False against every threshold and would
    # otherwise be silently labelled 'O'.
    if pd.isna(year):
        return None
    if year < 25:
        return 'Y'
    if year < 65:
        return 'M'
    return 'O'
# Then create the new source column 'age_source' by applying Agesource to every row (axis=1)
Contact['age_source'] = Contact.apply(Agesource, axis=1)
# Columns function for target age bracket
def Agetarget(row):
    """Return the age bracket of the contacted person.

    Parameters
    ----------
    row : mapping
        A contact record exposing 'cnt_age_exact'.  Rows lacking that value
        are dropped earlier in the script.

    Returns
    -------
    str
        'Y' for ages below 25, 'M' for ages below 65, 'O' otherwise.
    """
    # The age is already on the row; looking it up again by 'cont_id' in the
    # whole Contact frame costs a full scan per row and fails when a
    # 'cont_id' is not unique.
    year = float(row['cnt_age_exact'])
    if year < 25:
        return 'Y'
    if year < 65:
        return 'M'
    return 'O'
# Add the new target column 'age_target' by applying Agetarget to every row (axis=1)
Contact['age_target'] = Contact.apply(Agetarget, axis=1)
# Specify AAL categories
# Age labels, matching the values returned by Agesource / Agetarget
Ages = ['Y', 'M', 'O']
# Each unordered pair of age labels, listed once
SymAge = [('Y','Y'), ('Y', 'M'), ('Y', 'O'), ('M', 'M'),('M', 'O'), ('O','O')]
# Location names; each is used with a 'cnt_' prefix to select a Contact column
Locales = ['home', 'workschool', 'rest']
# define row function for symage
def symage_row(row):
    """Return the SymAge entry matching the row's age pair in either order.

    Reads 'age_source' and 'age_target' from ``row`` and scans the
    module-level ``SymAge`` list in order.  The first entry equal to
    (source, target) or (target, source) is returned; None is returned when
    no entry matches.
    """
    forward = (row['age_source'], row['age_target'])
    backward = (forward[1], forward[0])
    return next(
        (pair for pair in SymAge if pair == forward or pair == backward),
        None,
    )
# Add the 'symage' column by applying symage_row to every row (axis=1)
Contact['symage'] = Contact.apply(symage_row, axis=1)
# Dictionary separating contact data by symage-location
# NOTE: this rebinds Locales, adding 'community' to the earlier three names
Locales = ['home', 'workschool', 'rest', 'community']
# Dictionary separating contact data by AAL
# Keys are (x, y, z) tuples filled in by the loops below
ContAAL = {}
# Columns kept when a per-stratum copy of Contact is stored
columns = ['part_id', 'cont_id', 'age_source', 'age_target',
'duration_multi', 'frequency_multi']
# Fill ContAAL with one sub-frame per (SymAge pair, location) combination.
for symage, loc in product(SymAge, Locales):
    x, y, z = symage[0], symage[1], loc
    # Build one combined mask instead of chaining Contact[a][b]: chaining
    # applies the second full-length mask to an already filtered frame, which
    # makes pandas reindex the mask (with a warning) and creates an extra
    # intermediate copy.
    # `== True` is deliberate: it also yields False for NaN entries.
    # NOTE(review): the second comparison relies on pandas comparing each cell
    # of 'symage' to the tuple as a single value -- confirm on the pandas
    # version in use.
    in_stratum = (Contact['cnt_'+z] == True) & (Contact['symage'] == symage)  # noqa: E712
    ContAAL[(x,y,z)] = Contact.loc[in_stratum, columns].copy(deep=True)
# Fill ContAAL with one sub-frame per (source age, target age, location).
for x, y, z in product(Ages, Ages, Locales):
    # Build one combined mask instead of chaining Contact[a][b][c]: chaining
    # applies full-length masks to already filtered frames, which makes
    # pandas reindex each mask (with a warning) and creates extra copies.
    # `== True` is deliberate: it also yields False for NaN entries.
    in_stratum = (
        (Contact['cnt_'+z] == True)  # noqa: E712
        & (Contact['age_source'] == x)
        & (Contact['age_target'] == y)
    )
    ContAAL[(x,y,z)] = Contact[in_stratum]
# Duration distributions stratified by SymAge-location
# Duration distributions stratified by AAL
# Maps each (x, y, z) key of ContAAL to its relative duration frequencies
DurFreqAAL = {}
# NOTE(review): two loop headers follow (one over SymAge x Locales, one over
# Ages x Ages x Locales) with a single body; this looks like two revisions
# interleaved -- confirm which iteration is intended.
for symage, loc in list(product(SymAge,Locales)):
x,y,z = symage[0], symage[1], loc
for x, y, z in list(product(Ages,Ages, Locales)):
C = ContAAL[(x,y,z)]
#D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
# Count the contacts in each 'duration_multi' category (codes 1 to 5)
Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
# Normalise the five counts so they sum to 1
# NOTE(review): an empty stratum gives a zero sum here -- confirm that cannot occur
DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
# Save to csv
#with open('AALDur_data.csv', 'w') as csv_file:
# writer = csv.writer(csv_file)
# for key, value in DurFreqAAL.items():
# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
# Define error functions for poisson random variables
# Number of Poisson terms (k = 0 .. cutoff-1) evaluated per location.
# NOTE(review): presumably 6 steps per hour over 16 h / 8 h -- TODO confirm units.
durcutoff = {'home': 6*16, 'workschool': 6*8, 'rest': 6*8}
def PoisArray(lam, loc):
    """Return Poisson(lam) weights for k = 0 .. durcutoff[loc]-1, normalised to sum to 1.

    Each weight is computed in log space as
    exp(-lam + k*log(lam) - log(k!)), with log(k!) obtained by summing
    log(n) for n = 1 .. k.  The list of weights is then divided by its sum.
    """
    weights = []
    for k in range(durcutoff[loc]):
        log_factorial = np.sum([math.log(n) for n in range(1, k+1)])
        weights.append(math.exp(-lam + k*math.log(lam) - log_factorial))
    return weights/np.sum(weights)
# NOTE(review): only the first statement of this two-argument PoisBin is
# present here, and a one-argument PoisBin defined further down rebinds the
# name.  Looks like a remnant of an earlier revision -- confirm.
def PoisBin(lam,loc):
# Poisson weights for this rate and location
Arr = PoisArray(lam,loc)
def PoisArray(lam):
    """Return the Poisson(lam) probabilities P(k) for k = 0, 1, ..., 144.

    Parameters
    ----------
    lam : float
        Poisson rate; must be positive (``math.log`` raises ValueError
        otherwise).

    Returns
    -------
    list of float
        145 probabilities, where element ``k`` is P(k).  The values are not
        renormalised after truncation at k = 144.
    """
    log_lam = math.log(lam)
    # The k = 0 term is already produced by the comprehension.  Prepending
    # exp(-lam) again would duplicate P(0) and shift every later index by one.
    # math.lgamma(k + 1) equals log(k!) without re-summing logs for every k.
    return [
        math.exp(-lam + k*log_lam - math.lgamma(k + 1))
        for k in range(145)
    ]
def PoisBin(lam):
    """Collapse PoisArray(lam) into five bins and normalise them to sum to 1.

    The bins are: element 0, element 1, elements 2-5, elements 6-23, and
    everything from element 24 onward.
    """
    weights = PoisArray(lam)
    binned = np.array([
        weights[0],
        weights[1],
        np.sum(weights[2:6]),
        np.sum(weights[6:24]),
        np.sum(weights[24:]),
    ])
    return binned / np.sum(binned)
# NOTE(review): no return statement is visible for this variant, and a second
# PoisErr defined right after it rebinds the name.  Looks like a remnant of an
# earlier revision -- confirm.
def PoisErr(lam, label): # label is SymAge-Location
# Split the label into its two age labels and the location
agesrc, agetar, loc = label
# Sum of squared differences between the binned Poisson weights and DurFreqAAL[label]
err = np.sum((PoisBin(lam, loc) - DurFreqAAL[label])**2)
def PoisErr(lam, label):
    """Return the sum of squared differences between PoisBin(lam) and DurFreqAAL[label]."""
    residual = PoisBin(lam) - DurFreqAAL[label]
    return np.sum(residual**2)
# Wrap PoisErr with np.vectorize; excluded=[1] leaves the second positional
# argument (the label) out of the vectorisation so it is passed through as is.
PoisErr = np.vectorize(PoisErr, excluded=[1])
PoisPrior={}
# NOTE(review): Locales may include 'community' at this point, which has no
# entry in durcutoff -- confirm this loop is still meant to run.
for symage, loc in list(product(SymAge, Locales)):
AAL = (symage[0],symage[1],loc)
# Weight each integer rate 1 .. durcutoff[loc]-1 by the inverse of its error
arr = [1/PoisErr(i,AAL) for i in range(1,durcutoff[loc])]
# Zeros appended so every location's list grows by durcutoff['home'] - durcutoff[loc] entries
zeropad = [0 for i in range(durcutoff['home'] - durcutoff[loc])]
# Normalise by the sum of the inverse errors
PoisPrior[AAL] = (arr+zeropad)/np.sum(arr)
# Build the priors: for every (source age, target age, location) key, each
# integer rate 1 .. 144 gets a weight proportional to 1 / PoisErr(rate, key),
# and the weights are normalised to sum to 1.
PoisPrior = {}
for AAL in product(Ages, Ages, Locales):
    x, y, z = AAL
    arr = [1/PoisErr(lam, AAL) for lam in range(1, 145)]
    PoisPrior[AAL] = np.divide(arr, np.sum(arr))
# Save them to csv
# Two-column frame: the (x, y, z) key in 'col1', the prior array in 'col2'
df = pd.DataFrame(list(PoisPrior.items()), columns=['col1','col2'])
# Expand each key tuple into three named columns
dfkeys = pd.DataFrame([pd.Series(x) for x in df.col1])
dfkeys.columns = ["Age_in", "Age_out", "location"]
# Expand each prior array into one column per entry
dfvals = pd.DataFrame([pd.Series(x) for x in df.col2])
dfout = dfkeys.join(dfvals)
dfout.to_csv("AALPoisPriors.csv")
# NOTE(review): the frame below is written to the same file name and so
# overwrites the file written just above -- confirm which layout is wanted.
Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index")
Priordf.to_csv("AALPoisPriors.csv")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment