Commit c1fec531 authored by Peter Jentsch's avatar Peter Jentsch
Browse files
parents 56242722 c5b2285c
...@@ -3,9 +3,14 @@ ...@@ -3,9 +3,14 @@
""" """
Created on 2021-03-11 Created on 2021-03-11
Uses this POLYMOD duration data to deduce priors on the poisson Extracts the duration distributions stratified by age-age-location from the
random variables controlling individual contact durations POLYMOD dataset.
Locations: Home, work/school, rest, and community.
Community is the union of the work/school and rest locations.
Additionally, uses this duration data to deduce priors on the poisson
random variables controlling individual contact durations
@author: mark @author: mark
""" """
...@@ -14,79 +19,118 @@ import pandas as pd ...@@ -14,79 +19,118 @@ import pandas as pd
import math import math
from itertools import product from itertools import product
# Read the cleaned survey as a dataframe # To start, let's load the two files: The contact and participant data
Contact = pd.read_csv("AALContact_data.csv") Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv")
Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv")
# Delete contacts missing age data
Contact = Contact.dropna(subset=['cnt_age_exact'])
# Delete contacts missing location data
def Loctest(row):
    """Flag a contact row that has no usable location information.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the six location flags ``cnt_home``, ``cnt_work``,
        ``cnt_school``, ``cnt_transport``, ``cnt_leisure`` and
        ``cnt_otherplace``.

    Returns
    -------
    int
        1 when no location flag is set (the row should be discarded),
        0 when at least one location flag is set.
    """
    location_columns = (
        'cnt_home', 'cnt_work', 'cnt_school',
        'cnt_transport', 'cnt_leisure', 'cnt_otherplace',
    )
    for column in location_columns:
        flag = row[column]
        # NaN is truthy in Python, so a missing flag has to be skipped
        # explicitly; otherwise it would count as a recorded location.
        if pd.isna(flag):
            continue
        if flag:
            return 0
    return 1
Contact['Loctest'] = Contact.apply(Loctest, axis=1)
Contact = Contact[Contact['Loctest']==0]
# Next, we aggregate the locations
# Define the function computing WorkSchool value from remaining entries
def WorkSchool(row):
    """Return True when the contact took place at work or at school.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the ``cnt_work`` and ``cnt_school`` flags.

    Returns
    -------
    bool
        True if either flag is set. Missing (NaN) flags are treated as
        unset, so a NaN in one column cannot hide a True in the other.
    """
    return any(
        pd.notna(row[column]) and bool(row[column])
        for column in ('cnt_work', 'cnt_school')
    )
# Create the new column
Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1)
# Define the function computing Rest value from remaining entries
def Rest(row):
    """Return True when the contact took place in a 'rest' location.

    'Rest' aggregates the transport, leisure and other-place flags.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``cnt_transport``, ``cnt_leisure`` and
        ``cnt_otherplace``.

    Returns
    -------
    bool
        True if any of the three flags is set. Missing (NaN) flags are
        treated as unset, so a NaN cannot mask a later True flag.
    """
    return any(
        pd.notna(row[column]) and bool(row[column])
        for column in ('cnt_transport', 'cnt_leisure', 'cnt_otherplace')
    )
# And then create the new column
Contact['cnt_rest'] = Contact.apply(Rest, axis=1)
# Define the function computing Community value from remaining entries
def Community(row):
    """Return True when the contact took place anywhere outside the home.

    'Community' aggregates the work, school, transport, leisure and
    other-place flags.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``cnt_work``, ``cnt_school``, ``cnt_transport``,
        ``cnt_leisure`` and ``cnt_otherplace``.

    Returns
    -------
    bool
        True if any of the five flags is set. Missing (NaN) flags are
        treated as unset, so a NaN cannot mask a later True flag.
    """
    community_columns = (
        'cnt_work', 'cnt_school',
        'cnt_transport', 'cnt_leisure', 'cnt_otherplace',
    )
    return any(
        pd.notna(row[column]) and bool(row[column])
        for column in community_columns
    )
# And then create the new column
Contact['cnt_community'] = Contact.apply(Community, axis=1)
# We also add a column with the source age bracket. First step is to define age bracket value
def Agesource(row):
    """Return the age bracket of the participant who reported the contact.

    The participant's age is looked up in the module-level ``Part``
    table by ``part_id``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``part_id``.

    Returns
    -------
    str or None
        'Y' for ages below 25, 'M' for ages below 65, 'O' otherwise.
        None when the participant's age is missing, so that such rows
        are not silently counted in the 'O' bracket.

    Raises
    ------
    ValueError
        If ``Part`` does not contain exactly one row for the participant.
    """
    participant_id = row['part_id']
    ages = Part.loc[Part['part_id'] == participant_id, 'part_age']
    if len(ages) != 1:
        raise ValueError(
            "expected exactly one participant with part_id "
            f"{participant_id!r}, found {len(ages)}"
        )
    # Index the single element explicitly: calling float() on a Series is
    # deprecated in recent pandas releases.
    year = float(ages.iloc[0])
    # NaN fails both comparisons below and would otherwise land in 'O'.
    if math.isnan(year):
        return None
    if year < 25:
        return 'Y'
    if year < 65:
        return 'M'
    return 'O'
# Then create the new source column
Contact['age_source'] = Contact.apply(Agesource, axis=1)
# Row function computing the target age bracket
def Agetarget(row):
    """Return the age bracket of the contacted person.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``cnt_age_exact``.

    Returns
    -------
    str
        'Y' for ages below 25, 'M' for ages below 65, 'O' otherwise.
    """
    # The age is already on the row; reading it directly avoids scanning
    # the whole contact table once per row and no longer fails when a
    # cont_id appears more than once.
    year = float(row['cnt_age_exact'])
    if year < 25:
        return 'Y'
    if year < 65:
        return 'M'
    return 'O'
# Add the new target column
Contact['age_target'] = Contact.apply(Agetarget, axis=1)
# Specify AAL categories # Specify AAL categories
Ages = ['Y', 'M', 'O'] Ages = ['Y', 'M', 'O']
SymAge = [('Y','Y'), ('Y', 'M'), ('Y', 'O'), ('M', 'M'),('M', 'O'), ('O','O')] Locales = ['home', 'workschool', 'rest', 'community']
Locales = ['home', 'workschool', 'rest']
# Dictionary separating contact data by AAL
# define row function for symage
def symage_row(row):
    """Return the symmetric age pair from ``SymAge`` matching this row.

    The row's (age_source, age_target) pair is compared, in both
    orderings, against each entry of the module-level ``SymAge`` list;
    the first matching entry is returned. If nothing matches, None is
    returned.
    """
    forward = (row['age_source'], row['age_target'])
    backward = (row['age_target'], row['age_source'])
    return next(
        (pair for pair in SymAge if pair == forward or pair == backward),
        None,
    )
Contact['symage'] = Contact.apply(symage_row, axis=1)
# Dictionary separating contact data by symage-location
ContAAL = {} ContAAL = {}
columns = ['part_id', 'cont_id', 'age_source', 'age_target', for x, y, z in list(product(Ages,Ages,Locales)):
'duration_multi', 'frequency_multi'] ContAAL[(x,y,z)] = Contact[Contact['cnt_'+z]==True][Contact['age_source']== x][Contact['age_target']==y]
for symage, loc in list(product(SymAge,Locales)):
x,y,z = symage[0], symage[1], loc
C = Contact[Contact['cnt_'+z] == True][Contact['symage']==symage]
ContAAL[(x,y,z)] = C[columns].copy(deep=True)
# Duration distributions stratified by SymAge-location
# Duration distributions stratified by AAL
DurFreqAAL = {} DurFreqAAL = {}
for symage, loc in list(product(SymAge,Locales)): for x, y, z in list(product(Ages,Ages, Locales)):
x,y,z = symage[0], symage[1], loc
C = ContAAL[(x,y,z)] C = ContAAL[(x,y,z)]
#D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)] Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist) DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
# Save to csv
#with open('AALDur_data.csv', 'w') as csv_file:
# writer = csv.writer(csv_file)
# for key, value in DurFreqAAL.items():
# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
# Define error functions for poisson random variables # Define error functions for poisson random variables
durcutoff = {'home': 6*16, 'workschool': 6*8, 'rest': 6*8} def PoisArray(lam):
def PoisArray(lam, loc): arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)]
arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) return [math.exp(-lam)] + arr
for k in range(durcutoff[loc])]
return arr/np.sum(arr) def PoisBin(lam):
Arr = PoisArray(lam)
def PoisBin(lam,loc):
Arr = PoisArray(lam,loc)
out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])] out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])]
return out/np.sum(out) return out/np.sum(out)
def PoisErr(lam, label): # label is SymAge-Location def PoisErr(lam, label):
agesrc, agetar, loc = label err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2)
err = np.sum((PoisBin(lam, loc) - DurFreqAAL[label])**2)
return err return err
PoisErr = np.vectorize(PoisErr, excluded=[1]) PoisErr = np.vectorize(PoisErr, excluded=[1])
# Define priors. Each lambda value is weighted inversely to the error
PoisPrior = {}
PoisPrior={} for x, y, z in list(product(Ages,Ages, Locales)):
for symage, loc in list(product(SymAge, Locales)): AAL = (x,y,z)
AAL = (symage[0],symage[1],loc) arr= [1/PoisErr(i,AAL) for i in range(1,145)]
arr = [1/PoisErr(i,AAL) for i in range(1,durcutoff[loc])] PoisPrior[AAL] = arr/np.sum(arr)
zeropad = [0 for i in range(durcutoff['home'] - durcutoff[loc])]
PoisPrior[AAL] = (arr+zeropad)/np.sum(arr)
# Save them to csv # Save them to csv
df = pd.DataFrame(list(PoisPrior.items()), columns=['col1','col2']) Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index")
dfkeys = pd.DataFrame([pd.Series(x) for x in df.col1]) Priordf.to_csv("AALPoisPriors.csv")
dfkeys.columns = ["Age_in", "Age_out", "location"]
dfvals = pd.DataFrame([pd.Series(x) for x in df.col2])
dfout = dfkeys.join(dfvals)
dfout.to_csv("AALPoisPriors.csv")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment