diff --git a/IntervalsModel/network-data/POLYMOD/Duration-priors.py b/IntervalsModel/network-data/POLYMOD/Duration-priors.py new file mode 100644 index 0000000000000000000000000000000000000000..59d70f6131d7875f08fb6a30a37b26baf4f9dd82 --- /dev/null +++ b/IntervalsModel/network-data/POLYMOD/Duration-priors.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on 2021-03-11 + +Extracts the duration distributions stratified by age-age-location from the +POLYMOD dataset. + +Additionally, uses this duration data to deduce priors on the poisson +random variables controlling individual contact durations + +@author: mark +""" +import numpy as np +import pandas as pd +import csv +import math +from itertools import product + +# To start, let's load the two files: The contact and participant data +Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv") +Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv") + +# Delete contacts missing age data +Contact = Contact.dropna(subset=['cnt_age_exact']) + +# Delete contacts missing location data +def Loctest(row): + emptycheck = row['cnt_home'] or row['cnt_work'] or row['cnt_school'] or row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace'] + if emptycheck == False: + return 1 + else: + return 0 +Contact['Loctest'] = Contact.apply(Loctest, axis=1) +Contact = Contact[Contact['Loctest']==0] + +# Next, we aggregate the locations +# Define the function computing WorkSchool value from remaining entries +def WorkSchool(row): + return row['cnt_work'] or row['cnt_school'] +# Create the new column +Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1) + +# Define the function computed Rest value from remaining entries +def Rest(row): + return row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace'] +# And then create the new column +Contact['cnt_rest'] = Contact.apply(Rest, axis=1) + +# We also add a column with the source age bracket. First step is to define age bracket value +def Agesource(row): + ID = row['part_id'] + year = float(Part[Part['part_id'] ==ID]['part_age']) + if year < 25: + return 'Y' + elif year < 65: + return 'M' + else: + return 'O' +# Then create the new source column +Contact['age_source'] = Contact.apply(Agesource, axis=1) + +# Columns function for target age bracket +def Agetarget(row): + ID = row['cont_id'] + year = float(Contact[Contact['cont_id'] ==ID]['cnt_age_exact']) + if year < 25: + return 'Y' + elif year < 65: + return 'M' + else: + return 'O' +# Add the new target column +Contact['age_target'] = Contact.apply(Agetarget, axis=1) + + +# Specify AAL categories +Ages = ['Y', 'M', 'O'] +Locales = ['home', 'workschool', 'rest'] + +# Dictionary separating contact data by AAL +ContAAL = {} +for x, y, z in list(product(Ages,Ages,Locales)): + ContAAL[(x,y,z)] = Contact[Contact['cnt_'+z]==True][Contact['age_source']== x][Contact['age_target']==y] + + + +# Duration distributions stratified by AAL +DurFreqAAL = {} +for x, y, z in list(product(Ages,Ages, Locales)): + C = ContAAL[(x,y,z)] + #D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"]) + Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)] + DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist) + +# Save to csv +#with open('AALDur_data.csv', 'w') as csv_file: +# writer = csv.writer(csv_file) +# for key, value in DurFreqAAL.items(): +# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]]) + + +# Define error functions for poisson random variables +def PoisArray(lam): + arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)] + return [math.exp(-lam)] + arr + +def PoisBin(lam): + Arr = PoisArray(lam) + out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])] + return out/np.sum(out) + +def PoisErr(lam, label): + err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2) + return err +PoisErr = np.vectorize(PoisErr, excluded=[1]) + +# Define priors. Each lambda value is weighted inversely to the error +PoisPrior = {} +for x, y, z in list(product(Ages,Ages, Locales)): + AAL = (x,y,z) + arr= [1/PoisErr(i,AAL) for i in range(1,145)] + PoisPrior[AAL] = arr/np.sum(arr) + +# Save them to csv +Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index") +Priordf.to_csv("AALPoisPriors.csv") + \ No newline at end of file