Upload New File

abe06b93 · Mark Penney · 1e18580d · abe06b93
Commit abe06b93 authored 4 years ago by Mark Penney
--- a/IntervalsModel/network-data/POLYMOD/Duration-priors.py
+++ b/IntervalsModel/network-data/POLYMOD/Duration-priors.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on 2021-03-11
+
+Extracts the duration distributions stratified by age-age-location from the
+POLYMOD dataset. 
+
+Additionally, uses this duration data to deduce priors on the poisson
+random variables controlling individual contact durations
+
+@author: mark
+"""
+import numpy as np
+import pandas as pd
+import csv
+import math
+from itertools import product
+
+# To start, let's load the two files: The contact and participant data
+Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv")
+Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv")
+
+# Delete contacts missing age data
+Contact = Contact.dropna(subset=['cnt_age_exact'])
+
+# Delete contacts missing location data
+def Loctest(row):
+    emptycheck = row['cnt_home'] or row['cnt_work'] or row['cnt_school'] or row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
+    if emptycheck == False:
+        return 1
+    else:
+        return 0
+Contact['Loctest'] = Contact.apply(Loctest, axis=1)
+Contact = Contact[Contact['Loctest']==0]
+
+# Next, we aggregate the locations
+# Define the function computing WorkSchool value from remaining entries
+def WorkSchool(row):
+    return row['cnt_work'] or row['cnt_school']
+# Create the new column
+Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1)
+
+# Define the function computed Rest value from remaining entries
+def Rest(row):
+    return row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
+# And then create the new column
+Contact['cnt_rest'] = Contact.apply(Rest, axis=1)
+
+# We also add a column with the source age bracket. First step is to define age bracket value
+def Agesource(row):
+    ID = row['part_id']
+    year = float(Part[Part['part_id'] ==ID]['part_age'])
+    if year < 25:
+        return 'Y'
+    elif year < 65:
+        return 'M'
+    else:
+        return 'O'
+# Then create the new source column
+Contact['age_source'] = Contact.apply(Agesource, axis=1)
+
+# Columns function for target age bracket
+def Agetarget(row):
+    ID = row['cont_id']
+    year = float(Contact[Contact['cont_id'] ==ID]['cnt_age_exact'])
+    if year < 25:
+        return 'Y'
+    elif year < 65:
+        return 'M'
+    else:
+        return 'O'
+# Add the new target column
+Contact['age_target'] = Contact.apply(Agetarget, axis=1)
+
+
+# Specify AAL categories
+Ages = ['Y', 'M', 'O']
+Locales = ['home', 'workschool', 'rest']
+
+# Dictionary separating contact data by AAL
+ContAAL = {}
+for x, y, z in list(product(Ages,Ages,Locales)):
+    ContAAL[(x,y,z)] = Contact[Contact['cnt_'+z]==True][Contact['age_source']== x][Contact['age_target']==y]  
+
+
+
+# Duration distributions stratified by AAL
+DurFreqAAL = {}
+for x, y, z in list(product(Ages,Ages, Locales)):
+    C = ContAAL[(x,y,z)]
+    #D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
+    Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
+    DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
+    
+# Save to csv
+#with open('AALDur_data.csv', 'w') as csv_file:  
+#    writer = csv.writer(csv_file)
+#    for key, value in DurFreqAAL.items():
+#       writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
+       
+       
+# Define error functions for poisson random variables
+def PoisArray(lam):
+    arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)]
+    return [math.exp(-lam)] + arr
+
+def PoisBin(lam):
+    Arr = PoisArray(lam)
+    out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])]
+    return out/np.sum(out)
+
+def PoisErr(lam, label):
+    err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2)
+    return err
+PoisErr = np.vectorize(PoisErr, excluded=[1])
+
+# Define priors. Each lambda value is weighted inversely to the error
+PoisPrior = {}
+for x, y, z in list(product(Ages,Ages, Locales)):
+    AAL = (x,y,z)
+    arr= [1/PoisErr(i,AAL) for i in range(1,145)]
+    PoisPrior[AAL] = arr/np.sum(arr)
+    
+# Save them to csv
+Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index")
+Priordf.to_csv("AALPoisPriors.csv")
+    
\ No newline at end of file