Update Duration-priors.py to symmetrize age brackets.

90cbd061 · Mark Penney · 7a01b27c · 90cbd061
Commit 90cbd061 authored 4 years ago by Mark Penney
--- a/IntervalsModel/network-data/POLYMOD/Duration-priors.py
+++ b/IntervalsModel/network-data/POLYMOD/Duration-priors.py
@@ -3,8 +3,8 @@
 """
 Created on 2021-03-11
-Extracts the duration distributions stratified by age-age-location from the
+Extracts the coarse duration distributions stratified by symmetric age 
-POLYMOD dataset. 
+and location from the POLYMOD dataset. 
 Additionally, uses this duration data to deduce priors on the poisson
 random variables controlling individual contact durations
@@ -13,7 +13,6 @@ random variables controlling individual contact durations
 """
 import numpy as np
 import pandas as pd
-import csv
 import math
 from itertools import product
@@ -92,17 +91,14 @@ for x, y, z in list(product(Ages,Ages, Locales)):
    #D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
    Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
    DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
-# Save to csv
-#with open('AALDur_data.csv', 'w') as csv_file:  
-#    writer = csv.writer(csv_file)
-#    for key, value in DurFreqAAL.items():
-#       writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
 # Define error functions for poisson random variables
+durcutoff = 6*16
 def PoisArray(lam):
-    arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)]
+    arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) 
+           for k in range(durcutoff)]
    return [math.exp(-lam)] + arr
 def PoisBin(lam):
@@ -110,16 +106,18 @@ def PoisBin(lam):
    out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])]
    return out/np.sum(out)
-def PoisErr(lam, label):
+def PoisErr(lam, label): # label is SymAge-Location
+    agesrc, agetar, loc = label
    err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2)
-    return err
+    errop = np.sum((PoisBin(lam) - DurFreqAAL[(agetar, agesrc, loc)])**2)
+    return err+errop
 PoisErr = np.vectorize(PoisErr, excluded=[1])
-# Define priors. Each lambda value is weighted inversely to the error
+PoisPrior={}
-PoisPrior = {}
+SymAge = [('Y','Y'), ('Y', 'M'), ('Y', 'O'), ('M', 'M'),('M', 'O'), ('O','O')]
-for x, y, z in list(product(Ages,Ages, Locales)):
+for symage, loc in list(product(SymAge, Locales)):
-    AAL = (x,y,z)
+    AAL = (symage[0],symage[1],loc)
-    arr= [1/PoisErr(i,AAL) for i in range(1,145)]
+    arr = [1/PoisErr(i,AAL) for i in range(1,durcutoff)]
    PoisPrior[AAL] = arr/np.sum(arr)
 # Save them to csv
@@ -129,4 +127,4 @@ dfkeys.columns = ["Age_in", "Age_out", "location"]
 dfvals = pd.DataFrame([pd.Series(x) for x in df.col2])
 dfout = dfkeys.join(dfvals)
 dfout.to_csv("AALPoisPriors.csv")
\ No newline at end of file