Skip to content
Snippets Groups Projects
Commit abe06b93 authored by Mark Penney's avatar Mark Penney
Browse files

Upload New File

parent 1e18580d
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2021-03-11
Extracts the duration distributions stratified by age-age-location from the
POLYMOD dataset.
Additionally, uses this duration data to deduce priors on the poisson
random variables controlling individual contact durations
@author: mark
"""
import numpy as np
import pandas as pd
import csv
import math
from itertools import product
# To start, let's load the two files: The contact and participant data
Contact = pd.read_csv("2008_Mossong_POLYMOD_contact_common.csv")
Part = pd.read_csv("2008_Mossong_POLYMOD_participant_common.csv")
# Delete contacts missing age data
Contact = Contact.dropna(subset=['cnt_age_exact'])
# Delete contacts missing location data
def Loctest(row):
emptycheck = row['cnt_home'] or row['cnt_work'] or row['cnt_school'] or row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
if emptycheck == False:
return 1
else:
return 0
Contact['Loctest'] = Contact.apply(Loctest, axis=1)
Contact = Contact[Contact['Loctest']==0]
# Next, we aggregate the locations
# Define the function computing WorkSchool value from remaining entries
def WorkSchool(row):
return row['cnt_work'] or row['cnt_school']
# Create the new column
Contact['cnt_workschool'] = Contact.apply(WorkSchool, axis=1)
# Define the function computed Rest value from remaining entries
def Rest(row):
return row['cnt_transport'] or row['cnt_leisure'] or row['cnt_otherplace']
# And then create the new column
Contact['cnt_rest'] = Contact.apply(Rest, axis=1)
# We also add a column with the source age bracket. First step is to define age bracket value
def Agesource(row):
ID = row['part_id']
year = float(Part[Part['part_id'] ==ID]['part_age'])
if year < 25:
return 'Y'
elif year < 65:
return 'M'
else:
return 'O'
# Then create the new source column
Contact['age_source'] = Contact.apply(Agesource, axis=1)
# Columns function for target age bracket
def Agetarget(row):
ID = row['cont_id']
year = float(Contact[Contact['cont_id'] ==ID]['cnt_age_exact'])
if year < 25:
return 'Y'
elif year < 65:
return 'M'
else:
return 'O'
# Add the new target column
Contact['age_target'] = Contact.apply(Agetarget, axis=1)
# Specify AAL categories
Ages = ['Y', 'M', 'O']
Locales = ['home', 'workschool', 'rest']
# Dictionary separating contact data by AAL
ContAAL = {}
for x, y, z in list(product(Ages,Ages,Locales)):
ContAAL[(x,y,z)] = Contact[Contact['cnt_'+z]==True][Contact['age_source']== x][Contact['age_target']==y]
# Duration distributions stratified by AAL
DurFreqAAL = {}
for x, y, z in list(product(Ages,Ages, Locales)):
C = ContAAL[(x,y,z)]
#D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
Durlist = [C[C['duration_multi']==i]["duration_multi"].count() for i in range(1,6)]
DurFreqAAL[(x,y,z)] = Durlist/np.sum(Durlist)
# Save to csv
#with open('AALDur_data.csv', 'w') as csv_file:
# writer = csv.writer(csv_file)
# for key, value in DurFreqAAL.items():
# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
# Define error functions for poisson random variables
def PoisArray(lam):
arr = [math.exp(-lam + k*math.log(lam) - np.sum([math.log(n) for n in range(1,k+1)]) ) for k in range(145)]
return [math.exp(-lam)] + arr
def PoisBin(lam):
Arr = PoisArray(lam)
out = [Arr[0], Arr[1], np.sum(Arr[2:6]), np.sum(Arr[6:24]), np.sum(Arr[24:])]
return out/np.sum(out)
def PoisErr(lam, label):
err = np.sum((PoisBin(lam) - DurFreqAAL[label])**2)
return err
PoisErr = np.vectorize(PoisErr, excluded=[1])
# Define priors. Each lambda value is weighted inversely to the error
PoisPrior = {}
for x, y, z in list(product(Ages,Ages, Locales)):
AAL = (x,y,z)
arr= [1/PoisErr(i,AAL) for i in range(1,145)]
PoisPrior[AAL] = arr/np.sum(arr)
# Save them to csv
Priordf = pd.DataFrame.from_dict(PoisPrior, orient="index")
Priordf.to_csv("AALPoisPriors.csv")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment