Skip to content
Snippets Groups Projects
Commit 589d4ac2 authored by Mark Penney's avatar Mark Penney
Browse files

Upload New File

parent b6773ffb
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2020-02-01
Here we fit a model of individual contact durations, with Age-Age
stratification, based on the HHYMO data.
Individual contact durations are assumed to be normal.
The prediction error that we seek to minimise is determined as follows:
- For each respondent, we use their number of contacts to compute their
expected coverage in the intervals model.
- The per respondent error is the difference between their reported value
and the expected one computed.
@author: mark
"""
import numpy as np
import pandas as pd
from numpy import random
import Intervals_Model as INT
import itertools
from scipy import optimize
HHYMO = pd.read_csv("HHYMO.csv")
# Set the underlying parameters for the intervals model
Sparam = [60,12]
# Set parameters for intervals sample and subpopulation size
numsamples = 10
subsize = 500
# Swap age brackets for numbers
swap={'Y':0, 'M':1, 'O':2}
# Total weight in survey
Wghttotal = HHYMO['WGHT_PER'].sum()
# Error between expected coverage from model params and reported coverage
def ExpErrorNORM(row, *args):
MU, SIGMA = args # each is a 3x3 matrix
age = swap[row['AGERESP']]
# Sample durations for respondents contacts
Ydurs = random.normal(MU[age][0],SIGMA[age][0], size = int(row['YNUM'])).astype(int)
Mdurs = random.normal(MU[age][1],SIGMA[age][1], size = int(row['MNUM'])).astype(int)
Odurs = random.normal(MU[age][2],SIGMA[age][2], size = int(row['ONUM'])).astype(int)
# Compute expected total duration
Yexp = np.average(INT.TotDurSample(numsamples, Sparam, Ydurs))
Mexp = np.average(INT.TotDurSample(numsamples, Sparam, Mdurs))
Oexp = np.average(INT.TotDurSample(numsamples, Sparam, Odurs))
# Compute total error
err = (row['WGHT_PER']/Wghttotal)*((Yexp - row['YDUR'+str(Sparam[1])])**2
+(Mexp - row['MDUR'+str(Sparam[1])])**2
+ (Oexp - row['ODUR'+str(Sparam[1])])**2)
return err
# Poisson variant of the above
def ExpErrorPOIS(row, *args):
LAM = np.array(args) # a 3x3 matrix
LAM = LAM.reshape((3,3))
age = swap[row['AGERESP']]
# Sample durations for respondents contacts
Ydurs = random.poisson(LAM[age][0], size = int(row['YNUM'])).astype(int)
Mdurs = random.poisson(LAM[age][1], size = int(row['MNUM'])).astype(int)
Odurs = random.poisson(LAM[age][2], size = int(row['ONUM'])).astype(int)
# Compute expected total duration
Yexp = np.average(INT.TotDurSample(numsamples, Sparam, Ydurs))
Mexp = np.average(INT.TotDurSample(numsamples, Sparam, Mdurs))
Oexp = np.average(INT.TotDurSample(numsamples, Sparam, Odurs))
# Compute total error
err = (row['WGHT_PER']/Wghttotal)*((Yexp - row['YDUR'+str(Sparam[1])])**2
+(Mexp - row['MDUR'+str(Sparam[1])])**2
+ (Oexp - row['ODUR'+str(Sparam[1])])**2)
return err
# Compute error between expected and reported coverage
# params is flattened and concatenated MU and Sigma
# To reduce run time we only evaluate the error on random subset
# of the respondents
def ErrfunNORM(params):
MU = [[params[0], params[1], params[2]],
[params[1], params[3], params[4]],
[params[2], params[4], params[5]]]
SIGMA = [[params[6], params[7], params[8]],
[params[7], params[9], params[10]],
[params[8], params[10], params[11]]]
#SIGMA = params[9:].reshape((3,3))
# Select subset of respondents
IDs = list(HHYMO["PUMFID"])
random.shuffle(IDs)
sub = IDs[:subsize]
# Dataframe for sub population
Bools = HHYMO["PUMFID"].isin(sub)
Subdf = HHYMO[Bools]
# Compute error on subpopulation
Errcol = Subdf.apply(ExpErrorNORM, axis=1, args=[MU, SIGMA])
return Errcol.sum()/subsize
# Poisson variant of the above
def ErrfunPOIS(params):
#LAM = params.reshape((3,3))
LAM= [[params[0], params[1], params[2]],
[params[1], params[3], params[4]],
[params[2], params[4], params[5]]]
# Select subset of respondents
IDs = list(HHYMO["PUMFID"])
random.shuffle(IDs)
sub = IDs[:subsize]
# Dataframe for sub population
Bools = HHYMO["PUMFID"].isin(sub)
Subdf = HHYMO[Bools]
# Compute error on subpopulation
Errcol = Subdf.apply(ExpErrorPOIS, axis=1, args=[LAM])
return Errcol.sum()/subsize
# Set parameter bounds for fitting
MUbounds = (6,12*6)
SIGMAbounds = (1,48)
BoundsNORM = [MUbounds for i in range(6)] + [SIGMAbounds for i in range(6)]
BoundsPOIS = [MUbounds for i in range(6)]
# Run both normal and poisson fitting
numruns = 5
NORMSave = {}
POISSave = {}
for i in range(numruns):
NORMFIT = optimize.differential_evolution(ErrfunNORM, bounds = BoundsNORM,
disp = True, maxiter = 175,
popsize = 10)
POISFIT = optimize.differential_evolution(ErrfunPOIS, bounds=BoundsPOIS,
disp = True, maxiter = 150,
popsize = 10)
NORMSave[i] = list(NORMFIT.get('x')) + [float(NORMFIT.get('fun'))]
POISSave[i] = list(POISFIT.get('x')) + [float(POISFIT.get('fun'))]
pd.DataFrame.from_dict(data=NORMSave, orient='index').to_csv('NormalFit-Feb2.csv', header=False)
pd.DataFrame.from_dict(data=POISSave, orient='index').to_csv('PoissonFit-Feb2.csv', header=False)
#pd.DataFrame(durparamsfit+errorfit).to_csv("HHNormalFit.csv", index=False)
# Poisson fitting
#POISFIT = optimize.differential_evolution(ErrfunPOIS, bounds=BoundsPOIS,
#disp = True, maxiter = 150,
#popsize = 10)
#durparamsfit = list(POISFIT.get('x'))
#errorfit = [float(POISFIT.get('fun'))]
#pd.DataFrame(durparamsfit+errorfit).to_csv("HHPoissonFit.csv", index=False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment