Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CovidAlertABM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Peter Jentsch
CovidAlertABM
Commits
589d4ac2
Commit
589d4ac2
authored
4 years ago
by
Mark Penney
Browse files
Options
Downloads
Patches
Plain Diff
Upload New File
parent
b6773ffb
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
network-data/Timeuse/HH/Duration-fitting.py
+147
-0
147 additions, 0 deletions
network-data/Timeuse/HH/Duration-fitting.py
with
147 additions
and
0 deletions
network-data/Timeuse/HH/Duration-fitting.py
0 → 100644
+
147
−
0
View file @
589d4ac2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2020-02-01
Here we fit a model of individual contact durations, with Age-Age
stratification, based on the HHYMO data.
Individual contact durations are assumed to be normal.
The prediction error that we seek to minimise is determined as follows:
- For each respondent, we use their number of contacts to compute their
expected coverage in the intervals model.
- The per respondent error is the difference between their reported value
and the expected one computed.
@author: mark
"""
import
numpy
as
np
import
pandas
as
pd
from
numpy
import
random
import
Intervals_Model
as
INT
import
itertools
from
scipy
import
optimize
HHYMO
=
pd
.
read_csv
(
"
HHYMO.csv
"
)
# Set the underlying parameters for the intervals model
Sparam
=
[
60
,
12
]
# Set parameters for intervals sample and subpopulation size
numsamples
=
10
subsize
=
500
# Swap age brackets for numbers
swap
=
{
'
Y
'
:
0
,
'
M
'
:
1
,
'
O
'
:
2
}
# Total weight in survey
Wghttotal
=
HHYMO
[
'
WGHT_PER
'
].
sum
()
# Error between expected coverage from model params and reported coverage
def
ExpErrorNORM
(
row
,
*
args
):
MU
,
SIGMA
=
args
# each is a 3x3 matrix
age
=
swap
[
row
[
'
AGERESP
'
]]
# Sample durations for respondents contacts
Ydurs
=
random
.
normal
(
MU
[
age
][
0
],
SIGMA
[
age
][
0
],
size
=
int
(
row
[
'
YNUM
'
])).
astype
(
int
)
Mdurs
=
random
.
normal
(
MU
[
age
][
1
],
SIGMA
[
age
][
1
],
size
=
int
(
row
[
'
MNUM
'
])).
astype
(
int
)
Odurs
=
random
.
normal
(
MU
[
age
][
2
],
SIGMA
[
age
][
2
],
size
=
int
(
row
[
'
ONUM
'
])).
astype
(
int
)
# Compute expected total duration
Yexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Ydurs
))
Mexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Mdurs
))
Oexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Odurs
))
# Compute total error
err
=
(
row
[
'
WGHT_PER
'
]
/
Wghttotal
)
*
((
Yexp
-
row
[
'
YDUR
'
+
str
(
Sparam
[
1
])])
**
2
+
(
Mexp
-
row
[
'
MDUR
'
+
str
(
Sparam
[
1
])])
**
2
+
(
Oexp
-
row
[
'
ODUR
'
+
str
(
Sparam
[
1
])])
**
2
)
return
err
# Poisson variant of the above
def
ExpErrorPOIS
(
row
,
*
args
):
LAM
=
np
.
array
(
args
)
# a 3x3 matrix
LAM
=
LAM
.
reshape
((
3
,
3
))
age
=
swap
[
row
[
'
AGERESP
'
]]
# Sample durations for respondents contacts
Ydurs
=
random
.
poisson
(
LAM
[
age
][
0
],
size
=
int
(
row
[
'
YNUM
'
])).
astype
(
int
)
Mdurs
=
random
.
poisson
(
LAM
[
age
][
1
],
size
=
int
(
row
[
'
MNUM
'
])).
astype
(
int
)
Odurs
=
random
.
poisson
(
LAM
[
age
][
2
],
size
=
int
(
row
[
'
ONUM
'
])).
astype
(
int
)
# Compute expected total duration
Yexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Ydurs
))
Mexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Mdurs
))
Oexp
=
np
.
average
(
INT
.
TotDurSample
(
numsamples
,
Sparam
,
Odurs
))
# Compute total error
err
=
(
row
[
'
WGHT_PER
'
]
/
Wghttotal
)
*
((
Yexp
-
row
[
'
YDUR
'
+
str
(
Sparam
[
1
])])
**
2
+
(
Mexp
-
row
[
'
MDUR
'
+
str
(
Sparam
[
1
])])
**
2
+
(
Oexp
-
row
[
'
ODUR
'
+
str
(
Sparam
[
1
])])
**
2
)
return
err
# Compute error between expected and reported coverage
# params is flattened and concatenated MU and Sigma
# To reduce run time we only evaluate the error on random subset
# of the respondents
def
ErrfunNORM
(
params
):
MU
=
[[
params
[
0
],
params
[
1
],
params
[
2
]],
[
params
[
1
],
params
[
3
],
params
[
4
]],
[
params
[
2
],
params
[
4
],
params
[
5
]]]
SIGMA
=
[[
params
[
6
],
params
[
7
],
params
[
8
]],
[
params
[
7
],
params
[
9
],
params
[
10
]],
[
params
[
8
],
params
[
10
],
params
[
11
]]]
#SIGMA = params[9:].reshape((3,3))
# Select subset of respondents
IDs
=
list
(
HHYMO
[
"
PUMFID
"
])
random
.
shuffle
(
IDs
)
sub
=
IDs
[:
subsize
]
# Dataframe for sub population
Bools
=
HHYMO
[
"
PUMFID
"
].
isin
(
sub
)
Subdf
=
HHYMO
[
Bools
]
# Compute error on subpopulation
Errcol
=
Subdf
.
apply
(
ExpErrorNORM
,
axis
=
1
,
args
=
[
MU
,
SIGMA
])
return
Errcol
.
sum
()
/
subsize
# Poisson variant of the above
def
ErrfunPOIS
(
params
):
#LAM = params.reshape((3,3))
LAM
=
[[
params
[
0
],
params
[
1
],
params
[
2
]],
[
params
[
1
],
params
[
3
],
params
[
4
]],
[
params
[
2
],
params
[
4
],
params
[
5
]]]
# Select subset of respondents
IDs
=
list
(
HHYMO
[
"
PUMFID
"
])
random
.
shuffle
(
IDs
)
sub
=
IDs
[:
subsize
]
# Dataframe for sub population
Bools
=
HHYMO
[
"
PUMFID
"
].
isin
(
sub
)
Subdf
=
HHYMO
[
Bools
]
# Compute error on subpopulation
Errcol
=
Subdf
.
apply
(
ExpErrorPOIS
,
axis
=
1
,
args
=
[
LAM
])
return
Errcol
.
sum
()
/
subsize
# Set parameter bounds for fitting
MUbounds
=
(
6
,
12
*
6
)
SIGMAbounds
=
(
1
,
48
)
BoundsNORM
=
[
MUbounds
for
i
in
range
(
6
)]
+
[
SIGMAbounds
for
i
in
range
(
6
)]
BoundsPOIS
=
[
MUbounds
for
i
in
range
(
6
)]
# Run both normal and poisson fitting
numruns
=
5
NORMSave
=
{}
POISSave
=
{}
for
i
in
range
(
numruns
):
NORMFIT
=
optimize
.
differential_evolution
(
ErrfunNORM
,
bounds
=
BoundsNORM
,
disp
=
True
,
maxiter
=
175
,
popsize
=
10
)
POISFIT
=
optimize
.
differential_evolution
(
ErrfunPOIS
,
bounds
=
BoundsPOIS
,
disp
=
True
,
maxiter
=
150
,
popsize
=
10
)
NORMSave
[
i
]
=
list
(
NORMFIT
.
get
(
'
x
'
))
+
[
float
(
NORMFIT
.
get
(
'
fun
'
))]
POISSave
[
i
]
=
list
(
POISFIT
.
get
(
'
x
'
))
+
[
float
(
POISFIT
.
get
(
'
fun
'
))]
pd
.
DataFrame
.
from_dict
(
data
=
NORMSave
,
orient
=
'
index
'
).
to_csv
(
'
NormalFit-Feb2.csv
'
,
header
=
False
)
pd
.
DataFrame
.
from_dict
(
data
=
POISSave
,
orient
=
'
index
'
).
to_csv
(
'
PoissonFit-Feb2.csv
'
,
header
=
False
)
#pd.DataFrame(durparamsfit+errorfit).to_csv("HHNormalFit.csv", index=False)
# Poisson fitting
#POISFIT = optimize.differential_evolution(ErrfunPOIS, bounds=BoundsPOIS,
#disp = True, maxiter = 150,
#popsize = 10)
#durparamsfit = list(POISFIT.get('x'))
#errorfit = [float(POISFIT.get('fun'))]
#pd.DataFrame(durparamsfit+errorfit).to_csv("HHPoissonFit.csv", index=False)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment