Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CovidAlertABM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Peter Jentsch
CovidAlertABM
Commits
abe06b93
Commit
abe06b93
authored
4 years ago
by
Mark Penney
Browse files
Options
Downloads
Patches
Plain Diff
Upload New File
parent
1e18580d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
IntervalsModel/network-data/POLYMOD/Duration-priors.py
+128
-0
128 additions, 0 deletions
IntervalsModel/network-data/POLYMOD/Duration-priors.py
with
128 additions
and
0 deletions
IntervalsModel/network-data/POLYMOD/Duration-priors.py
0 → 100644
+
128
−
0
View file @
abe06b93
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2021-03-11
Extracts the duration distributions stratified by age-age-location from the
POLYMOD dataset.
Additionally, uses this duration data to deduce priors on the poisson
random variables controlling individual contact durations
@author: mark
"""
import
numpy
as
np
import
pandas
as
pd
import
csv
import
math
from
itertools
import
product
# To start, let's load the two files: The contact and participant data
Contact
=
pd
.
read_csv
(
"
2008_Mossong_POLYMOD_contact_common.csv
"
)
Part
=
pd
.
read_csv
(
"
2008_Mossong_POLYMOD_participant_common.csv
"
)
# Delete contacts missing age data
Contact
=
Contact
.
dropna
(
subset
=
[
'
cnt_age_exact
'
])
# Delete contacts missing location data
def
Loctest
(
row
):
emptycheck
=
row
[
'
cnt_home
'
]
or
row
[
'
cnt_work
'
]
or
row
[
'
cnt_school
'
]
or
row
[
'
cnt_transport
'
]
or
row
[
'
cnt_leisure
'
]
or
row
[
'
cnt_otherplace
'
]
if
emptycheck
==
False
:
return
1
else
:
return
0
Contact
[
'
Loctest
'
]
=
Contact
.
apply
(
Loctest
,
axis
=
1
)
Contact
=
Contact
[
Contact
[
'
Loctest
'
]
==
0
]
# Next, we aggregate the locations
# Define the function computing WorkSchool value from remaining entries
def
WorkSchool
(
row
):
return
row
[
'
cnt_work
'
]
or
row
[
'
cnt_school
'
]
# Create the new column
Contact
[
'
cnt_workschool
'
]
=
Contact
.
apply
(
WorkSchool
,
axis
=
1
)
# Define the function computed Rest value from remaining entries
def
Rest
(
row
):
return
row
[
'
cnt_transport
'
]
or
row
[
'
cnt_leisure
'
]
or
row
[
'
cnt_otherplace
'
]
# And then create the new column
Contact
[
'
cnt_rest
'
]
=
Contact
.
apply
(
Rest
,
axis
=
1
)
# We also add a column with the source age bracket. First step is to define age bracket value
def
Agesource
(
row
):
ID
=
row
[
'
part_id
'
]
year
=
float
(
Part
[
Part
[
'
part_id
'
]
==
ID
][
'
part_age
'
])
if
year
<
25
:
return
'
Y
'
elif
year
<
65
:
return
'
M
'
else
:
return
'
O
'
# Then create the new source column
Contact
[
'
age_source
'
]
=
Contact
.
apply
(
Agesource
,
axis
=
1
)
# Columns function for target age bracket
def
Agetarget
(
row
):
ID
=
row
[
'
cont_id
'
]
year
=
float
(
Contact
[
Contact
[
'
cont_id
'
]
==
ID
][
'
cnt_age_exact
'
])
if
year
<
25
:
return
'
Y
'
elif
year
<
65
:
return
'
M
'
else
:
return
'
O
'
# Add the new target column
Contact
[
'
age_target
'
]
=
Contact
.
apply
(
Agetarget
,
axis
=
1
)
# Specify AAL categories
Ages
=
[
'
Y
'
,
'
M
'
,
'
O
'
]
Locales
=
[
'
home
'
,
'
workschool
'
,
'
rest
'
]
# Dictionary separating contact data by AAL
ContAAL
=
{}
for
x
,
y
,
z
in
list
(
product
(
Ages
,
Ages
,
Locales
)):
ContAAL
[(
x
,
y
,
z
)]
=
Contact
[
Contact
[
'
cnt_
'
+
z
]
==
True
][
Contact
[
'
age_source
'
]
==
x
][
Contact
[
'
age_target
'
]
==
y
]
# Duration distributions stratified by AAL
DurFreqAAL
=
{}
for
x
,
y
,
z
in
list
(
product
(
Ages
,
Ages
,
Locales
)):
C
=
ContAAL
[(
x
,
y
,
z
)]
#D = pd.DataFrame([[ID, C[C['part_id']==ID]['duration_multi']] for ID in PartsAge[x]], columns = ["ID", "Dur"])
Durlist
=
[
C
[
C
[
'
duration_multi
'
]
==
i
][
"
duration_multi
"
].
count
()
for
i
in
range
(
1
,
6
)]
DurFreqAAL
[(
x
,
y
,
z
)]
=
Durlist
/
np
.
sum
(
Durlist
)
# Save to csv
#with open('AALDur_data.csv', 'w') as csv_file:
# writer = csv.writer(csv_file)
# for key, value in DurFreqAAL.items():
# writer.writerow([key, value[0], value[1], value[2], value[3], value[4]])
# Define error functions for poisson random variables
def
PoisArray
(
lam
):
arr
=
[
math
.
exp
(
-
lam
+
k
*
math
.
log
(
lam
)
-
np
.
sum
([
math
.
log
(
n
)
for
n
in
range
(
1
,
k
+
1
)])
)
for
k
in
range
(
145
)]
return
[
math
.
exp
(
-
lam
)]
+
arr
def
PoisBin
(
lam
):
Arr
=
PoisArray
(
lam
)
out
=
[
Arr
[
0
],
Arr
[
1
],
np
.
sum
(
Arr
[
2
:
6
]),
np
.
sum
(
Arr
[
6
:
24
]),
np
.
sum
(
Arr
[
24
:])]
return
out
/
np
.
sum
(
out
)
def
PoisErr
(
lam
,
label
):
err
=
np
.
sum
((
PoisBin
(
lam
)
-
DurFreqAAL
[
label
])
**
2
)
return
err
PoisErr
=
np
.
vectorize
(
PoisErr
,
excluded
=
[
1
])
# Define priors. Each lambda value is weighted inversely to the error
PoisPrior
=
{}
for
x
,
y
,
z
in
list
(
product
(
Ages
,
Ages
,
Locales
)):
AAL
=
(
x
,
y
,
z
)
arr
=
[
1
/
PoisErr
(
i
,
AAL
)
for
i
in
range
(
1
,
145
)]
PoisPrior
[
AAL
]
=
arr
/
np
.
sum
(
arr
)
# Save them to csv
Priordf
=
pd
.
DataFrame
.
from_dict
(
PoisPrior
,
orient
=
"
index
"
)
Priordf
.
to_csv
(
"
AALPoisPriors.csv
"
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment