Commit 24e8534d authored by Mark Penney
Browse files

Upload New File

parent 4dbce2a6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 2021-02-08
Short script to organize the community duration data from the time-use survey.
Community = non-household.
Aggregates reported durations according to the age bracket of the respondent.
Duration keys 206, 207 and 209 must be aggregated using the expected coverage
derived from the Intervals model.
@author: mark
"""
import numpy as np
import pandas as pd
import Intervals_Model as INT
import itertools
def Flatten(A):
    """Flatten one level of nesting.

    Parameters
    ----------
    A : iterable of iterables
        Outer container whose elements are themselves iterable.

    Returns
    -------
    list
        The items of every inner iterable, concatenated in order. Only the
        outermost level of nesting is removed.
    """
    return list(itertools.chain.from_iterable(A))
# Read the complete survey main file into memory.
Maindf = pd.read_csv("../gss-89M0034-E-2015-c-29-main_F2.csv")
# Build the community dataframe as an independent copy holding only the
# PUMFID and WGHT_PER columns, the age group and the four duration columns.
commdf = Maindf.loc[
    :,
    ['PUMFID', 'WGHT_PER', 'AGEGR10',
     'DURS206', 'DURS207', 'DURS208', 'DURS209'],
].copy()
# Function to add age bracket of respondent
def BRACADD(row):
    """Return the age-bracket label for one respondent row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the 'AGEGR10' age-group code.

    Returns
    -------
    str
        'Y' when the code equals 1, 'M' for any other code below 6,
        and 'O' otherwise.
    """
    age = row['AGEGR10']
    if age == 1:
        return 'Y'
    if age < 6:
        return 'M'
    # NOTE(review): a missing (NaN) code also lands here, because both
    # comparisons above evaluate to False for NaN.
    return 'O'
# Function to add the aggregated duration
# NOTE(review): the module docstring names keys 206, 207 and 209 only, while
# DURS208 is also aggregated here -- confirm which is intended.
durkeys = ['DURS206', 'DURS207', 'DURS208', 'DURS209']


def DURSADD(row):
    """Return the aggregated community duration for one respondent row.

    Each duration listed in ``durkeys`` is divided by 10 and rounded to an
    integer. The resulting list is passed to
    ``INT.TotDurSample(50, [60, 12], ...)``, and the mean of what that call
    returns is rounded and returned.

    Parameters
    ----------
    row : pandas.Series
        A respondent row containing every column named in ``durkeys``.

    Returns
    -------
    numpy.float64
        Rounded average of the values returned by ``INT.TotDurSample``.

    Notes
    -----
    A missing (NaN) duration makes the ``int(...)`` conversion raise
    ``ValueError``.
    The meaning of the arguments ``50`` and ``[60, 12]`` is defined by the
    Intervals model and is not visible from this script -- TODO confirm.
    """
    durs = row[durkeys].to_numpy()
    # Rescale every reported duration by a factor of ten before sampling.
    rounded = [int(np.around(d / 10)) for d in durs]
    samples = INT.TotDurSample(50, [60, 12], rounded)
    return np.round(np.average(samples))
def COMBINED(row):
    """Return the age bracket and aggregated duration of a row as one Series.

    Parameters
    ----------
    row : pandas.Series
        A respondent row accepted by both ``BRACADD`` and ``DURSADD``.

    Returns
    -------
    pandas.Series
        Two elements with positional index 0 and 1: the bracket label from
        ``BRACADD(row)`` followed by the duration from ``DURSADD(row)``.
    """
    # Build the Series directly: Series.append was deprecated in pandas 1.4
    # and removed in pandas 2.0.
    return pd.Series([BRACADD(row), DURSADD(row)])
# Compute the age bracket and aggregated duration for every respondent row
# and store them as the new columns AGERESP and DUR.
commdf[['AGERESP', 'DUR']] = commdf.apply(COMBINED, axis=1)
# Then slice the data by age
Brac = ('Y', 'M', 'O')
CData = {}
for x in Brac:
    # Keep only the rows of bracket x and the three columns that are exported.
    CData[x] = commdf.loc[commdf['AGERESP'] == x, ['PUMFID', 'WGHT_PER', 'DUR']]
# Save to csv
for x in Brac:
    # One file per bracket: CDataY.csv, CDataM.csv, CDataO.csv (no index column).
    CData[x].to_csv('CData' + x + '.csv', index=False)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment