Commit bf95c2d5 authored by Vishwesh Patel's avatar Vishwesh Patel
Browse files

added csv preprocess script

parent 9fa7d765
dir=.

# Preprocess the raw CSV exports with GNU awk (gawk): each input file is
# reduced to a handful of columns and written next to it with a
# "_processed_G41.csv" suffix.
#
# gawk is required because FPAT is a gawk extension; it describes what a
# field looks like (instead of what separates fields), which lets a quoted
# field such as "a,b" stay in one piece.

#######################################
# Extract the selected columns from the IPO data file.
# Columns kept, in output order (input field number in parentheses):
#   symbol(1), day(8), daysBetterThanSP(2), marketMonthTrend(1648),
#   market3MonthTrend(1649), market6MonthTrend(1650),
#   marketYearTrend(1651), usaCompany(1653)
# Arguments: $1 - input CSV path
#            $2 - output CSV path (overwritten)
# Returns:   exit status of gawk
#######################################
extract_ipo_columns() {
  local src=$1
  local dst=$2
  gawk -v 'FPAT=[^,]*|"[^"]*"' \
    '{ print $1 "," $8 "," $2 "," $1648 "," $1649 "," $1650 "," $1651 "," $1653 }' \
    "$src" > "$dst"
}

#######################################
# Extract the selected columns from one yearly financial data file.
# The first record is treated as the header: the selected fields are
# overwritten with fixed names before printing, so the output header does
# not depend on how the input header is spelled.
# Columns kept, in output order (input field number in parentheses):
#   symbol(1), Revenue(2), revenueGrowth(3), costOfRevenue(4),
#   grossProfit(5), r&dExpenses(6), eps(18), epsDiluted(19),
#   dividendPersShare(22), ebitda(28), ebit(29), profitmargin(26),
#   cashandEquivealent(33), debtGrowth(220)
# Arguments: $1 - input CSV path
#            $2 - output CSV path (overwritten)
# Returns:   exit status of gawk
#######################################
extract_financial_columns() {
  local src=$1
  local dst=$2
  # NOTE(review): the FPAT below is kept exactly as it was. Inside a bracket
  # list gawk may read the backslash as escaping a literal "s" rather than as
  # "whitespace" - confirm the intent before touching it.
  gawk -v 'FPAT=[^,\\s]*|"[^"]*"' '
    NR == 1 {
      $1 = "symbol"
      $2 = "Revenue"
      $3 = "revenueGrowth"
      $4 = "costOfRevenue"
      $5 = "grossProfit"
      $6 = "r&dExpenses"
      $18 = "eps"
      $19 = "epsDiluted"
      $22 = "dividendPersShare"
      $28 = "ebitda"
      $29 = "ebit"
      $26 = "profitmargin"
      $33 = "cashandEquivealent"
      $220 = "debtGrowth"
    }
    { print $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $18 "," $19 "," $22 "," $28 "," $29 "," $26 "," $33 "," $220 }
  ' "$src" > "$dst"
}

# IPODATAFULL
extract_ipo_columns "$dir/IPODataFull.csv" "$dir/IPODataFull_processed_G41.csv"

# financial data: one input file per year, all processed the same way.
# A failure on one year does not stop the remaining years.
for year in 2014 2015 2016 2017 2018; do
  extract_financial_columns \
    "$dir/${year}_Financial_Data.csv" \
    "$dir/${year}_Financial_Data_processed_G41.csv"
done
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment