Commit db0928c7 authored by Eugene Lu
Browse files

done

parent f9e8fda8
......@@ -86,6 +86,10 @@ create table county_candidate_statistics (
foreign key (county_id) references county(id)
);
alter table county_candidate_statistics add index party_idx (party);
alter table county_candidate_statistics add index candidate_idx (candidate);
alter table county_candidate_statistics add index votes_idx (votes);
'''create table poll (
id int,
state_id int,
......
......@@ -103,12 +103,12 @@ def parse_tweet_csv(raw_path, hashtag, processed_tweets_path, processed_users_pa
user_idx = 0
errors = 0
for row in csv_reader:
if idx > 250000:
if idx > 100000:
break
try:
idx += 1
user_id= row[6]
tweet_writer.writerow([idx, user_id, row[0], row[2], hashtag, row[3], row[4], row[5]])
tweet_writer.writerow([idx, user_idx, row[0], row[2], hashtag, row[3], row[4], row[5]])
if not user_id in users:
user_idx += 1
users[user_id] = True
......@@ -257,8 +257,6 @@ def import_data():
q = 'INSERT IGNORE INTO county VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
rows = 0
for row in csv_reader:
if idx > 1000:
break
values.append(tuple([None if x == '' else x for x in row]))
if idx % BATCH_SIZE == 0 or idx == file_len:
rows += len(values)
......@@ -277,8 +275,6 @@ def import_data():
q = 'INSERT IGNORE INTO county_candidate_statistics VALUES(%s, %s, %s, %s, %s, %s)'
rows = 0
for row in csv_reader:
if idx > 1000:
break
values.append(tuple([None if x == '' else x for x in row]))
if idx % BATCH_SIZE == 0 or idx == file_len:
rows += len(values)
......@@ -298,8 +294,6 @@ def import_data():
q = 'INSERT IGNORE INTO user VALUES(%s, %s, %s, %s, %s, %s, %s)'
rows = 0
for row in csv_reader:
if idx > 1000:
break
values.append(tuple([None if x == '' else x for x in row]))
if idx % BATCH_SIZE == 0 or idx == file_len:
rows += len(values)
......@@ -318,8 +312,6 @@ def import_data():
q = 'INSERT IGNORE INTO tweet VALUES(%s, %s, %s, %s, %s, %s, %s, %s)'
rows = 0
for row in csv_reader:
if idx > 1000:
break
values.append(tuple([None if x == '' else x for x in row]))
if idx % BATCH_SIZE == 0 or idx == file_len:
rows += len(values)
......@@ -332,5 +324,5 @@ def import_data():
if __name__ == "__main__":
#parse_csvs()
parse_csvs()
import_data()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment