Commit 6a1f609f authored by Robert
Browse files

loading article data

parent b09d380e
......@@ -159,4 +159,122 @@ add foreign key (symbol) references Stock(symbol);
/********************************************************************************************/
/********************************************************************************************/
/* LOAD IPO DATA */
/* TODO once new csv is created */
/********************************************************************************************/
/********************************************************************************************/
/* LOAD ARTICLE DATA */
/* Article: one row per news article, identified by articleID.
   symbol is left without a foreign key at this point; the constraint is added
   later in the script, after the article data has been loaded. */
create table Article (
    articleID int not null primary key,
    symbol    varchar(6),
    url       varchar(2048),
    headline  varchar(512),
    date      datetime
);

/* AnalystArticle: extends an Article row (same articleID) with its analyst.
   The foreign key is deliberately left unnamed: a later statement in this
   script drops it as AnalystArticle_ibfk_1, so naming it here would require
   changing that statement as well. */
create table AnalystArticle (
    articleID int not null primary key,
    analyst   varchar(255),
    foreign key (articleID) references Article (articleID)
);

/* PartnerArticle: extends an Article row (same articleID) with its partner.
   The foreign key is left unnamed for the same reason as on AnalystArticle
   (it is dropped later as PartnerArticle_ibfk_1). */
create table PartnerArticle (
    articleID int not null primary key,
    partner   varchar(255),
    foreign key (articleID) references Article (articleID)
);
/* Staging table that the article CSVs are loaded into before being split
   into Article and its analyst/partner extension tables.
   Keep the column order stable: any load that omits a column list fills the
   columns in exactly this order.
   type is filled in by the load steps: 0 = analyst article, 1 = partner article. */
create table ArticleTemp (
    id       int not null auto_increment primary key,
    headline varchar(512),
    url      varchar(2048),
    author   varchar(255),
    date     datetime,
    symbol   varchar(6),
    type     int
);
/* Load the analyst articles.
   The CSV fields are mapped to columns explicitly instead of positionally, so
   the load no longer depends on the file being narrower than the table.
   The first field is still stored as id, exactly as the positional load did
   (assumes it is a unique integer per row -- TODO confirm against the file).
   symbol is the last field on each line, so with '\n' line endings a trailing
   '\r' from a CRLF file ends up attached to it; it is stripped while the row
   is read, before the value is stored in the 6-character column.
   type = 0 tags every row from this file as an analyst article. */
load data infile '/var/lib/mysql-files/18-Stocks/raw_analyst_ratings.csv' ignore into table ArticleTemp
fields terminated by ','
enclosed by '"'
lines terminated by '\n'
ignore 1 lines
(id, headline, url, author, date, @raw_symbol)
set symbol = replace(@raw_symbol, '\r', ''),
    type = 0;
/* Insert the analyst data into the relevant tables.
   Article must be filled first: AnalystArticle.articleID references it. */
insert into Article (articleID, symbol, url, headline, date)
select id, symbol, url, headline, date
from ArticleTemp
where type = 0;
insert into AnalystArticle (articleID, analyst)
select id, author
from ArticleTemp
where type = 0;
/* Load the partner articles.
   ArticleTemp already holds rows from an earlier load, and this load runs
   with "ignore": any row whose id duplicates an existing id would be silently
   discarded. The first CSV field is therefore read into a throwaway variable
   and id is left to auto_increment, so every partner row gets a fresh id
   above the ones already present.
   (Assumes the first field is only a per-file row number, not a key that
   anything else refers to -- TODO confirm against the file.)
   symbol is the last field on each line, so with '\n' line endings a trailing
   '\r' from a CRLF file ends up attached to it; it is stripped while the row
   is read, before the value is stored in the 6-character column.
   type = 1 tags every row from this file as a partner article. */
load data infile '/var/lib/mysql-files/18-Stocks/raw_partner_headlines.csv' ignore into table ArticleTemp
fields terminated by ','
enclosed by '"'
lines terminated by '\n'
ignore 1 lines
(@csv_row_number, headline, url, author, date, @raw_symbol)
set symbol = replace(@raw_symbol, '\r', ''),
    type = 1;
/* Insert the partner data into the relevant tables.
   Article must be filled first: PartnerArticle.articleID references it. */
insert into Article (articleID, symbol, url, headline, date)
select id, symbol, url, headline, date
from ArticleTemp
where type = 1;
insert into PartnerArticle (articleID, partner)
select id, author
from ArticleTemp
where type = 1;
/* Add missing stock tickers to Stock so that every non-NULL Article.symbol
   has a parent row before the foreign key on symbol is created.
   "insert ignore" skips tickers that Stock already contains.
   NULL symbols are excluded: a NULL child value needs no parent row for a
   foreign key, and inserting it would only create a meaningless Stock row
   (with "ignore", a NULL headed for a not-null column is coerced rather than
   rejected -- Stock's definition is not visible here, so this is a guard). */
insert ignore into Stock (symbol)
select distinct symbol
from Article
where symbol is not null;
/* Set up the foreign key constraint on symbol and make articleID an
   auto-increment primary key.
   Order matters here: the child tables' foreign keys on articleID are dropped
   first, Article is altered, and only then are the child foreign keys put back
   (presumably because MySQL will not modify a column while a foreign key
   references it -- verify if this sequence is ever changed).
   The dropped constraints are addressed by their auto-generated names. */
alter table PartnerArticle
    drop foreign key PartnerArticle_ibfk_1;
alter table AnalystArticle
    drop foreign key AnalystArticle_ibfk_1;

alter table Article
    add foreign key (symbol) references Stock (symbol),
    modify articleID int auto_increment;

alter table PartnerArticle
    add foreign key (articleID) references Article (articleID);
alter table AnalystArticle
    add foreign key (articleID) references Article (articleID);

/* The staging table has served its purpose once the data is in place. */
drop table ArticleTemp;
/********************************************************************************************/
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment