Commit e49ad577 authored by Colin Chung's avatar Colin Chung
Browse files

sql scripts

parent 12d7faf3
-- create and load raw data
CREATE TABLE raw_recipes (
Name VARCHAR(255),
ID int,
Minutes int,
ContributorID int,
Submitted date,
Tags longtext,
calories int,
fat int,
sugar int,
sodium int,
protein int,
sat_fat int,
carbohydrates int,
StepsCount int,
Steps longtext,
Description longtext,
Ingredients longtext,
IngredientsCount int
);
LOAD DATA INFILE "/var/lib/mysql-files/02-Recipes/RAW_recipes.csv" IGNORE
INTO TABLE raw_recipes
CHARACTER SET latin1
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
(@col1, @col2, @col3, @col4, @col5, @col6, @col7, @col8, @col9, @col10, @col11, @col12)
set Name=@col1,
ID=cast(@col2 as unsigned integer),
Minutes=cast(@col3 as unsigned integer),
ContributorID=cast(@col4 as unsigned integer),
Submitted=cast(@col5 as date),
Tags=@col6,
calories=cast(SUBSTRING_INDEX(@col17, ',', 1) as unsigned),
fat=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 2), ',', -1) as unsigned),
sugar=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 3), ',', -1) as unsigned),
sodium=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 4), ',', -1) as unsigned),
protein=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 5), ',', -1) as unsigned),
sat_fat=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 6), ',', -1) as unsigned),
carbohydrates=cast(SUBSTRING_INDEX(SUBSTRING_INDEX(@col17, ',', 7), ',', -1) as unsigned),
StepsCount=cast(@col8 as unsigned integer),
Steps=@col9,
Description=@col10,
Ingredients=@col11,
IngredientsCount=cast(@col12 as unsigned integer)
;
CREATE TABLE raw_inter (
UserID int,
RecipeID int,
Date date,
Rating int,
Review VARCHAR(1000)
);
LOAD DATA INFILE "/var/lib/mysql-files/02-Recipes/RAW_interactions.csv" IGNORE
INTO TABLE raw_inter
CHARACTER SET latin1
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
(@col1, @col2, @col3, @col4, @col5)
set UserID=cast(@col1 as unsigned integer),
RecipeID=cast(@col2 as unsigned integer),
Date=cast(@col3 as date),
Rating=cast(@col4 as unsigned integer),
Review=@col5
;
-- create entity sets and relationship setse
drop table recipe_tag;
drop table recipe_ingredients;
drop table ingredients;
drop table tag;
drop table interactions;
drop table recipes;
create table recipes (
name varchar(255),
recipe_id int not null,
minutes int,
contributer_id int,
date_submitted date,
calories decimal,
fat decimal,
sugar decimal,
sodium decimal,
protein decimal,
sat_fat decimal,
carbohydrates decimal,
n_steps int,
steps varchar(1000),
description varchar(1000),
primary key (recipe_id)
);
create table interactions (
user_id int not null,
recipe_id int not null,
date date not null,
rating int,
review varchar(10000),
primary key (user_id, recipe_id),
foreign key (recipe_id) references recipes(recipe_id)
);
create table ingredients (
ingredient_id int not null,
name varchar(255),
primary key (ingredient_id)
);
create table tag (
tag_id int not null,
name varchar(255),
primary key (tag_id)
);
create table recipe_ingredients (
recipe_id int,
ingredient_id int,
primary key (recipe_id, ingredient_id),
foreign key (ingredient_id) references ingredients(ingredient_id),
foreign key (recipe_id) references recipes(recipe_id)
);
create table recipe_tag (
recipe_id int not null,
tag_id int not null,
primary key (recipe_id, tag_id),
foreign key (tag_id) references tag(tag_id),
foreign key (recipe_id) references recipes(recipe_id)
);
-- insert recipes and interactions
SET SESSION sql_mode = '';
insert into recipes (
name,
recipe_id,
minutes,
contributer_id,
date_submitted,
calories,
fat,
sugar,
sodium,
protein,
sat_fat,
carbohydrates,
n_steps,
steps,
description
) select
Name,
ID,
Minutes,
ContributorID,
Submitted,
calories,
fat,
sugar,
sodium,
protein,
sat_fat,
carbohydrates,
StepsCount,
Steps,
Description
from raw_recipes;
insert into interactions (
user_id,
recipe_id,
date,
rating,
review
) select
UserID,
RecipeID,
Date,
Rating,
Review
from raw_inter;
create index idx_recipe_id on interactions(recipe_id);
-- insert ingredients
DROP PROCEDURE IF EXISTS addIngredients;
DELETE FROM ingredients;
DELIMITER $$
CREATE PROCEDURE addIngredients()
BEGIN
DECLARE finished INTEGER DEFAULT 0; -- variable indicating if there are more rows
DECLARE ingredient varchar(4000) DEFAULT ""; -- placeholder for ingredients varchar
DECLARE currentIngredient varchar(50) DEFAULT ""; -- placeholder for current ingredient
DECLARE n INTEGER DEFAULT 0;
DECLARE ingredientID INTEGER DEFAULT 1; -- running count of ingredient_id (increments to keep unique)
-- cursor for iterating over recipes table
DECLARE curIngredient
CURSOR FOR
SELECT ingredients FROM raw_recipes;
OPEN curIngredient;
getIngredients: LOOP
FETCH curIngredient INTO ingredient;
IF ingredient <> 'ingredients' THEN
IF finished = 1 THEN
LEAVE getIngredients;
END IF;
SET ingredient = REPLACE(TRIM(LEADING '[' FROM TRIM(TRAILING ']' FROM ingredient)), "'", "");
WHILE ingredient <> '' DO
-- read until next ',' to get next ingredient
SET currentIngredient = TRIM(SUBSTRING_INDEX(ingredient, ",", 1));
SET n = LOCATE(",", ingredient);
IF n = 0 THEN
SET ingredient = '';
ELSE
SET ingredient = SUBSTR(ingredient, n + 1);
END IF;
-- insert new value into table
INSERT INTO ingredients(
ingredient_id,
name
) VALUES (
ingredientID,
currentIngredient
);
-- increment ingredient_id
SET ingredientID = ingredientID + 1;
END WHILE;
END IF;
END LOOP getIngredients;
CLOSE curIngredient;
END $$
DELIMITER ;
-- Call stored procedure
CALL addIngredients;
-- Remove duplicate ingredients
DELETE i1 FROM ingredients i1
INNER JOIN ingredients i2 USING(NAME)
WHERE i1.ingredient_id > i2.ingredient_id;
CREATE INDEX idx_ingredient_name ON ingredients(name);
-- insert tags
DROP PROCEDURE IF EXISTS addTags;
DELETE FROM tag;
DELIMITER $$
CREATE PROCEDURE addTags()
BEGIN
DECLARE finished INTEGER DEFAULT 0; -- variable indicating if there are more rows
DECLARE tag varchar(4000) DEFAULT ""; -- placeholder for tags varchar
DECLARE currentTag varchar(50) DEFAULT ""; -- placeholder for current tag
DECLARE n INTEGER DEFAULT 0;
DECLARE tagID INTEGER DEFAULT 1; -- running count of tag_id (increments to keep unique)
-- cursor for iterating over recipes table
DECLARE curTag
CURSOR FOR
SELECT tags FROM raw_recipes;
OPEN curTag;
getTags: LOOP
FETCH curTag INTO tag;
IF tag <> 'tags' THEN
IF finished = 1 THEN
LEAVE getTags;
END IF;
SET tag = REPLACE(TRIM(LEADING '[' FROM TRIM(TRAILING ']' FROM tag)), "'", "");
WHILE tag <> '' DO
-- read until next ',' to get next tag
SET currentTag = TRIM(SUBSTRING_INDEX(tag, ",", 1));
SET n = LOCATE(",", tag);
IF n = 0 THEN
SET tag = '';
ELSE
SET tag = SUBSTR(tag, n + 1);
END IF;
-- insert new value into table
INSERT INTO tag(
tag_id,
name
) VALUES (
tagID,
currentTag
);
-- increment tag_id
SET tagID = tagID + 1;
END WHILE;
END IF;
END LOOP getTags;
CLOSE curTag;
END $$
DELIMITER ;
-- Call stored procedure
CALL addTags;
-- Remove duplicate tag
DELETE i1 FROM tag i1
INNER JOIN tag i2 USING(NAME)
WHERE i1.tag_id > i2.tag_id;
CREATE INDEX idx_tag_name ON tag(name);
-- insert recipe_ingredients
DROP PROCEDURE addRecipeIngredient;
DELIMITER $$
CREATE PROCEDURE addRecipeIngredient()
BEGIN
DECLARE finished INTEGER DEFAULT 0;
DECLARE recipe_id INTEGER DEFAULT 0;
DECLARE ingredient varchar(4000) DEFAULT "";
DECLARE curr varchar(50) DEFAULT "";
DECLARE n INTEGER DEFAULT 0;
DECLARE ingredientID INTEGER DEFAULT 1;
DECLARE curIngredient
CURSOR FOR
SELECT ID, ingredients FROM raw_recipes;
OPEN curIngredient;
getIngredients: LOOP
FETCH curIngredient INTO recipe_id, ingredient;
IF ingredient <> 'ingredients' THEN
IF finished = 1 THEN
LEAVE getIngredients;
END IF;
SET ingredient = REPLACE(TRIM(LEADING '[' FROM TRIM(TRAILING ']' FROM ingredient)), "'", "");
WHILE ingredient <> '' DO
SET curr = TRIM(SUBSTRING_INDEX(ingredient, ",", 1));
SET n = LOCATE(",", ingredient);
IF n = 0 THEN
SET ingredient = '';
ELSE
SET ingredient = SUBSTR(ingredient, n + 1);
END IF;
SET ingredientID = (select ingredient_id from ingredients where name=curr);
INSERT INTO recipe_ingredients (
recipe_id,
ingredient_id
) VALUES (
recipe_id,
ingredientID
);
END WHILE;
END IF;
END LOOP getIngredients;
CLOSE curIngredient;
END $$
DELIMITER ;
CALL addRecipeIngredient;
-- insert recipe_tags
DROP PROCEDURE addRecipeTag;
DELIMITER $$
CREATE PROCEDURE addRecipeTag()
BEGIN
DECLARE finished INTEGER DEFAULT 0;
DECLARE recipe_id INTEGER DEFAULT 0;
DECLARE tag varchar(4000) DEFAULT "";
DECLARE curr varchar(50) DEFAULT "";
DECLARE n INTEGER DEFAULT 0;
DECLARE tagID INTEGER DEFAULT 1;
DECLARE curTag
CURSOR FOR
SELECT ID, tags FROM raw_recipes;
OPEN curTag;
getTags: LOOP
FETCH curTag INTO recipe_id, tag;
IF tag <> 'Tags' THEN
IF finished = 1 THEN
LEAVE getTags;
END IF;
SET tag = REPLACE(TRIM(LEADING '[' FROM TRIM(TRAILING ']' FROM tag)), "'", "");
WHILE tag <> '' DO
SET curr = TRIM(SUBSTRING_INDEX(tag, ",", 1));
SET n = LOCATE(",", tag);
IF n = 0 THEN
SET tag = '';
ELSE
SET tag = SUBSTR(tag, n + 1);
END IF;
SET tagID = (select tag_id from tag where name=curr);
INSERT INTO recipe_tag (
recipe_id,
tag_id
) VALUES (
recipe_id,
tagID
);
END WHILE;
END IF;
END LOOP getTags;
CLOSE curTag;
END $$
DELIMITER ;
CALL addRecipeTag;
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment