Commit fc5402f4 authored by Stefan Vercillo's avatar Stefan Vercillo
Browse files

Working scripts creation and modification

parent b6f6ee12
CREATING DB:
cd into the scripts directory, then run the following in this order:
script1.sql
script2.sql
script3.sql
python3 update_course_offering_date.py # with mysql-connector pip module installed
python3 update_imd_band.py # with mysql-connector pip module installed
script.sql
\ No newline at end of file
This diff is collapsed.
......@@ -4,8 +4,8 @@ drop table if exists studentAssessment;
drop table if exists studentInfo;
drop table if exists vle;
drop table if exists assessments;
drop table if exists courses;
create table courses(
drop table if exists courseOfferings;
create table courseOfferings(
code_module varchar(45),
code_presentation varchar(45),
module_presentation_length int(3),
......@@ -21,7 +21,7 @@ create table assessments(
date VARCHAR(10), -- ?????
weight VARCHAR(10),
primary key (id_assessment),
constraint fk_courses_assessments foreign key (code_module, code_presentation) references courses (code_module, code_presentation)
constraint fk_courses_assessments foreign key (code_module, code_presentation) references courseOfferings (code_module, code_presentation)
);
create table vle (
......@@ -32,7 +32,7 @@ create table vle (
week_from int,
week_to int,
primary key (id_site),
constraint fk_courses_vle foreign key (code_module, code_presentation) references courses (code_module, code_presentation)
constraint fk_courses_vle foreign key (code_module, code_presentation) references courseOfferings (code_module, code_presentation)
);
create table studentInfo(
......@@ -47,7 +47,7 @@ create table studentInfo(
num_of_prev_attempts int,
studied_credits int,
primary key (id_student),
constraint fk_courses_studentinfo foreign key (code_module, code_presentation) references courses (code_module, code_presentation)
constraint fk_courses_studentinfo foreign key (code_module, code_presentation) references courseOfferings (code_module, code_presentation)
);
......@@ -69,7 +69,7 @@ create table studentRegistration(
date_registration int,
date_unregistration int,
constraint fk_studentinfo_registration foreign key (id_student) references studentInfo(id_student),
constraint fk_courses_registration foreign key (code_module, code_presentation) references courses (code_module, code_presentation)
constraint fk_courses_registration foreign key (code_module, code_presentation) references courseOfferings (code_module, code_presentation)
);
......@@ -81,56 +81,58 @@ create table studentVle (
date int,
sum_click int,
constraint fk_studentinfo_studentvle foreign key (id_student) references studentInfo(id_student),
constraint fk_courses_studentvle foreign key (code_module, code_presentation) references courses (code_module, code_presentation),
constraint fk_courses_studentvle foreign key (code_module, code_presentation) references courseOfferings (code_module, code_presentation),
constraint fk_vle_studentvle foreign key (id_site) references vle(id_site)
);
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/courses.csv' ignore into table courses
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/courses.csv' ignore into table courseOfferings
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/assessments.csv' ignore into table assessments
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/assessments.csv' ignore into table assessments
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/vle.csv' ignore into table vle
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/vle.csv' ignore into table vle
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/studentInfo.csv' ignore into table studentInfo
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/studentInfo.csv' ignore into table studentInfo
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/studentAssessment.csv' ignore into table studentAssessment
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/studentAssessment.csv' ignore into table studentAssessment
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/studentRegistration.csv' ignore into table studentRegistration
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/studentRegistration.csv' ignore into table studentRegistration
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 1 lines;
load data LOCAL infile '/Users/rzhang/workspace/356/356-group-40-project/data_files/studentVle.csv' ignore into table studentVle
load data LOCAL infile '/home/stefan/stefan/3B/356/ece356/project/data_files/studentVle.csv' ignore into table studentVle
fields terminated by ','
enclosed by '"'
lines terminated by '\r\n'
ignore 10600000 lines;
ignore 8000000 lines;
......@@ -18,8 +18,8 @@ ALTER TABLE assessments DROP CONSTRAINT fk_courses_assessments;
ALTER TABLE assessments DROP INDEX fk_courses_assessments;
-- Add new PK column
ALTER TABLE Courses DROP PRIMARY KEY;
ALTER TABLE Courses ADD course_id int auto_increment primary key;
-- Swap the existing primary key of courseOfferings for a surrogate integer key
-- (course_offering_id) that child tables can reference through one column.
ALTER TABLE courseOfferings DROP PRIMARY KEY;
-- Fixed: the keyword was misspelled "auto_incre6ment", which is a syntax error.
ALTER TABLE courseOfferings ADD course_offering_id int auto_increment primary key;
-- ---------
-- TABLE_NAME, CONSTRAINT_TYPE, CONSTRAINT_NAME
......@@ -29,30 +29,31 @@ ALTER TABLE Courses ADD course_id int auto_increment primary key;
-- ------
-- Add ID to the tables
ALTER TABLE studentVle ADD course_id int;
ALTER TABLE studentVle ADD CONSTRAINT fk_courseid_studentVle FOREIGN KEY (course_id) REFERENCES Courses (course_id);
UPDATE studentVle LEFT JOIN Courses ON (studentVle.code_module = Courses.code_module AND studentVle.code_presentation = Courses.code_presentation)
SET studentVle.course_id = Courses.course_id;
ALTER TABLE studentRegistration ADD course_id int;
ALTER TABLE studentRegistration ADD CONSTRAINT fk_courseid_studentRegistration FOREIGN KEY (course_id) REFERENCES Courses (course_id);
UPDATE studentRegistration LEFT JOIN Courses ON (studentRegistration.code_module = Courses.code_module AND studentRegistration.code_presentation = Courses.code_presentation)
SET studentRegistration.course_id = Courses.course_id;
ALTER TABLE studentInfo ADD course_id int;
ALTER TABLE studentInfo ADD CONSTRAINT fk_courseid_studentInfo FOREIGN KEY (course_id) REFERENCES Courses (course_id);
UPDATE studentInfo LEFT JOIN Courses ON (studentInfo.code_module = Courses.code_module AND studentInfo.code_presentation = Courses.code_presentation)
SET studentInfo.course_id = Courses.course_id;
ALTER TABLE vle ADD course_id int;
ALTER TABLE vle ADD CONSTRAINT fk_courseid_vle FOREIGN KEY (course_id) REFERENCES Courses (course_id);
UPDATE vle LEFT JOIN Courses ON (vle.code_module = Courses.code_module AND vle.code_presentation = Courses.code_presentation)
SET vle.course_id = Courses.course_id;
ALTER TABLE assessments ADD course_id int;
ALTER TABLE assessments ADD CONSTRAINT fk_courseid_assessments FOREIGN KEY (course_id) REFERENCES Courses (course_id);
UPDATE assessments LEFT JOIN Courses ON (assessments.code_module = Courses.code_module AND assessments.code_presentation = Courses.code_presentation)
SET assessments.course_id = Courses.course_id;
-- For every child table: add a nullable course_offering_id column, declare the
-- foreign key to courseOfferings, then back-fill it by matching the
-- (code_module, code_presentation) pair. LEFT JOIN leaves rows without a
-- matching offering at NULL.

-- studentVle
ALTER TABLE studentVle ADD course_offering_id int;
ALTER TABLE studentVle ADD CONSTRAINT fk_courseid_studentVle FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id);
UPDATE studentVle LEFT JOIN courseOfferings ON (studentVle.code_module = courseOfferings.code_module AND studentVle.code_presentation = courseOfferings.code_presentation)
SET studentVle.course_offering_id = courseOfferings.course_offering_id;

-- studentRegistration
ALTER TABLE studentRegistration ADD course_offering_id int;
ALTER TABLE studentRegistration ADD CONSTRAINT fk_courseid_studentRegistration FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id);
UPDATE studentRegistration LEFT JOIN courseOfferings ON (studentRegistration.code_module = courseOfferings.code_module AND studentRegistration.code_presentation = courseOfferings.code_presentation)
SET studentRegistration.course_offering_id = courseOfferings.course_offering_id;

-- studentInfo
-- Fixed: the column was never added, so the constraint and UPDATE below failed.
ALTER TABLE studentInfo ADD course_offering_id int;
ALTER TABLE studentInfo ADD CONSTRAINT fk_courseid_studentInfo FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id);
UPDATE studentInfo LEFT JOIN courseOfferings ON (studentInfo.code_module = courseOfferings.code_module AND studentInfo.code_presentation = courseOfferings.code_presentation)
SET studentInfo.course_offering_id = courseOfferings.course_offering_id;

-- vle
-- Fixed: the column and its foreign key were missing, so the UPDATE referenced
-- a non-existent column.
ALTER TABLE vle ADD course_offering_id int;
ALTER TABLE vle ADD CONSTRAINT fk_courseid_vle FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id);
UPDATE vle LEFT JOIN courseOfferings ON (vle.code_module = courseOfferings.code_module AND vle.code_presentation = courseOfferings.code_presentation)
SET vle.course_offering_id = courseOfferings.course_offering_id;

-- assessments
ALTER TABLE assessments ADD course_offering_id int;
ALTER TABLE assessments ADD CONSTRAINT fk_courseid_assessments FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id);
UPDATE assessments LEFT JOIN courseOfferings ON (assessments.code_module = courseOfferings.code_module AND assessments.code_presentation = courseOfferings.code_presentation)
SET assessments.course_offering_id = courseOfferings.course_offering_id;
-- Remove modules and presentations
ALTER TABLE studentVle DROP COLUMN code_module;
......@@ -78,5 +79,80 @@ ALTER TABLE assessments DROP COLUMN code_presentation;
-- ALTER TABLE studentInfo ADD date_unregistration datetime;
-- INSERT INTO studentInfo (date_registration, date_unregistration) SELECT date_registration, date_unregistration
-- FROM studentRegistration WHERE studentInfo.course_id == studentRegistration.course_id;
-- FROM studentRegistration WHERE studentInfo.course_offering_id == studentRegistration.course_offering_id;
-- imd_band -> imd_upperbound
-- Replace the free-text age_band column with an ENUM column, age_range.
ALTER TABLE studentInfo ADD age_range ENUM('0-35','35-55','55<=');
-- Fixed: STRCMP() returns 0 when its arguments are equal, so a bare
-- "WHERE STRCMP(age_band, 'x')" selected exactly the rows that did NOT match
-- and every row ended up with the wrong range. Use plain equality instead.
UPDATE studentInfo SET age_range = '0-35' WHERE age_band = '0-35';
UPDATE studentInfo SET age_range = '35-55' WHERE age_band = '35-55';
UPDATE studentInfo SET age_range = '55<=' WHERE age_band = '55<=';
-- The text column is redundant once age_range is populated.
ALTER TABLE studentInfo DROP COLUMN age_band;
-- Move studentInfo.region into its own lookup table.
-- One row is created per distinct region string; studentInfo then points at it
-- through regionid and the original text column is removed.
CREATE TABLE region (
    regionid INT AUTO_INCREMENT PRIMARY KEY,
    name     VARCHAR(20)
);

INSERT INTO region (name)
SELECT DISTINCT region
FROM studentInfo;

ALTER TABLE studentInfo
    ADD regionid INT;
ALTER TABLE studentInfo
    ADD CONSTRAINT fk_regionid_studentInfo
        FOREIGN KEY (regionid) REFERENCES region (regionid);

-- Back-fill the new key by matching on the region name.
UPDATE studentInfo
JOIN region
    ON (studentInfo.region = region.name)
SET studentInfo.regionid = region.regionid;

ALTER TABLE studentInfo
    DROP COLUMN region;
-- Move studentInfo.highest_education into its own lookup table.
-- education_rank is assigned by AUTO_INCREMENT in whatever order the DISTINCT
-- query returns rows (there is no ORDER BY), so it is an identifier only.
-- NOTE(review): unlike region, highest_education is not dropped afterwards -
-- confirm whether that is intentional.
CREATE TABLE educationLevel (
    education_rank  INT AUTO_INCREMENT PRIMARY KEY,
    education_level VARCHAR(40)
);

INSERT INTO educationLevel (education_level)
SELECT DISTINCT highest_education
FROM studentInfo;

ALTER TABLE studentInfo
    ADD education_rank INT;
ALTER TABLE studentInfo
    ADD CONSTRAINT fk_education_rank_studentInfo
        FOREIGN KEY (education_rank) REFERENCES educationLevel (education_rank);

-- Back-fill the new key by matching on the education level text.
UPDATE studentInfo
JOIN educationLevel
    ON (studentInfo.highest_education = educationLevel.education_level)
SET studentInfo.education_rank = educationLevel.education_rank;
-- gender to enum: build the ENUM in a temporary column, copy the values over,
-- then swap it in under the original column name.
ALTER TABLE studentInfo ADD gender_enum ENUM('M','F');
-- Fixed: STRCMP() returns 0 when its arguments are equal, so the previous
-- "WHERE STRCMP(gender, 'M')" matched the rows whose gender was NOT 'M' and the
-- two updates stored the opposite value for every row. Use plain equality.
UPDATE studentInfo SET gender_enum = 'M' WHERE gender = 'M';
UPDATE studentInfo SET gender_enum = 'F' WHERE gender = 'F';
ALTER TABLE studentInfo DROP COLUMN gender;
ALTER TABLE studentInfo RENAME COLUMN gender_enum TO gender;
-- ===== studentAssessment Changes ===== --
-- Add a boolean "failed" flag: TRUE for scores below 40, FALSE for 40 and up.
-- Rows whose score is NULL are not touched and keep failed = NULL.
ALTER TABLE studentAssessment
    ADD failed BOOLEAN;

UPDATE studentAssessment
SET failed = CASE WHEN score < 40 THEN TRUE ELSE FALSE END
WHERE score IS NOT NULL;
-- ===== studentVle Changes ===== --
-- "interactions" reads better than "sum_click".
ALTER TABLE studentVle
    RENAME COLUMN sum_click TO interactions;

-- studentVle gets a surrogate auto-increment key.
ALTER TABLE studentVle
    ADD id INT AUTO_INCREMENT PRIMARY KEY;

-- studentAssessment is keyed by the (assessment, student) pair.
ALTER TABLE studentAssessment
    ADD PRIMARY KEY (id_assessment, id_student);

-- studentRegistration gets a surrogate key placed as the first column.
ALTER TABLE studentRegistration
    ADD COLUMN id_registration INT AUTO_INCREMENT NOT NULL FIRST,
    ADD PRIMARY KEY (id_registration);
-- Normalize the registration data: turn 0 placeholders into NULL, then copy
-- every recorded unregistration into its own table.
-- NOTE(review): 0 is treated as "no value" here - presumably because blank CSV
-- fields load as 0 into int columns; confirm a genuine day-0 date cannot occur.
UPDATE studentRegistration
SET date_registration = NULL
WHERE date_registration = 0;

UPDATE studentRegistration
SET date_unregistration = NULL
WHERE date_unregistration = 0;

CREATE TABLE studentUnregistration (
    id_unregistration   INT AUTO_INCREMENT,
    id_student          INT NOT NULL,
    date_unregistration INT NOT NULL,
    course_offering_id  INT,
    id_registration     INT,
    PRIMARY KEY (id_unregistration),
    CONSTRAINT fk_studentid_unregistration
        FOREIGN KEY (id_student) REFERENCES studentInfo (id_student),
    CONSTRAINT fk_courseofferingid_unregistration
        FOREIGN KEY (course_offering_id) REFERENCES courseOfferings (course_offering_id),
    CONSTRAINT fk_registrationid_unregistration
        FOREIGN KEY (id_registration) REFERENCES studentRegistration (id_registration)
);

-- Populate studentUnregistration from the registrations that have an
-- unregistration date.
INSERT INTO studentUnregistration (id_student, date_unregistration, course_offering_id, id_registration)
SELECT
    id_student,
    date_unregistration,
    course_offering_id,
    id_registration
FROM studentRegistration
WHERE date_unregistration IS NOT NULL;
-- Prepare courseOfferings for update_course_offering_date.py, which fills in
-- year and semester for each offering.
ALTER TABLE courseOfferings
    ADD COLUMN year INT NOT NULL,
    ADD COLUMN semester ENUM('Fall', 'Winter') NOT NULL;

-- CHANGE studentInfo
-- Keep the original text band as imd_band_o and add an integer imd_band that
-- update_imd_band.py fills in.
ALTER TABLE studentInfo
    RENAME COLUMN imd_band TO imd_band_o,
    ADD COLUMN imd_band INT;

-- AFTER RUNNING PYTHON!
-- The text column is no longer needed once the integer band is populated.
ALTER TABLE studentInfo
    DROP COLUMN imd_band_o;

-- The Python script writes -1 for a missing band; store that as NULL.
UPDATE studentInfo
SET imd_band = NULL
WHERE imd_band = -1;
-- Convert assessments.date and assessments.weight from text to int.
-- The text values are kept as date_o / weight_o while the new int columns are
-- filled, then the text columns are dropped.
alter table assessments
    rename column date to date_o,
    rename column weight to weight_o,
    add column date int,
    add column weight int;

-- Fixed: the previous version replaced blanks with the sentinel '-9999.9',
-- cast it, and then tried to turn "-9999.9" back into NULL. CAST(... AS DECIMAL)
-- without precision is DECIMAL(10,0), so the sentinel was rounded to -10000,
-- the "= -9999.9" comparisons never matched, and blank values were stored as
-- -10000 instead of NULL. NULLIF maps blanks straight to NULL with no sentinel.
-- Intentionally unfiltered: every row is converted.
update assessments
set date = CAST(NULLIF(date_o, '') AS DECIMAL),
    weight = CAST(NULLIF(weight_o, '') AS DECIMAL);

alter table assessments
    drop column date_o,
    drop column weight_o;
-- Split course-level data out of courseOfferings: courseInfo holds one row per
-- distinct module code, and each offering points at its course via course_id.
create table courseInfo (
    course_id int AUTO_INCREMENT,
    course_code CHAR(3),
    description varchar(500),
    PRIMARY key (course_id)
);

insert into courseInfo (course_code)
select distinct code_module from courseOfferings;

-- Fixed: the foreign key used to be declared on courseInfo.course_id
-- referencing courseOfferings.course_offering_id, which tied an auto-generated
-- course id to an unrelated offering id. In addition, code_module was dropped
-- below without recording which course an offering belongs to. Link each
-- offering to its course before the module code is removed.
alter table courseOfferings
    add column course_id int;

update courseOfferings
inner join courseInfo
    on courseInfo.course_code = courseOfferings.code_module
set courseOfferings.course_id = courseInfo.course_id;

alter table courseOfferings
    add constraint fk_courseid_courseofferings
        foreign key (course_id) references courseInfo (course_id);

alter table courseOfferings
    drop column code_module,
    drop column code_presentation,
    rename column module_presentation_length to course_offering_length_in_days;
"""Back-fill courseOfferings.year and courseOfferings.semester.

Both values are derived from code_presentation: the first four characters are
the year and the last character selects the semester ("J" -> Fall, anything
else -> Winter).
"""
import os


def split_presentation(code_presentation):
    """Return (year, semester) for a presentation code such as "2013J".

    Raises ValueError if the first four characters are not a number.
    """
    year = int(code_presentation[:4])
    semester = "Fall" if code_presentation[-1:] == "J" else "Winter"
    return year, semester


def main():
    # Imported here so the helper above can be used without the driver installed.
    import mysql.connector

    # NOTE(review): credentials are committed to the repository. The environment
    # variables below override them; the literals remain only as fallbacks so
    # the existing workflow keeps working - rotate and remove them.
    cnx = mysql.connector.connect(
        host=os.environ.get("EDUCATION_DB_HOST", 'localhost'),
        user=os.environ.get("EDUCATION_DB_USER", 'snvercil'),
        database=os.environ.get("EDUCATION_DB_NAME", 'Education'),
        password=os.environ.get("EDUCATION_DB_PASSWORD", "32darklink"),
    )
    try:
        cursor = cnx.cursor()
        try:
            # The database is already selected by connect(); the former
            # "USE EDUCATION" was redundant and used a different letter case.
            # Name the columns instead of relying on SELECT * positions.
            cursor.execute(
                "SELECT course_offering_id, code_presentation FROM courseOfferings;"
            )
            offerings = cursor.fetchall()

            for course_offering_id, code_presentation in offerings:
                year, semester = split_presentation(code_presentation)
                print(
                    f"update courseOfferings set year = {year}, "
                    f"semester = '{semester}' "
                    f"where course_offering_id = {course_offering_id} ;"
                )
                # Bind values as parameters rather than formatting them into SQL.
                cursor.execute(
                    "update courseOfferings set year = %s, semester = %s "
                    "where course_offering_id = %s",
                    (year, semester, course_offering_id),
                )

            cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()


if __name__ == "__main__":
    main()
\ No newline at end of file
"""Fill studentInfo.imd_band (int) from the text band kept in imd_band_o.

A band such as "20-30%" is stored as its upper bound (30). A missing band is
stored as -1.
"""
import os


def imd_upper_bound(band):
    """Return the upper bound of a text band such as "0-10%" or "10-20".

    None and blank strings map to -1. The trailing "%" is optional because the
    original lookup also matched the band "10-20" without it. Raises ValueError
    for text that is not a band, instead of passing it into the SQL statement.
    """
    if band is None or band.strip() == "":
        return -1
    upper = band.strip().rstrip("%").rpartition("-")[2]
    return int(upper)


def main():
    # Imported here so the helper above can be used without the driver installed.
    import mysql.connector

    # NOTE(review): credentials are committed to the repository. The environment
    # variables below override them; the literals remain only as fallbacks so
    # the existing workflow keeps working - rotate and remove them.
    cnx = mysql.connector.connect(
        host=os.environ.get("EDUCATION_DB_HOST", 'localhost'),
        user=os.environ.get("EDUCATION_DB_USER", 'snvercil'),
        database=os.environ.get("EDUCATION_DB_NAME", 'Education'),
        password=os.environ.get("EDUCATION_DB_PASSWORD", "32darklink"),
    )
    try:
        cursor = cnx.cursor()
        try:
            # The database is already selected by connect(); the former
            # "USE EDUCATION" was redundant and used a different letter case.
            # Name the columns instead of relying on SELECT * positions.
            cursor.execute("SELECT id_student, imd_band_o FROM studentInfo;")
            students = cursor.fetchall()

            for id_student, band in students:
                # Bind values as parameters rather than formatting them into SQL.
                cursor.execute(
                    "update studentInfo set imd_band = %s where id_student = %s",
                    (imd_upper_bound(band), id_student),
                )

            # Make sure data is committed to the database
            cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()


if __name__ == "__main__":
    main()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment