Skip to content

Commit

Permalink
Updated SQL code
Browse files Browse the repository at this point in the history
  • Loading branch information
ciioprof0 committed Aug 7, 2024
1 parent a0feb86 commit 3f446b9
Show file tree
Hide file tree
Showing 34 changed files with 1,225 additions and 1,120 deletions.
52 changes: 13 additions & 39 deletions info579/sql/procedures/sp_check_lexicon_constraints.sql
Original file line number Diff line number Diff line change
@@ -1,45 +1,19 @@
-- Procedure: stixd_corpus.check_prolog_constraints
-- Walks every (pattern, message) row of stixd_corpus.prolog_constraints and
-- raises SQLSTATE '45000' carrying that row's message as soon as word_form
-- fails to match the row's regular expression.
-- Parameters:
--   word_form - the candidate string tested against each stored pattern
DELIMITER //
CREATE PROCEDURE stixd_corpus.check_prolog_constraints(IN word_form VARCHAR(255))
BEGIN
    DECLARE no_more_rows INT DEFAULT FALSE;
    DECLARE rule_pattern VARCHAR(255);
    DECLARE rule_message VARCHAR(255);

    DECLARE rule_cursor CURSOR FOR
        SELECT pattern, message
        FROM stixd_corpus.prolog_constraints;
    -- The handler flips the flag when a FETCH runs past the last row.
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET no_more_rows = TRUE;

    OPEN rule_cursor;

    -- Priming fetch; the loop body re-fetches at its end.
    FETCH rule_cursor INTO rule_pattern, rule_message;
    WHILE NOT no_more_rows DO
        IF NOT (word_form REGEXP rule_pattern) THEN
            SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = rule_message;
        END IF;
        FETCH rule_cursor INTO rule_pattern, rule_message;
    END WHILE;

    CLOSE rule_cursor;
END;
//
DELIMITER ;

-- Create main procedure to check lexicon constraints
-- Create main procedure to check lexicon constraints (sp_check_lexicon_constraints.sql)
DELIMITER //
CREATE PROCEDURE stixd_corpus.sp_check_lexicon_constraints(
IN new_word_tag VARCHAR(12),
IN new_word_form VARCHAR(99),
IN new_logical_symbol VARCHAR(99),
IN new_third_arg VARCHAR(15)
IN new_word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
IN new_word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
IN new_logical_symbol VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
IN new_third_arg VARCHAR(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
DECLARE prohibited_word_found INT DEFAULT 0;
DECLARE tag_form_hash VARCHAR(64);
DECLARE tag_form_hash VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
DECLARE new_word_class VARCHAR(31) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
DECLARE new_preposition INT DEFAULT NULL;

-- Step 1: Check prolog constraints for word_form
CALL stixd_corpus.check_prolog_constraints(new_word_form);
CALL stixd_corpus.sp_check_prolog_constraints(new_word_form);

-- Step 2: Check prohibited words
SELECT COUNT(*)
Expand All @@ -53,10 +27,10 @@ BEGIN
END IF;

-- Step 3: Check prohibited intersections for word_form
CALL stixd_corpus.check_prohib_intersections(new_word_form);
CALL stixd_corpus.sp_check_prohib_intersections(new_word_tag, new_word_form);

-- Step 4: Generate the SHA256 hash for word_tag and word_form combination
SET tag_form_hash = SHA2(CONCAT(new_word_tag, new_word_form), 256);
-- Step 4: Generate and validate hashes
CALL stixd_corpus.sp_gen_hashes(new_word_tag, new_word_form, tag_form_hash);

-- Step 5: Check if the tag_form_hash already exists in the lexicon table.
-- The column is qualified with a table alias on purpose: an unqualified
-- tag_form_hash inside this procedure resolves to the local variable of the
-- same name, so "tag_form_hash = tag_form_hash" would compare the variable
-- with itself and match every row.
IF EXISTS (SELECT 1 FROM stixd_corpus.lexicon AS lx WHERE lx.tag_form_hash = tag_form_hash) THEN
Expand Down Expand Up @@ -85,6 +59,6 @@ BEGIN
SIGNAL SQLSTATE '45000'
SET MESSAGE_TEXT = 'logical_symbol and third_arg cannot both be NOT NULL';
END IF;
END//
END //
//
DELIMITER ;
46 changes: 46 additions & 0 deletions info579/sql/procedures/sp_check_prohib_intersections.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Create procedure to check for prohibited word_tag intersections (sp_check_prohib_intersections.sql)
--
-- Purpose: reject a candidate (word_tag, word_form) pair when the lexicon
-- already holds the same word_form under a word_tag that is paired with
-- new_word_tag in stixd_corpus.prohibited_intersections, in either column order.
-- Parameters:
--   new_word_tag  - word_tag of the candidate entry
--   new_word_form - word_form of the candidate entry
-- Raises:
--   SQLSTATE '45000' with 'This combination of word_tags is prohibited'
DELIMITER //

CREATE PROCEDURE stixd_corpus.sp_check_prohib_intersections(
    IN new_word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    IN new_word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
    -- A single EXISTS covers both pair orientations and stops at the first
    -- match. When the word_form is absent from the lexicon the join yields no
    -- rows, so no separate "does the word_form exist" guard is needed.
    IF EXISTS (
        SELECT 1
        FROM stixd_corpus.lexicon AS lx
        INNER JOIN stixd_corpus.prohibited_intersections AS px
            ON (px.word_tag1 = new_word_tag AND px.word_tag2 = lx.word_tag)
            OR (px.word_tag2 = new_word_tag AND px.word_tag1 = lx.word_tag)
        WHERE lx.word_form = new_word_form
    ) THEN
        SIGNAL SQLSTATE '45000'
            SET MESSAGE_TEXT = 'This combination of word_tags is prohibited';
    END IF;
END;
//
DELIMITER ;
67 changes: 45 additions & 22 deletions info579/sql/procedures/sp_check_prolog_constraints.sql
Original file line number Diff line number Diff line change
@@ -1,27 +1,50 @@
-- Create procedure to check for prolog constraints
-- Create procedure to check for prohibited word_class intersections of word_class and preposition (sp_check_prohib_intersections.sql)
DELIMITER //
CREATE PROCEDURE stixd_corpus.check_prolog_constraints (IN word_form VARCHAR(255))

CREATE PROCEDURE stixd_corpus.sp_check_prohib_intersections(
IN new_word_tag VARCHAR(31) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
IN new_word_form VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
DECLARE done INT DEFAULT FALSE;
DECLARE constraint_violation VARCHAR(255);
DECLARE constraint_message VARCHAR(255);

DECLARE cur CURSOR FOR SELECT pattern, message FROM stixd_corpus.prolog_constraints;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;

OPEN cur;

read_loop: LOOP
FETCH cur INTO constraint_violation, constraint_message;
IF done THEN
LEAVE read_loop;
END IF;
IF NOT (word_form REGEXP constraint_violation) THEN
SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = constraint_message;
END IF;
END LOOP;

CLOSE cur;
DECLARE class_check INT DEFAULT 0;

-- Get the word_class and preposition from the lexicon table
DECLARE new_word_class VARCHAR(31) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
DECLARE new_preposition INT;

SELECT word_class INTO new_word_class FROM stixd_corpus.word_tags WHERE word_tag = new_word_tag;
SELECT preposition INTO new_preposition FROM stixd_corpus.lexicon WHERE word_form = new_word_form;

-- Check first condition
SELECT COUNT(*)
INTO class_check
FROM stixd_corpus.prohibited_intersections
WHERE class1 = new_word_class
AND class2 IN (SELECT word_class
FROM stixd_corpus.lexicon
WHERE lex_id = new_preposition);

IF class_check > 0 THEN
SIGNAL SQLSTATE '45000'
SET MESSAGE_TEXT = 'This combination of word_class and preposition is prohibited';
END IF;

-- Reset the variable
SET class_check = 0;

-- Check second condition
SELECT COUNT(*)
INTO class_check
FROM stixd_corpus.prohibited_intersections
WHERE class2 = new_word_class
AND class1 IN (SELECT word_class
FROM stixd_corpus.lexicon
WHERE lex_id = new_preposition);

IF class_check > 0 THEN
SIGNAL SQLSTATE '45000'
SET MESSAGE_TEXT = 'This combination of word_class and preposition is prohibited';
END IF;
END;
//
DELIMITER ;
14 changes: 14 additions & 0 deletions info579/sql/procedures/sp_gen_hases.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Procedure: stixd_corpus.sp_gen_hashes (sp_gen_hashes.sql)
-- Hands back, through the OUT parameter tag_form_hash, the SHA2(..., 256)
-- digest of new_word_tag immediately followed by new_word_form (no separator).
-- Parameters:
--   new_word_tag  - first half of the hashed string
--   new_word_form - second half of the hashed string
--   tag_form_hash - OUT: receives the digest
DELIMITER //

CREATE PROCEDURE stixd_corpus.sp_gen_hashes(
    IN new_word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    IN new_word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    OUT tag_form_hash VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
    SELECT SHA2(CONCAT(new_word_tag, new_word_form), 256)
    INTO tag_form_hash;
END;
//
DELIMITER ;
1 change: 1 addition & 0 deletions info579/sql/procedures/sp_run_seed_scripts.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- Seed all the ACE 6.7 tables in the database (sp_run_seed_scripts.sql)
DELIMITER //

CREATE PROCEDURE stixd_corpus.run_seed_scripts()
Expand Down
5 changes: 3 additions & 2 deletions info579/sql/schema/tables/create_table_doc_lex_jt.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Create the stixd_corpus.doc_lex_jt table
-- Create the stixd_corpus.doc_lex_jt table (create_table_doc_lex_jt.sql)
-- DROP TABLE IF EXISTS stixd_corpus.doc_lex_jt;
CREATE TABLE IF NOT EXISTS stixd_corpus.doc_lex_jt (
doc_id INT,
Expand All @@ -7,4 +7,5 @@ CREATE TABLE IF NOT EXISTS stixd_corpus.doc_lex_jt (
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id),
FOREIGN KEY (lex_id) REFERENCES stixd_corpus.lexicon(lex_id)
);
-- SHOW CREATE TABLE stixd_corpus.doc_lex_jt;
-- SHOW CREATE TABLE stixd_corpus.doc_lex_jt;
-- Populating table done from primary tables.
5 changes: 3 additions & 2 deletions info579/sql/schema/tables/create_table_doc_sent_jt.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Create the stixd_corpus.doc_sent_jt table
-- Create the stixd_corpus.doc_sent_jt table (create_table_doc_sent_jt.sql)
-- DROP TABLE IF EXISTS stixd_corpus.doc_sent_jt;
CREATE TABLE IF NOT EXISTS stixd_corpus.doc_sent_jt (
doc_id INT,
Expand All @@ -7,4 +7,5 @@ CREATE TABLE IF NOT EXISTS stixd_corpus.doc_sent_jt (
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id),
FOREIGN KEY (sent_id) REFERENCES stixd_corpus.sentences(sent_id)
);
-- SHOW CREATE TABLE stixd_corpus.doc_sent_jt;
-- SHOW CREATE TABLE stixd_corpus.doc_sent_jt;
-- Populating table done from primary tables.
7 changes: 3 additions & 4 deletions info579/sql/schema/tables/create_table_documents.sql
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
-- Create the stixd_corpus.documents table
-- Create the stixd_corpus.documents table (create_table_documents.sql)
-- DROP TABLE IF EXISTS stixd_corpus.documents;
CREATE TABLE IF NOT EXISTS stixd_corpus.documents (
doc_id INT AUTO_INCREMENT PRIMARY KEY,
raw_text LONGTEXT,
raw_text_hash CHAR(64) UNIQUE,
raw_text LONGTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
raw_text_hash CHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci UNIQUE,
proc_text JSON,
metadata JSON
);
-- SHOW CREATE TABLE stixd_corpus.documents;

-- Populating table done from Python script.
7 changes: 3 additions & 4 deletions info579/sql/schema/tables/create_table_genders.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
-- Create the stixd_corpus.genders table
-- Create the stixd_corpus.genders table (create_table_genders.sql)
-- DROP TABLE IF EXISTS stixd_corpus.genders;
CREATE TABLE IF NOT EXISTS stixd_corpus.genders (
gender VARCHAR(7) PRIMARY KEY
gender VARCHAR(7) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci PRIMARY KEY
);
-- SHOW CREATE TABLE stixd_corpus.genders;

-- Populating the table done with SQL seed script.
-- Populating the table done with SQL seed script.
17 changes: 8 additions & 9 deletions info579/sql/schema/tables/create_table_lexicon.sql
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
-- Create the stixd_corpus.lexicon table
-- Create the stixd_corpus.lexicon table (create_table_lexicon.sql)
DROP TABLE IF EXISTS stixd_corpus.lexicon;
CREATE TABLE IF NOT EXISTS stixd_corpus.lexicon (
lex_id INT AUTO_INCREMENT PRIMARY KEY,
word_tag VARCHAR(12),
word_form VARCHAR(99),
logical_symbol VARCHAR(99),
third_arg VARCHAR(15),
tag_form_hash VARCHAR(64) UNIQUE,
word_def TEXT,
word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
logical_symbol VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
third_arg VARCHAR(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
tag_form_hash VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci UNIQUE,
word_def TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
synsets JSON,
tagsets JSON
);
-- SHOW CREATE TABLE stixd_corpus.lexicon;

-- Populating table done from Python script.
-- Populating table done from Python script.
17 changes: 9 additions & 8 deletions info579/sql/schema/tables/create_table_obj_doc_jt.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
-- Create the stixd_corpus.obj_doc_jt table
-- DROP TABLE IF EXISTS stixd_corpus.obj_doc_jt;
CREATE TABLE IF NOT EXISTS stixd_corpus.obj_doc_jt (
obj_id VARCHAR(292),
-- Create the stixd_corpus.doc_sent_jt table (create_table_doc_sent_jt.sql)
-- DROP TABLE IF EXISTS stixd_corpus.doc_sent_jt;
CREATE TABLE IF NOT EXISTS stixd_corpus.doc_sent_jt (
doc_id INT,
PRIMARY KEY (obj_id, doc_id),
FOREIGN KEY (obj_id) REFERENCES stixd_corpus.stix_objects(obj_id),
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id)
sent_id INT,
PRIMARY KEY (doc_id, sent_id),
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id),
FOREIGN KEY (sent_id) REFERENCES stixd_corpus.sentences(sent_id)
);
-- SHOW CREATE TABLE stixd_corpus.obj_doc_jt;
-- SHOW CREATE TABLE stixd_corpus.doc_sent_jt;
-- Populating table done from primary tables.
5 changes: 3 additions & 2 deletions info579/sql/schema/tables/create_table_obj_lex_jt.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Create the stixd_corpus.obj_lex_jt table
-- Create the stixd_corpus.obj_lex_jt table (create_table_obj_lex_jt.sql)
-- DROP TABLE IF EXISTS stixd_corpus.obj_lex_jt;
CREATE TABLE IF NOT EXISTS stixd_corpus.obj_lex_jt (
obj_id VARCHAR(292),
Expand All @@ -7,4 +7,5 @@ CREATE TABLE IF NOT EXISTS stixd_corpus.obj_lex_jt (
FOREIGN KEY (obj_id) REFERENCES stixd_corpus.stix_objects(obj_id),
FOREIGN KEY (lex_id) REFERENCES stixd_corpus.lexicon(lex_id)
);
-- SHOW CREATE TABLE stixd_corpus.obj_lex_jt;
-- SHOW CREATE TABLE stixd_corpus.obj_lex_jt;
-- Populating table done from primary tables.
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
-- Create the stixd_corpus.prohibited_intersections table
-- Create the stixd_corpus.prohibited_intersections table (create_table_prohib_intersections.sql)
-- DROP TABLE IF EXISTS stixd_corpus.prohibited_intersections;
CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_intersections (
class1 VARCHAR(31),
class2 VARCHAR(31),
PRIMARY KEY (class1, class2)
word_tag1 VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
word_tag2 VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
PRIMARY KEY (word_tag1, word_tag2)
);
-- SHOW CREATE TABLE stixd_corpus.prohibited_intersections;

-- Populating the table done with SQL seed script.
7 changes: 3 additions & 4 deletions info579/sql/schema/tables/create_table_prohib_words.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
-- Create the stixd_corpus.prohibited_words table
-- Create the stixd_corpus.prohibited_words table (create_table_prohib_words.sql)
-- DROP TABLE IF EXISTS stixd_corpus.prohibited_words;
CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_words (
word VARCHAR(20) PRIMARY KEY
word VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci PRIMARY KEY
);
-- SHOW CREATE TABLE stixd_corpus.prohibited_words;

-- Populating the table done with SQL seed script.
-- Populating the table done with SQL seed script.
11 changes: 6 additions & 5 deletions info579/sql/schema/tables/create_table_prolog_constraints.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
-- Create the stixd_corpus.prolog_constraints table
-- Create the stixd_corpus.prolog_constraints table (create_table_prolog_constraints.sql)
-- DROP TABLE IF EXISTS stixd_corpus.prolog_constraints;
CREATE TABLE IF NOT EXISTS stixd_corpus.prolog_constraints (
constraint_id INT AUTO_INCREMENT PRIMARY KEY,
description VARCHAR(511),
pattern VARCHAR(255),
message VARCHAR(255),
description VARCHAR(511) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
pattern VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
message VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
auto_correct BOOLEAN
);
-- SHOW CREATE TABLE stixd_corpus.prolog_constraints;
-- SHOW CREATE TABLE stixd_corpus.prolog_constraints;
-- Populating the table done with SQL seed script.
Loading

0 comments on commit 3f446b9

Please sign in to comment.