-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
1,225 additions
and
1,120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
-- Create procedure to check for prohibited word_tag intersections (sp_check_prohib_intersections.sql)
--
-- Purpose:
--   Rejects a (new_word_tag, new_word_form) pair when new_word_form already
--   appears in stixd_corpus.lexicon under a tag that is paired with
--   new_word_tag in stixd_corpus.prohibited_intersections.
--
-- Parameters:
--   new_word_tag  - tag proposed for the word form.
--   new_word_form - word form to look up in the lexicon.
--
-- Raises:
--   SQLSTATE '45000' with MESSAGE_TEXT
--   'This combination of word_tags is prohibited' when a prohibited pairing
--   is found. Returns normally otherwise, including when new_word_form has
--   no lexicon rows at all.
DELIMITER //

CREATE PROCEDURE stixd_corpus.sp_check_prohib_intersections(
    IN new_word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    IN new_word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
    -- A prohibited pair is matched in either column order: new_word_tag may
    -- sit in word_tag1 or in word_tag2, with the existing lexicon tag in the
    -- other column. EXISTS stops at the first matching row, so no counter is
    -- needed, and a word form that is absent from the lexicon simply yields
    -- no match (no separate existence pre-check is required).
    IF EXISTS (
           SELECT 1
           FROM stixd_corpus.prohibited_intersections AS p
           INNER JOIN stixd_corpus.lexicon AS lx
               ON lx.word_tag = p.word_tag2
           WHERE p.word_tag1 = new_word_tag
             AND lx.word_form = new_word_form
       )
       OR EXISTS (
           SELECT 1
           FROM stixd_corpus.prohibited_intersections AS p
           INNER JOIN stixd_corpus.lexicon AS lx
               ON lx.word_tag = p.word_tag1
           WHERE p.word_tag2 = new_word_tag
             AND lx.word_form = new_word_form
       )
    THEN
        SIGNAL SQLSTATE '45000'
            SET MESSAGE_TEXT = 'This combination of word_tags is prohibited';
    END IF;
END;
//
DELIMITER ;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,50 @@ | ||
-- NOTE(review): this hunk is rendered without +/- markers. Lines of the replaced check_prolog_constraints procedure and of its replacement appear to be interleaved below (two CREATE PROCEDURE headers, one BEGIN/END), so the text is not runnable as shown. | ||
-- Create procedure to check for prolog constraints ) | ||
-- Create procedure to check for prohibited word_class intersections of word_class and preposition (sp_check_prohib_intersections.sql) | ||
DELIMITER // | ||
CREATE PROCEDURE stixd_corpus.check_prolog_constraints (IN word_form VARCHAR(255)) | ||
|
||
-- NOTE(review): this name is identical to the word_tag-based sp_check_prohib_intersections procedure created earlier in this commit; two CREATE PROCEDURE statements with the same name cannot both succeed -- confirm which definition is authoritative. | ||
CREATE PROCEDURE stixd_corpus.sp_check_prohib_intersections( | ||
IN new_word_tag VARCHAR(31) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
IN new_word_form VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci | ||
) | ||
BEGIN | ||
-- Cursor bookkeeping: done is flipped to TRUE by the NOT FOUND handler when the cursor is exhausted. | ||
DECLARE done INT DEFAULT FALSE; | ||
DECLARE constraint_violation VARCHAR(255); | ||
DECLARE constraint_message VARCHAR(255); | ||
|
||
-- Iterates over every (pattern, message) row of stixd_corpus.prolog_constraints. | ||
DECLARE cur CURSOR FOR SELECT pattern, message FROM stixd_corpus.prolog_constraints; | ||
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; | ||
|
||
OPEN cur; | ||
|
||
read_loop: LOOP | ||
FETCH cur INTO constraint_violation, constraint_message; | ||
IF done THEN | ||
LEAVE read_loop; | ||
END IF; | ||
-- Signals with the row's message when word_form does NOT match the row's pattern. | ||
-- NOTE(review): the variable is named constraint_violation, yet the error fires on a non-match -- confirm the intended polarity of the patterns. | ||
IF NOT (word_form REGEXP constraint_violation) THEN | ||
SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = constraint_message; | ||
END IF; | ||
END LOOP; | ||
|
||
CLOSE cur; | ||
DECLARE class_check INT DEFAULT 0; | ||
|
||
-- Get the word_class and preposition from the lexicon table | ||
DECLARE new_word_class VARCHAR(31) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; | ||
DECLARE new_preposition INT; | ||
|
||
-- NOTE(review): the lexicon table definition in this commit shows neither a preposition nor a word_class column, and word_form is not declared UNIQUE there, so the SELECT ... INTO on lexicon may fail to resolve or may return more than one row -- verify against the live schema. | ||
SELECT word_class INTO new_word_class FROM stixd_corpus.word_tags WHERE word_tag = new_word_tag; | ||
SELECT preposition INTO new_preposition FROM stixd_corpus.lexicon WHERE word_form = new_word_form; | ||
|
||
-- Check first condition | ||
-- NOTE(review): class1/class2 match the pre-change prohibited_intersections columns; the same commit redefines that table with word_tag1/word_tag2 -- confirm these predicates still resolve. | ||
SELECT COUNT(*) | ||
INTO class_check | ||
FROM stixd_corpus.prohibited_intersections | ||
WHERE class1 = new_word_class | ||
AND class2 IN (SELECT word_class | ||
FROM stixd_corpus.lexicon | ||
WHERE lex_id = new_preposition); | ||
|
||
IF class_check > 0 THEN | ||
SIGNAL SQLSTATE '45000' | ||
SET MESSAGE_TEXT = 'This combination of word_class and preposition is prohibited'; | ||
END IF; | ||
|
||
-- Reset the variable | ||
SET class_check = 0; | ||
|
||
-- Check second condition | ||
-- Same lookup as the first condition with the class1/class2 roles swapped. | ||
SELECT COUNT(*) | ||
INTO class_check | ||
FROM stixd_corpus.prohibited_intersections | ||
WHERE class2 = new_word_class | ||
AND class1 IN (SELECT word_class | ||
FROM stixd_corpus.lexicon | ||
WHERE lex_id = new_preposition); | ||
|
||
IF class_check > 0 THEN | ||
SIGNAL SQLSTATE '45000' | ||
SET MESSAGE_TEXT = 'This combination of word_class and preposition is prohibited'; | ||
END IF; | ||
END; | ||
// | ||
DELIMITER ; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
-- Create procedure to generate and return hash (sp_gen_hashes.sql)
--
-- Purpose:
--   Computes the SHA-256 hex digest of new_word_tag immediately followed by
--   new_word_form and hands it back through the OUT parameter.
--
-- Parameters:
--   new_word_tag  (IN)  - tag half of the hashed string.
--   new_word_form (IN)  - word-form half of the hashed string.
--   tag_form_hash (OUT) - receives the 64-character hex digest.
--
-- Notes:
--   The two inputs are concatenated with no separator.
--   MySQL CONCAT() yields NULL when any argument is NULL, so a NULL input
--   produces a NULL tag_form_hash.
DELIMITER //

CREATE PROCEDURE stixd_corpus.sp_gen_hashes(
    IN new_word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    IN new_word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
    OUT tag_form_hash VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
)
BEGIN
    -- Single-row SELECT ... INTO writes the digest straight into the OUT parameter.
    SELECT SHA2(CONCAT(new_word_tag, new_word_form), 256)
    INTO tag_form_hash;
END;
//
DELIMITER ;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
-- Create the stixd_corpus.documents table | ||
-- Create the stixd_corpus.documents table (create_table_documents.sql) | ||
-- NOTE(review): raw_text and raw_text_hash are each listed twice below, once without and once with an explicit utf8mb4 character set. This looks like the before/after lines of the hunk shown together, so the statement is not runnable as rendered. | ||
-- DROP TABLE IF EXISTS stixd_corpus.documents; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.documents ( | ||
doc_id INT AUTO_INCREMENT PRIMARY KEY, | ||
raw_text LONGTEXT, | ||
raw_text_hash CHAR(64) UNIQUE, | ||
raw_text LONGTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
-- UNIQUE rejects a second row with the same hash; presumably a 64-character hex digest of raw_text -- TODO confirm against the Python loader. | ||
raw_text_hash CHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci UNIQUE, | ||
proc_text JSON, | ||
metadata JSON | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.documents; | ||
|
||
-- Populating table done from Python script. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,7 @@ | ||
-- Create the stixd_corpus.genders table | ||
-- Create the stixd_corpus.genders table (create_table_genders.sql) | ||
-- Single-column lookup table: the gender value itself is the primary key. | ||
-- NOTE(review): the gender column is listed twice below (without and with an explicit utf8mb4 character set); this looks like the before/after lines of the hunk shown together. | ||
-- DROP TABLE IF EXISTS stixd_corpus.genders; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.genders ( | ||
gender VARCHAR(7) PRIMARY KEY | ||
gender VARCHAR(7) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci PRIMARY KEY | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.genders; | ||
|
||
-- Populating the table done with SQL seed script. | ||
-- Populating the table done with SQL seed script. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,15 @@ | ||
-- Create the stixd_corpus.lexicon table | ||
-- Create the stixd_corpus.lexicon table (create_table_lexicon.sql) | ||
-- NOTE(review): unlike the other table scripts in this commit, the DROP TABLE below is NOT commented out, so running this script discards any existing lexicon rows and makes the IF NOT EXISTS guard on the CREATE moot -- confirm this is intentional. | ||
DROP TABLE IF EXISTS stixd_corpus.lexicon; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.lexicon ( | ||
lex_id INT AUTO_INCREMENT PRIMARY KEY, | ||
-- NOTE(review): the six text columns appear twice (first without, then with an explicit utf8mb4 character set); this looks like the before/after lines of the hunk shown together. | ||
word_tag VARCHAR(12), | ||
word_form VARCHAR(99), | ||
logical_symbol VARCHAR(99), | ||
third_arg VARCHAR(15), | ||
tag_form_hash VARCHAR(64) UNIQUE, | ||
word_def TEXT, | ||
word_tag VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
word_form VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
logical_symbol VARCHAR(99) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
third_arg VARCHAR(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
-- Same width and collation as the OUT parameter of sp_gen_hashes; presumably stores that procedure's SHA-256 hex digest of word_tag + word_form -- TODO confirm against the loader. | ||
tag_form_hash VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci UNIQUE, | ||
word_def TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
synsets JSON, | ||
tagsets JSON | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.lexicon; | ||
|
||
-- Populating table done from Python script. | ||
-- Populating table done from Python script. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
-- NOTE(review): this hunk mixes the lines of the former obj_doc_jt junction table (obj_id, doc_id) with those of its replacement doc_sent_jt (doc_id, sent_id); it is not runnable as rendered. | ||
-- Create the stixd_corpus.obj_doc_jt table | ||
-- DROP TABLE IF EXISTS stixd_corpus.obj_doc_jt; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.obj_doc_jt ( | ||
obj_id VARCHAR(292), | ||
-- Create the stixd_corpus.doc_sent_jt table (create_table_doc_sent_jt.sql) | ||
-- DROP TABLE IF EXISTS stixd_corpus.doc_sent_jt; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.doc_sent_jt ( | ||
doc_id INT, | ||
PRIMARY KEY (obj_id, doc_id), | ||
FOREIGN KEY (obj_id) REFERENCES stixd_corpus.stix_objects(obj_id), | ||
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id) | ||
sent_id INT, | ||
-- Composite key: each (doc_id, sent_id) pairing can be stored only once. | ||
PRIMARY KEY (doc_id, sent_id), | ||
-- Both halves of the pairing must reference existing rows in documents and sentences. | ||
FOREIGN KEY (doc_id) REFERENCES stixd_corpus.documents(doc_id), | ||
FOREIGN KEY (sent_id) REFERENCES stixd_corpus.sentences(sent_id) | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.obj_doc_jt; | ||
-- SHOW CREATE TABLE stixd_corpus.doc_sent_jt; | ||
-- Populating table done from primary tables. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 4 additions & 5 deletions
9
info579/sql/schema/tables/create_table_prohib_intersections.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,9 @@ | ||
-- Create the stixd_corpus.prohibited_intersections table | ||
-- Create the stixd_corpus.prohibited_intersections table (create_table_prohib_intersections.sql) | ||
-- NOTE(review): both the class1/class2 definition and the word_tag1/word_tag2 definition appear below; this looks like the before/after lines of the hunk shown together. | ||
-- NOTE(review): the word_tag-based sp_check_prohib_intersections procedure in this commit reads word_tag1/word_tag2, while the class-based procedure text still reads class1/class2 -- confirm every caller matches the renamed columns. | ||
-- DROP TABLE IF EXISTS stixd_corpus.prohibited_intersections; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_intersections ( | ||
class1 VARCHAR(31), | ||
class2 VARCHAR(31), | ||
PRIMARY KEY (class1, class2) | ||
word_tag1 VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
word_tag2 VARCHAR(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
-- The key is the ordered pair, so (a, b) and (b, a) are distinct rows. | ||
PRIMARY KEY (word_tag1, word_tag2) | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.prohibited_intersections; | ||
|
||
-- Populating the table done with SQL seed script. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,7 @@ | ||
-- Create the stixd_corpus.prohibited_words table | ||
-- Create the stixd_corpus.prohibited_words table (create_table_prohib_words.sql) | ||
-- Single-column lookup table: the word itself is the primary key. | ||
-- NOTE(review): the word column is listed twice below (without and with an explicit utf8mb4 character set); this looks like the before/after lines of the hunk shown together. | ||
-- DROP TABLE IF EXISTS stixd_corpus.prohibited_words; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_words ( | ||
word VARCHAR(20) PRIMARY KEY | ||
word VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci PRIMARY KEY | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.prohibited_words; | ||
|
||
-- Populating the table done with SQL seed script. | ||
-- Populating the table done with SQL seed script. |
11 changes: 6 additions & 5 deletions
11
info579/sql/schema/tables/create_table_prolog_constraints.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
-- Create the stixd_corpus.prolog_constraints table | ||
-- Create the stixd_corpus.prolog_constraints table (create_table_prolog_constraints.sql) | ||
-- NOTE(review): description, pattern and message are each listed twice below (without and with an explicit utf8mb4 character set); this looks like the before/after lines of the hunk shown together. | ||
-- DROP TABLE IF EXISTS stixd_corpus.prolog_constraints; | ||
CREATE TABLE IF NOT EXISTS stixd_corpus.prolog_constraints ( | ||
constraint_id INT AUTO_INCREMENT PRIMARY KEY, | ||
description VARCHAR(511), | ||
pattern VARCHAR(255), | ||
message VARCHAR(255), | ||
description VARCHAR(511) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
-- pattern and message are the two columns read by the cursor in the check_prolog_constraints procedure text in this commit: pattern is applied with REGEXP and message becomes the SIGNAL text. | ||
pattern VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
message VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, | ||
auto_correct BOOLEAN | ||
); | ||
-- SHOW CREATE TABLE stixd_corpus.prolog_constraints; | ||
-- SHOW CREATE TABLE stixd_corpus.prolog_constraints; | ||
-- Populating the table done with SQL seed script. |
Oops, something went wrong.