diff --git a/info579/scripts/bash/create_procedures.sh b/info579/scripts/bash/create_procedures.sh index 8b1d20c..c376dc2 100644 --- a/info579/scripts/bash/create_procedures.sh +++ b/info579/scripts/bash/create_procedures.sh @@ -8,8 +8,15 @@ DB_USER="your_username" DB_PASSWORD="your_password" DB_NAME="stixd_corpus" +# Array of stored procedure scripts in the order they should be run +procedures_scripts=( + "../../sql/procedures/sp_check_prolog_constraints.sql" + "../../sql/procedures/sp_check_lexicon_constraints.sql" +) + # Create procedures echo "Creating procedures..." -mysql -u $DB_USER -p$DB_PASSWORD $DB_NAME < ../../sql/procedures/17_prc_check_prolog_constraints.sql - -echo "Procedures created successfully." \ No newline at end of file +for script in "${procedures_scripts[@]}"; do + echo "Running $script..." + mysql -u $DB_USER -p$DB_PASSWORD $DB_NAME < $script +done \ No newline at end of file diff --git a/info579/scripts/bash/create_tables.sh b/info579/scripts/bash/create_tables.sh index 0d307ff..d034443 100644 --- a/info579/scripts/bash/create_tables.sh +++ b/info579/scripts/bash/create_tables.sh @@ -18,7 +18,6 @@ tables_scripts=( "../../sql/schema/tables/05_create_table_stix_objects.sql" "../../sql/schema/tables/06_create_table_documents.sql" "../../sql/schema/tables/07_create_table_sentences.sql" - "../../sql/schema/tables/08_create_table_validation_results.sql" "../../sql/schema/tables/09_create_table_lexicon.sql" "../../sql/schema/tables/20_create_table_obj_doc_jt.sql" "../../sql/schema/tables/21_create_table_doc_sent_jt.sql" diff --git a/info579/scripts/bash/create_triggers.sh b/info579/scripts/bash/create_triggers.sh index 886f58f..964b584 100644 --- a/info579/scripts/bash/create_triggers.sh +++ b/info579/scripts/bash/create_triggers.sh @@ -10,13 +10,7 @@ DB_NAME="stixd_corpus" # Array of trigger creation scripts in the order they should be run triggers_scripts=( - "../../sql/triggers/11_trg_lexicon_preposition_gender.sql" - 
"../../sql/triggers/12_trg_lexicon_preposition_check.sql" - "../../sql/triggers/13_trg_lexicon_word_class_prep.sql" - "../../sql/triggers/14_trg_lexicon_word_class_gender.sql" - "../../sql/triggers/15_trg_lexicon_prohibited_words.sql" - "../../sql/triggers/16_trg_lexicon_prohibited_intersections.sql" - "../../sql/triggers/18_trg_lexicon_prolog_constraints.sql" + "../../sql/triggers/master_triggers_lexicon.sql" ) # Create triggers diff --git a/info579/scripts/bash/setup_database.sh b/info579/scripts/bash/setup_database.sh index f46bfb3..93db57d 100644 --- a/info579/scripts/bash/setup_database.sh +++ b/info579/scripts/bash/setup_database.sh @@ -10,6 +10,6 @@ DB_NAME="stixd_corpus" # Create the database echo "Creating database..." -mysql -u $DB_USER -p$DB_PASSWORD < ../../sql/setup/00_create_database.sql +mysql -u $DB_USER -p$DB_PASSWORD < ../../sql/setup/create_database.sql echo "Database created successfully." \ No newline at end of file diff --git a/info579/scripts/python/create_tables.py b/info579/scripts/python/create_tables.py index 5bba7ec..3636422 100644 --- a/info579/scripts/python/create_tables.py +++ b/info579/scripts/python/create_tables.py @@ -13,7 +13,6 @@ def main(): "../../sql/schema/tables/05_create_table_stix_objects.sql", "../../sql/schema/tables/06_create_table_documents.sql", "../../sql/schema/tables/07_create_table_sentences.sql", - "../../sql/schema/tables/08_create_table_validation_results.sql", "../../sql/schema/tables/09_create_table_lexicon.sql", "../../sql/schema/tables/20_create_table_obj_doc_jt.sql", "../../sql/schema/tables/21_create_table_doc_sent_jt.sql", diff --git a/info579/scripts/python/setup_database.py b/info579/scripts/python/setup_database.py index 1c0732e..5b6fa2b 100644 --- a/info579/scripts/python/setup_database.py +++ b/info579/scripts/python/setup_database.py @@ -4,7 +4,7 @@ def run_sql_script(script_path): subprocess.run(["mysql", "-u", "your_username", "-pyour_password", "<", script_path], shell=True) def main(): - 
script_path = "../../sql/setup/00_create_database.sql" + script_path = "../../sql/setup/create_database.sql" print(f"Running {script_path}...") run_sql_script(script_path) print("Database created successfully.") diff --git a/info579/sql/procedures/sp_check_lexicon_constraints.sql b/info579/sql/procedures/sp_check_lexicon_constraints.sql new file mode 100644 index 0000000..d8233ca --- /dev/null +++ b/info579/sql/procedures/sp_check_lexicon_constraints.sql @@ -0,0 +1,79 @@ +-- Create procedure to check for prolog constraints +DELIMITER // +CREATE PROCEDURE stixd_corpus.check_prolog_constraints(IN base_form VARCHAR(255)) +BEGIN + DECLARE done INT DEFAULT FALSE; + DECLARE constraint_violation VARCHAR(255); + DECLARE constraint_message VARCHAR(255); + + DECLARE cur CURSOR FOR SELECT pattern, message FROM stixd_corpus.prolog_constraints; + DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; + + OPEN cur; + + read_loop: LOOP + FETCH cur INTO constraint_violation, constraint_message; + IF done THEN + LEAVE read_loop; + END IF; + IF NOT (base_form REGEXP constraint_violation) THEN + SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = constraint_message; + END IF; + END LOOP; + + CLOSE cur; +END; +// +DELIMITER ; + +-- Create main procedure to check lexicon constraints +DELIMITER // +CREATE PROCEDURE stixd_corpus.sp_check_lexicon_constraints( + IN new_base_form VARCHAR(255), + IN new_word_class VARCHAR(31), + IN new_preposition INT, + IN new_gender VARCHAR(7) +) +BEGIN + -- Step 1: Check prolog constraints + CALL stixd_corpus.check_prolog_constraints(new_base_form); + + -- Step 2: Check prohibited words + DECLARE prohibited_word_found INT DEFAULT 0; + SELECT COUNT(*) + INTO prohibited_word_found + FROM stixd_corpus.prohibited_words + WHERE word = new_base_form; + + IF prohibited_word_found > 0 THEN + SIGNAL SQLSTATE '45000' + SET MESSAGE_TEXT = 'base_form contains a prohibited word'; + END IF; + + -- Step 3: Check prohibited intersections + CALL 
stixd_corpus.check_prohib_intersections(new_base_form); + + -- Step 4: Ensure preposition is not null if word_class starts with adj_tr or dv_ + IF (new_word_class LIKE 'adj_tr%' OR new_word_class LIKE 'dv_%') THEN + IF (new_preposition IS NULL) THEN + SIGNAL SQLSTATE '45000' + SET MESSAGE_TEXT = 'preposition must be NOT NULL if word_class starts with "adj_tr" or "dv_"'; + END IF; + END IF; + + -- Step 5: Ensure gender is not null if word_class starts with noun_ or pn + IF (new_word_class LIKE 'noun_%' OR new_word_class LIKE 'pn%') THEN + IF (new_gender IS NULL) THEN + SIGNAL SQLSTATE '45000' + SET MESSAGE_TEXT = 'gender must be NOT NULL if word_class starts with "noun_" or "pn"'; + END IF; + END IF; + + -- Step 6: Enforce the mutual exclusivity of preposition and gender + IF (new_preposition IS NOT NULL AND new_gender IS NOT NULL) THEN + SIGNAL SQLSTATE '45000' + SET MESSAGE_TEXT = 'preposition and gender cannot both be NOT NULL'; + END IF; +END// +// +DELIMITER ; diff --git a/info579/sql/procedures/17_pr_check_prolog_constraints.sql b/info579/sql/procedures/sp_check_prolog_constraints.sql similarity index 91% rename from info579/sql/procedures/17_pr_check_prolog_constraints.sql rename to info579/sql/procedures/sp_check_prolog_constraints.sql index a13a879..57b5a8d 100644 --- a/info579/sql/procedures/17_pr_check_prolog_constraints.sql +++ b/info579/sql/procedures/sp_check_prolog_constraints.sql @@ -1,4 +1,4 @@ --- Create procedure to check for prolog constraints (stix_corpus_18.sql) +-- Create procedure to check for prolog constraints ) DELIMITER // CREATE PROCEDURE stixd_corpus.check_prolog_constraints (IN base_form VARCHAR(255)) BEGIN diff --git a/info579/sql/procedures/sp_run_seed_scripts.sql b/info579/sql/procedures/sp_run_seed_scripts.sql new file mode 100644 index 0000000..35184aa --- /dev/null +++ b/info579/sql/procedures/sp_run_seed_scripts.sql @@ -0,0 +1,42 @@ +DELIMITER // + +CREATE PROCEDURE stixd_corpus.run_seed_scripts() +BEGIN + -- Seed Genders + SET 
@sql = 'SOURCE ../seeds/seed_genders.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + + -- Seed Prohibited Words + SET @sql = 'SOURCE ../seeds/seed_prohib_words.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + + -- Seed Prohibited Intersections + SET @sql = 'SOURCE ../seeds/seed_prohib_intersections.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + + -- Seed Prolog Constraints + SET @sql = 'SOURCE ../seeds/seed_prolog_constraints.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + + -- Seed Word Tags + SET @sql = 'SOURCE ../seeds/seed_word_tags.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + + -- Seed Special Characters + SET @sql = 'SOURCE ../seeds/seed_spec_chars.sql'; + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; +END // + +DELIMITER ; diff --git a/info579/sql/schema/tables/00_create_table_genders.sql b/info579/sql/schema/tables/00_create_table_genders.sql deleted file mode 100644 index 31c2045..0000000 --- a/info579/sql/schema/tables/00_create_table_genders.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Create the genders table in the specified database -DROP TABLE IF EXISTS stixd_corpus.genders; -CREATE TABLE stixd_corpus.genders ( - gender VARCHAR(7) PRIMARY KEY -); --- SHOW CREATE TABLE stixd_corpus.genders; \ No newline at end of file diff --git a/info579/sql/schema/tables/01_create_table_special_chars.sql b/info579/sql/schema/tables/01_create_table_special_chars.sql deleted file mode 100644 index 9d186b9..0000000 --- a/info579/sql/schema/tables/01_create_table_special_chars.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Create the special_characters table in the specified database -DROP TABLE IF EXISTS stixd_corpus.special_characters; -CREATE TABLE stixd_corpus.special_characters ( - char_id INT AUTO_INCREMENT PRIMARY KEY, - spec_char CHAR(1) UNIQUE, - esc_seq VARCHAR(10) -); --- SHOW CREATE TABLE 
stixd_corpus.special_characters; \ No newline at end of file diff --git a/info579/sql/schema/tables/02_create_table_prohib_words.sql b/info579/sql/schema/tables/02_create_table_prohib_words.sql deleted file mode 100644 index e5b9ab3..0000000 --- a/info579/sql/schema/tables/02_create_table_prohib_words.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Create the prohibited_words table in the specified database -DROP TABLE IF EXISTS stixd_corpus.prohibited_words; -CREATE TABLE stixd_corpus.prohibited_words ( - word VARCHAR(20) PRIMARY KEY -); --- SHOW CREATE TABLE stixd_corpus.prohibited_words; \ No newline at end of file diff --git a/info579/sql/schema/tables/03_create_table_prohib_intersections.sql b/info579/sql/schema/tables/03_create_table_prohib_intersections.sql deleted file mode 100644 index 05975e8..0000000 --- a/info579/sql/schema/tables/03_create_table_prohib_intersections.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Create the prohibited_intersections table in the specified database -DROP TABLE IF EXISTS stixd_corpus.prohibited_intersections; -CREATE TABLE stixd_corpus.prohibited_intersections ( - class1 VARCHAR(31), - class2 VARCHAR(31), - PRIMARY KEY (class1, class2) -); --- SHOW CREATE TABLE stixd_corpus.prohibited_intersections; \ No newline at end of file diff --git a/info579/sql/schema/tables/06_create_table_documents.sql b/info579/sql/schema/tables/06_create_table_documents.sql deleted file mode 100644 index c704096..0000000 --- a/info579/sql/schema/tables/06_create_table_documents.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Create the documents table in the specified database -DROP TABLE IF EXISTS stixd_corpus.documents; -CREATE TABLE stixd_corpus.documents ( - doc_id INT AUTO_INCREMENT PRIMARY KEY, - raw_text LONGTEXT, - proc_text JSON, - metadata JSON -); -SHOW CREATE TABLE stixd_corpus.documents; - --- Populating table done from Python script. 
\ No newline at end of file diff --git a/info579/sql/schema/tables/07_create_table_sentences.sql b/info579/sql/schema/tables/07_create_table_sentences.sql deleted file mode 100644 index 6feece1..0000000 --- a/info579/sql/schema/tables/07_create_table_sentences.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Create the sentences table in the specified database -DROP TABLE IF EXISTS stixd_corpus.sentences; -CREATE TABLE stixd_corpus.sentences ( - sent_id INT AUTO_INCREMENT PRIMARY KEY, - raw_sent TEXT, - proc_sent JSON -); -SHOW CREATE TABLE stixd_corpus.sentences; - --- Populating table done from Python script. \ No newline at end of file diff --git a/info579/sql/schema/tables/creat_table_word_tags.sql b/info579/sql/schema/tables/creat_table_word_tags.sql new file mode 100644 index 0000000..a08b03d --- /dev/null +++ b/info579/sql/schema/tables/creat_table_word_tags.sql @@ -0,0 +1,14 @@ +-- Create the stixd_corpus.word_tags table in the specified database +-- DROP TABLE IF EXISTS stixd_corpus.word_tags; +CREATE TABLE IF NOT EXISTS stixd_corpus.word_tags ( + id INT AUTO_INCREMENT PRIMARY KEY, + word_tag VARCHAR(12) NOT NULL, + first_arg VARCHAR(31) NOT NULL, + second_arg VARCHAR(31) NOT NULL, + third_arg VARCHAR(31) DEFAULT NULL, + word_class VARCHAR(31) NOT NULL +); + +-- SHOW CREATE TABLE stixd_corpus.word_tags + +-- Populating the table done with SQL seed script. 
\ No newline at end of file diff --git a/info579/sql/schema/tables/22_create_table_doc_lex_jt.sql b/info579/sql/schema/tables/create_table_doc_lex_jt.sql similarity index 62% rename from info579/sql/schema/tables/22_create_table_doc_lex_jt.sql rename to info579/sql/schema/tables/create_table_doc_lex_jt.sql index d21e8a6..a832658 100644 --- a/info579/sql/schema/tables/22_create_table_doc_lex_jt.sql +++ b/info579/sql/schema/tables/create_table_doc_lex_jt.sql @@ -1,6 +1,6 @@ --- Create the doc_lex_jt table in the specified database -DROP TABLE IF EXISTS stixd_corpus.doc_lex_jt; -CREATE TABLE stixd_corpus.doc_lex_jt ( +-- Create the stixd_corpus.doc_lex_jt table +-- DROP TABLE IF EXISTS stixd_corpus.doc_lex_jt; +CREATE TABLE IF NOT EXISTS stixd_corpus.doc_lex_jt ( doc_id INT, lex_id INT, PRIMARY KEY (doc_id, lex_id), diff --git a/info579/sql/schema/tables/21_create_table_doc_sent_jt.sql b/info579/sql/schema/tables/create_table_doc_sent_jt.sql similarity index 60% rename from info579/sql/schema/tables/21_create_table_doc_sent_jt.sql rename to info579/sql/schema/tables/create_table_doc_sent_jt.sql index e9dff5c..24a5684 100644 --- a/info579/sql/schema/tables/21_create_table_doc_sent_jt.sql +++ b/info579/sql/schema/tables/create_table_doc_sent_jt.sql @@ -1,6 +1,6 @@ --- Create the doc_sent_jt table in the specified database (stix_corpus_20.sql) -DROP TABLE IF EXISTS stixd_corpus.doc_sent_jt; -CREATE TABLE stixd_corpus.doc_sent_jt ( +-- Create the stixd_corpus.doc_sent_jt table +-- DROP TABLE IF EXISTS stixd_corpus.doc_sent_jt; +CREATE TABLE IF NOT EXISTS stixd_corpus.doc_sent_jt ( doc_id INT, sent_id INT, PRIMARY KEY (doc_id, sent_id), diff --git a/info579/sql/schema/tables/create_table_documents.sql b/info579/sql/schema/tables/create_table_documents.sql new file mode 100644 index 0000000..2b29419 --- /dev/null +++ b/info579/sql/schema/tables/create_table_documents.sql @@ -0,0 +1,12 @@ +-- Create the stixd_corpus.documents table +-- DROP TABLE IF EXISTS 
stixd_corpus.documents; +CREATE TABLE IF NOT EXISTS stixd_corpus.documents ( + doc_id INT AUTO_INCREMENT PRIMARY KEY, + raw_text LONGTEXT, + raw_text_hash CHAR(64) UNIQUE, + proc_text JSON, + metadata JSON +); +-- SHOW CREATE TABLE stixd_corpus.documents; + +-- Populating table done from Python script. \ No newline at end of file diff --git a/info579/sql/schema/tables/create_table_genders.sql b/info579/sql/schema/tables/create_table_genders.sql new file mode 100644 index 0000000..a97538b --- /dev/null +++ b/info579/sql/schema/tables/create_table_genders.sql @@ -0,0 +1,8 @@ +-- Create the stixd_corpus.genders table +-- DROP TABLE IF EXISTS stixd_corpus.genders; +CREATE TABLE IF NOT EXISTS stixd_corpus.genders ( + gender VARCHAR(7) PRIMARY KEY +); +-- SHOW CREATE TABLE stixd_corpus.genders; + +-- Populating the table done with SQL seed script. \ No newline at end of file diff --git a/info579/sql/schema/tables/09_create_table_lexicon.sql b/info579/sql/schema/tables/create_table_lexicon.sql similarity index 78% rename from info579/sql/schema/tables/09_create_table_lexicon.sql rename to info579/sql/schema/tables/create_table_lexicon.sql index 4f32726..cd81c97 100644 --- a/info579/sql/schema/tables/09_create_table_lexicon.sql +++ b/info579/sql/schema/tables/create_table_lexicon.sql @@ -1,6 +1,6 @@ --- Create the lexicon table in the specified database -DROP TABLE IF EXISTS stixd_corpus.lexicon; -CREATE TABLE stixd_corpus.lexicon ( +-- Create the stixd_corpus.lexicon table +-- DROP TABLE IF EXISTS stixd_corpus.lexicon; +CREATE TABLE IF NOT EXISTS stixd_corpus.lexicon ( lex_id INT AUTO_INCREMENT PRIMARY KEY, word_class VARCHAR(31), base_form VARCHAR(255), diff --git a/info579/sql/schema/tables/20_create_table_obj_doc_jt.sql b/info579/sql/schema/tables/create_table_obj_doc_jt.sql similarity index 64% rename from info579/sql/schema/tables/20_create_table_obj_doc_jt.sql rename to info579/sql/schema/tables/create_table_obj_doc_jt.sql index 8d7f0d2..85c9836 100644 --- 
a/info579/sql/schema/tables/20_create_table_obj_doc_jt.sql +++ b/info579/sql/schema/tables/create_table_obj_doc_jt.sql @@ -1,6 +1,6 @@ --- Create the obj_doc_jt table in the specified database -DROP TABLE IF EXISTS stixd_corpus.obj_doc_jt; -CREATE TABLE stixd_corpus.obj_doc_jt ( +-- Create the stixd_corpus.obj_doc_jt table +-- DROP TABLE IF EXISTS stixd_corpus.obj_doc_jt; +CREATE TABLE IF NOT EXISTS stixd_corpus.obj_doc_jt ( obj_id VARCHAR(292), doc_id INT, PRIMARY KEY (obj_id, doc_id), diff --git a/info579/sql/schema/tables/24_create_table_obj_lex_jt.sql b/info579/sql/schema/tables/create_table_obj_lex_jt.sql similarity index 63% rename from info579/sql/schema/tables/24_create_table_obj_lex_jt.sql rename to info579/sql/schema/tables/create_table_obj_lex_jt.sql index ab604fa..9576dfb 100644 --- a/info579/sql/schema/tables/24_create_table_obj_lex_jt.sql +++ b/info579/sql/schema/tables/create_table_obj_lex_jt.sql @@ -1,6 +1,6 @@ --- Create the obj_lex_jt table in the specified database -DROP TABLE IF EXISTS stixd_corpus.obj_lex_jt; -CREATE TABLE stixd_corpus.obj_lex_jt ( +-- Create the stixd_corpus.obj_lex_jt table +-- DROP TABLE IF EXISTS stixd_corpus.obj_lex_jt; +CREATE TABLE IF NOT EXISTS stixd_corpus.obj_lex_jt ( obj_id VARCHAR(292), lex_id INT, PRIMARY KEY (obj_id, lex_id), diff --git a/info579/sql/schema/tables/create_table_prohib_intersections.sql b/info579/sql/schema/tables/create_table_prohib_intersections.sql new file mode 100644 index 0000000..32ddac6 --- /dev/null +++ b/info579/sql/schema/tables/create_table_prohib_intersections.sql @@ -0,0 +1,10 @@ +-- Create the stixd_corpus.prohibited_intersections table +-- DROP TABLE IF EXISTS stixd_corpus.prohibited_intersections; +CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_intersections ( + class1 VARCHAR(31), + class2 VARCHAR(31), + PRIMARY KEY (class1, class2) +); +-- SHOW CREATE TABLE stixd_corpus.prohibited_intersections; + +-- Populating the table done with SQL seed script. 
\ No newline at end of file diff --git a/info579/sql/schema/tables/create_table_prohib_words.sql b/info579/sql/schema/tables/create_table_prohib_words.sql new file mode 100644 index 0000000..ed39720 --- /dev/null +++ b/info579/sql/schema/tables/create_table_prohib_words.sql @@ -0,0 +1,8 @@ +-- Create the stixd_corpus.prohibited_words table +-- DROP TABLE IF EXISTS stixd_corpus.prohibited_words; +CREATE TABLE IF NOT EXISTS stixd_corpus.prohibited_words ( + word VARCHAR(20) PRIMARY KEY +); +-- SHOW CREATE TABLE stixd_corpus.prohibited_words; + +-- Populating the table done with SQL seed script. \ No newline at end of file diff --git a/info579/sql/schema/tables/04_create_table_prolog_constraints.sql b/info579/sql/schema/tables/create_table_prolog_constraints.sql similarity index 55% rename from info579/sql/schema/tables/04_create_table_prolog_constraints.sql rename to info579/sql/schema/tables/create_table_prolog_constraints.sql index 2f43170..7cfb81e 100644 --- a/info579/sql/schema/tables/04_create_table_prolog_constraints.sql +++ b/info579/sql/schema/tables/create_table_prolog_constraints.sql @@ -1,6 +1,6 @@ --- Create the prolog_constraints table in the specified database -DROP TABLE IF EXISTS stixd_corpus.prolog_constraints; -CREATE TABLE stixd_corpus.prolog_constraints ( +-- Create the stixd_corpus.prolog_constraints table +-- DROP TABLE IF EXISTS stixd_corpus.prolog_constraints; +CREATE TABLE IF NOT EXISTS stixd_corpus.prolog_constraints ( constraint_id INT AUTO_INCREMENT PRIMARY KEY, description VARCHAR(511), pattern VARCHAR(255), diff --git a/info579/sql/schema/tables/23_create_table_sent_lex_jt.sql b/info579/sql/schema/tables/create_table_sent_lex_jt.sql similarity index 62% rename from info579/sql/schema/tables/23_create_table_sent_lex_jt.sql rename to info579/sql/schema/tables/create_table_sent_lex_jt.sql index 63a1b71..ae0f60d 100644 --- a/info579/sql/schema/tables/23_create_table_sent_lex_jt.sql +++ b/info579/sql/schema/tables/create_table_sent_lex_jt.sql 
@@ -1,6 +1,6 @@ --- Create the sent_lex_jt table in the specified database -DROP TABLE IF EXISTS stixd_corpus.sent_lex_jt; -CREATE TABLE stixd_corpus.sent_lex_jt ( +-- Create the stixd_corpus.sent_lex_jt table +-- DROP TABLE IF EXISTS stixd_corpus.sent_lex_jt; +CREATE TABLE IF NOT EXISTS stixd_corpus.sent_lex_jt ( sent_id INT, lex_id INT, PRIMARY KEY (sent_id, lex_id), diff --git a/info579/sql/schema/tables/create_table_sentences.sql b/info579/sql/schema/tables/create_table_sentences.sql new file mode 100644 index 0000000..f50c81a --- /dev/null +++ b/info579/sql/schema/tables/create_table_sentences.sql @@ -0,0 +1,10 @@ +-- Create the stixd_corpus.sentences table +-- DROP TABLE IF EXISTS stixd_corpus.sentences; +CREATE TABLE IF NOT EXISTS stixd_corpus.sentences ( + sent_id INT AUTO_INCREMENT PRIMARY KEY, + raw_sent TEXT, + proc_sent JSON +); +-- SHOW CREATE TABLE stixd_corpus.sentences; + +-- Populating table done from Python script. \ No newline at end of file diff --git a/info579/sql/schema/tables/create_table_special_chars.sql b/info579/sql/schema/tables/create_table_special_chars.sql new file mode 100644 index 0000000..cf95179 --- /dev/null +++ b/info579/sql/schema/tables/create_table_special_chars.sql @@ -0,0 +1,10 @@ +-- Create the stixd_corpus.special_characters table +-- DROP TABLE IF EXISTS stixd_corpus.special_characters; +CREATE TABLE IF NOT EXISTS stixd_corpus.special_characters ( + char_id INT AUTO_INCREMENT PRIMARY KEY, + spec_char CHAR(1) UNIQUE, + esc_seq VARCHAR(10) +); +-- SHOW CREATE TABLE stixd_corpus.special_characters; + +-- Populating the table done with SQL seed script. 
\ No newline at end of file diff --git a/info579/sql/schema/tables/05_create_table_stix_objects.sql b/info579/sql/schema/tables/create_table_stix_objects.sql similarity index 69% rename from info579/sql/schema/tables/05_create_table_stix_objects.sql rename to info579/sql/schema/tables/create_table_stix_objects.sql index d8ec772..e12d435 100644 --- a/info579/sql/schema/tables/05_create_table_stix_objects.sql +++ b/info579/sql/schema/tables/create_table_stix_objects.sql @@ -1,6 +1,6 @@ --- Create the stix_objects table in the specified database -DROP TABLE IF EXISTS stixd_corpus.stix_objects; -CREATE TABLE stixd_corpus.stix_objects ( +-- Create the stixd_corpus.stix_objects table +-- DROP TABLE IF EXISTS stixd_corpus.stix_objects; +CREATE TABLE IF NOT EXISTS stixd_corpus.stix_objects ( obj_id VARCHAR(292) PRIMARY KEY, type VARCHAR(255), created_by_ref VARCHAR(255), @@ -20,4 +20,6 @@ CREATE TABLE stixd_corpus.stix_objects ( duplicate_of JSON, related_to JSON ); --- SHOW CREATE TABLE stixd_corpus.stix_objects; \ No newline at end of file +-- SHOW CREATE TABLE stixd_corpus.stix_objects; + +-- Populating table done from Python script. 
\ No newline at end of file diff --git a/info579/sql/schema/tables/08_create_table_validation_results.sql b/info579/sql/schema/tables/create_table_validation_results.sql similarity index 50% rename from info579/sql/schema/tables/08_create_table_validation_results.sql rename to info579/sql/schema/tables/create_table_validation_results.sql index 9c85999..c7bca5c 100644 --- a/info579/sql/schema/tables/08_create_table_validation_results.sql +++ b/info579/sql/schema/tables/create_table_validation_results.sql @@ -1,12 +1,12 @@ --- Create the validation_results table in the specified database -DROP TABLE IF EXISTS stixd_corpus.validation_results; -CREATE TABLE stixd_corpus.validation_results ( +-- Create the stixd_corpus.validation_results table +-- DROP TABLE IF EXISTS stixd_corpus.validation_results; +CREATE TABLE IF NOT EXISTS stixd_corpus.validation_results ( val_id INT AUTO_INCREMENT PRIMARY KEY, obj_id VARCHAR(292), is_valid BOOLEAN, val_errors JSON, FOREIGN KEY (obj_id) REFERENCES stixd_corpus.stix_objects(obj_id) ); -SHOW CREATE TABLE stixd_corpus.validation_results; +-- SHOW CREATE TABLE stixd_corpus.validation_results; -- Populating table done from Python script. 
\ No newline at end of file diff --git a/info579/sql/seeds/31_seed_genders.sql b/info579/sql/seeds/31_seed_genders.sql deleted file mode 100644 index 6c37850..0000000 --- a/info579/sql/seeds/31_seed_genders.sql +++ /dev/null @@ -1,5 +0,0 @@ --- Populate the genders table from ACE 6.7 Lexicon Specification -INSERT INTO stixd_corpus.genders (gender) VALUES -('undef'), ('neutr'), ('human'), ('masc'), ('fem'); - -SELECT * FROM stixd_corpus.genders; \ No newline at end of file diff --git a/info579/sql/seeds/32_seed_special_chars.sql b/info579/sql/seeds/32_seed_special_chars.sql deleted file mode 100644 index 2eb521d..0000000 --- a/info579/sql/seeds/32_seed_special_chars.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Insert the special characters and their escape sequences -INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) VALUES -('\\', '\\\\'), ('\'', '\'\''), ('"', '\\"'), (',', '\\,'), ('.', '\\.'), -(';', '\\;'), (':', '\\:'), ('!', '\\!'), ('(', '\\('), (')', '\\)'), -('[', '\\['), (']', '\\]'), ('{', '\\{'), ('}', '\\}'), ('|', '\\|'), ('@', '\\@'), -('#', '\\#'), ('&', '\\&'), ('^', '\\^'), ('*', '\\*'), ('?', '\\?'), ('/', '\\/'); - --- SELECT * FROM stixd_corpus.special_characters; \ No newline at end of file diff --git a/info579/sql/seeds/33_seed_prohib_words.sql b/info579/sql/seeds/33_seed_prohib_words.sql deleted file mode 100644 index 65e7f9c..0000000 --- a/info579/sql/seeds/33_seed_prohib_words.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Insert the prohibited words from ACE 6.7 Lexicon Specification -INSERT INTO stixd_corpus.prohibited_words (word) VALUES -('null'), ('zero'), ('one'), ('two'), ('three'), ('four'), ('five'), ('six'), -('seven'), ('eight'), ('nine'), ('ten'), ('eleven'), ('twelve'), ('dozen'), ('there'), -('and'), ('or'), ('not'), ('that'), ('than'), ('of'), ('if'), ('then'), ('such'), -('be'), ('provably'), ('more'), ('most'), ('are'), ('is'), ('the'), ('a'), ('an'), -('some'), ('no'), ('every'), ('all'), ('each'), ('which'), ('its'), 
('his'), ('her'), -('their'), ('whose'), ('it'), ('he'), ('she'), ('they'), ('him'), ('them'), -('itself'), ('himself'), ('herself'), ('themselves'), ('someone'), ('somebody'), -('something'), ('nobody'), ('nothing'), ('everyone'), ('everybody'), ('everything'), -('what'), ('who'), ('how'), ('where'), ('when'); - --- SELECT * FROM stixd_corpus.prohibited_words; \ No newline at end of file diff --git a/info579/sql/seeds/34_seed_prohib_intersections.sql b/info579/sql/seeds/34_seed_prohib_intersections.sql deleted file mode 100644 index 5083f41..0000000 --- a/info579/sql/seeds/34_seed_prohib_intersections.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Insert the prohibited intersections from ACE 6.7 Lexicon Specification -INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) VALUES -('adv', 'noun_sg'), ('adv', 'noun_pl'), ('adv', 'noun_mass'), ('adv', 'iv_finsg'), -('adv', 'iv_infpl'), ('adj_itr', 'adj_tr'), ('pndef_sg', 'noun_sg'), -('pndef_sg', 'noun_pl'), ('pndef_sg', 'noun_mass'), ('pndef_pl', 'noun_sg'), -('pndef_pl', 'noun_pl'), ('pndef_pl', 'noun_mass'), ('prep', 'adj_itr'), -('prep', 'adj_itr_comp'), ('prep', 'adj_itr_sup'), ('prep', 'tv_finsg'), -('prep', 'tv_infpl'), ('prep', 'tv_pp'); - --- SELECT * FROM stixd_corpus.prohibited_intersections; \ No newline at end of file diff --git a/info579/sql/seeds/35_seed_prolog_constraints.sql b/info579/sql/seeds/35_seed_prolog_constraints.sql deleted file mode 100644 index 13acc71..0000000 --- a/info579/sql/seeds/35_seed_prolog_constraints.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Insert the constraints with auto_correct flag -INSERT INTO stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) VALUES -('The word form must contain only lower and upper case letters (a-z, A-Z), digits (0-9), hyphens (-), underscores (_), dollar signs ($), and degree signs (°).', '^[a-zA-Z_$°][a-zA-Z0-9_$°-]*$', 'Invalid characters in word_form. 
Allowed characters are: a-z, A-Z, 0-9, -, _, $, °.', TRUE), -('The first character must not be a digit or a hyphen.', '^[^0-9-]', 'The first character must not be a digit or a hyphen.', TRUE), -('Blank spaces are not allowed. Use hyphens instead of blank spaces.', '^.*[^ ].*$', 'Blank spaces are not allowed in word_form. Use hyphens instead.', TRUE), -('Symbols special for Prolog (e.g., apostrophe) must be escaped.', '', 'Symbols special for Prolog (e.g., apostrophe) must be escaped.', TRUE), -('Capitalized words (e.g., proper names) must be in quotes, otherwise they would be considered variables by Prolog.', '', 'Capitalized words (e.g., proper names) must be in quotes.', TRUE); - --- SELECT * FROM stixd_corpus.prolog_constraints; \ No newline at end of file diff --git a/info579/sql/seeds/seed_genders.sql b/info579/sql/seeds/seed_genders.sql new file mode 100644 index 0000000..2ae192b --- /dev/null +++ b/info579/sql/seeds/seed_genders.sql @@ -0,0 +1,33 @@ +-- Populate the genders table if they do not exist per ACE 6.7 Lexicon Specification +INSERT INTO stixd_corpus.genders (gender) +SELECT * FROM (SELECT 'undef') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.genders WHERE gender = 'undef' +); + +INSERT INTO stixd_corpus.genders (gender) +SELECT * FROM (SELECT 'neutr') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.genders WHERE gender = 'neutr' +); + +INSERT INTO stixd_corpus.genders (gender) +SELECT * FROM (SELECT 'human') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.genders WHERE gender = 'human' +); + +INSERT INTO stixd_corpus.genders (gender) +SELECT * FROM (SELECT 'masc') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.genders WHERE gender = 'masc' +); + +INSERT INTO stixd_corpus.genders (gender) +SELECT * FROM (SELECT 'fem') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.genders WHERE gender = 'fem' +); + +-- Verify the data has been inserted correctly +-- SELECT * FROM stixd_corpus.genders; diff --git 
a/info579/sql/seeds/seed_prohib_intersections.sql b/info579/sql/seeds/seed_prohib_intersections.sql new file mode 100644 index 0000000..923b79d --- /dev/null +++ b/info579/sql/seeds/seed_prohib_intersections.sql @@ -0,0 +1,111 @@ +-- Insert the prohibited intersections if they do not exist per ACE 6.7 Lexicon Specification +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adv', 'noun_sg') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adv' AND class2 = 'noun_sg' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adv', 'noun_pl') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adv' AND class2 = 'noun_pl' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adv', 'noun_mass') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adv' AND class2 = 'noun_mass' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adv', 'iv_finsg') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adv' AND class2 = 'iv_finsg' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adv', 'iv_infpl') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adv' AND class2 = 'iv_infpl' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'adj_itr', 'adj_tr') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'adj_itr' AND class2 = 'adj_tr' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_sg', 'noun_sg') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_sg' 
AND class2 = 'noun_sg' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_sg', 'noun_pl') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_sg' AND class2 = 'noun_pl' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_sg', 'noun_mass') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_sg' AND class2 = 'noun_mass' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_pl', 'noun_sg') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_pl' AND class2 = 'noun_sg' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_pl', 'noun_pl') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_pl' AND class2 = 'noun_pl' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'pndef_pl', 'noun_mass') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'pndef_pl' AND class2 = 'noun_mass' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'adj_itr') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' AND class2 = 'adj_itr' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'adj_itr_comp') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' AND class2 = 'adj_itr_comp' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'adj_itr_sup') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' 
AND class2 = 'adj_itr_sup' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'tv_finsg') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' AND class2 = 'tv_finsg' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'tv_infpl') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' AND class2 = 'tv_infpl' +); + +INSERT INTO stixd_corpus.prohibited_intersections (class1, class2) +SELECT * FROM (SELECT 'prep', 'tv_pp') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_intersections WHERE class1 = 'prep' AND class2 = 'tv_pp' +); + +-- Verify the data has been inserted correctly +-- SELECT * FROM stixd_corpus.prohibited_intersections; diff --git a/info579/sql/seeds/seed_prohib_words.sql b/info579/sql/seeds/seed_prohib_words.sql new file mode 100644 index 0000000..4706925 --- /dev/null +++ b/info579/sql/seeds/seed_prohib_words.sql @@ -0,0 +1,411 @@ +-- Insert the prohibited words if they do not exist per ACE 6.7 Lexicon Specification +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'null') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'null' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'zero') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'zero' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'one') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'one' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'two') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'two' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'three') AS tmp +WHERE NOT EXISTS ( + 
SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'three' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'four') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'four' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'five') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'five' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'six') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'six' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'seven') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'seven' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'eight') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'eight' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'nine') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'nine' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'ten') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'ten' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'eleven') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'eleven' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'twelve') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'twelve' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'dozen') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'dozen' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'there') AS tmp +WHERE NOT EXISTS ( + SELECT 
1 FROM stixd_corpus.prohibited_words WHERE word = 'there' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'and') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'and' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'or') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'or' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'not') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'not' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'that') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'that' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'than') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'than' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'of') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'of' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'if') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'if' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'then') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'then' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'such') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'such' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'be') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'be' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'provably') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM 
stixd_corpus.prohibited_words WHERE word = 'provably' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'more') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'more' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'most') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'most' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'are') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'are' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'is') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'is' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'the') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'the' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'a') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'a' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'an') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'an' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'some') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'some' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'no') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'no' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'every') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'every' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'all') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE 
word = 'all' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'each') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'each' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'which') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'which' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'its') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'its' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'his') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'his' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'her') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'her' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'their') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'their' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'whose') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'whose' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'it') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'it' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'he') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'he' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'she') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'she' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'they') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'they' +); + +INSERT 
INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'him') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'him' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'them') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'them' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'itself') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'itself' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'himself') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'himself' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'herself') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'herself' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'themselves') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'themselves' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'someone') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'someone' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'somebody') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'somebody' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'something') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'something' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'nobody') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'nobody' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'nothing') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM 
stixd_corpus.prohibited_words WHERE word = 'nothing' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'everyone') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'everyone' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'everybody') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'everybody' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'everything') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'everything' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'what') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'what' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'who') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'who' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'how') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'how' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'where') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'where' +); + +INSERT INTO stixd_corpus.prohibited_words (word) +SELECT * FROM (SELECT 'when') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prohibited_words WHERE word = 'when' +); + +-- Verify the data has been inserted correctly +-- SELECT * FROM stixd_corpus.prohibited_words; diff --git a/info579/sql/seeds/seed_prolog_constraints.sql b/info579/sql/seeds/seed_prolog_constraints.sql new file mode 100644 index 0000000..062aae4 --- /dev/null +++ b/info579/sql/seeds/seed_prolog_constraints.sql @@ -0,0 +1,33 @@ +-- Insert the constraints with auto_correct flag only if they do not exist per ACE 6.7 Lexicon Specification +INSERT INTO 
stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) /* Derived-table columns are aliased explicitly: MySQL names an unaliased literal after its text and rejects duplicate column names in a derived table, so rows whose description and message are identical could not be inserted. */ +SELECT * FROM (SELECT 'The word form must contain only lower and upper case letters (a-z, A-Z), digits (0-9), hyphens (-), underscores (_), dollar signs ($), and degree signs (°).' AS description, '^[a-zA-Z_$°][a-zA-Z0-9_$°-]*$' AS pattern, 'Invalid characters in word_form. Allowed characters are: a-z, A-Z, 0-9, -, _, $, °.' AS message, TRUE AS auto_correct) AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prolog_constraints AS pc WHERE pc.description = 'The word form must contain only lower and upper case letters (a-z, A-Z), digits (0-9), hyphens (-), underscores (_), dollar signs ($), and degree signs (°).' +); + +INSERT INTO stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) +SELECT * FROM (SELECT 'The first character must not be a digit or a hyphen.' AS description, '^[^0-9-]' AS pattern, 'The first character must not be a digit or a hyphen.' AS message, TRUE AS auto_correct) AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prolog_constraints AS pc WHERE pc.description = 'The first character must not be a digit or a hyphen.' +); + +INSERT INTO stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) /* Like the two patterns above, this one matches VALID word forms: one or more characters, none of them a blank. NOTE(review): confirm the checking procedure treats a non-match as a violation. */ +SELECT * FROM (SELECT 'Blank spaces are not allowed. Use hyphens instead of blank spaces.' AS description, '^[^ ]+$' AS pattern, 'Blank spaces are not allowed in word_form. Use hyphens instead.' AS message, TRUE AS auto_correct) AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prolog_constraints AS pc WHERE pc.description = 'Blank spaces are not allowed. Use hyphens instead of blank spaces.' +); + +INSERT INTO stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) /* NOTE(review): this row and the next carry an empty pattern; confirm the checking procedure skips empty patterns rather than evaluating them as a regex. */ +SELECT * FROM (SELECT 'Symbols special for Prolog (e.g., apostrophe) must be escaped.' AS description, '' AS pattern, 'Symbols special for Prolog (e.g., apostrophe) must be escaped.' AS message, TRUE AS auto_correct) AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prolog_constraints AS pc WHERE pc.description = 'Symbols special for Prolog (e.g., apostrophe) must be escaped.' 
+); + +INSERT INTO stixd_corpus.prolog_constraints (description, pattern, message, auto_correct) +SELECT * FROM (SELECT 'Capitalized words (e.g., proper names) must be in quotes, otherwise they would be considered variables by Prolog.' AS description, '' AS pattern, 'Capitalized words (e.g., proper names) must be in quotes.' AS message, TRUE AS auto_correct) AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.prolog_constraints AS pc WHERE pc.description = 'Capitalized words (e.g., proper names) must be in quotes, otherwise they would be considered variables by Prolog.' +); + +-- Verify the data has been inserted correctly +-- SELECT * FROM stixd_corpus.prolog_constraints; diff --git a/info579/sql/seeds/seed_spec_chars.sql b/info579/sql/seeds/seed_spec_chars.sql new file mode 100644 index 0000000..0645b91 --- /dev/null +++ b/info579/sql/seeds/seed_spec_chars.sql @@ -0,0 +1,135 @@ +-- Insert the special characters and their escape sequences if they do not exist per ACE 6.7 Lexicon Specification +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '\\', '\\\\') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '\\' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '\'', '\'\'') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '\'' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '"', '\\"') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '"' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT ',', '\\,') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = ',' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '.', '\\.') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '.' 
+); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT ';', '\\;') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = ';' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT ':', '\\:') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = ':' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '!', '\\!') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '!' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '(', '\\(') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '(' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT ')', '\\)') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = ')' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '[', '\\[') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '[' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT ']', '\\]') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = ']' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '{', '\\{') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '{' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '}', '\\}') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '}' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '|', '\\|') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM 
stixd_corpus.special_characters WHERE spec_char = '|' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '@', '\\@') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '@' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '#', '\\#') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '#' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '&', '\\&') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '&' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '^', '\\^') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '^' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '*', '\\*') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '*' +); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '?', '\\?') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '?' 
+); + +INSERT INTO stixd_corpus.special_characters (spec_char, esc_seq) +SELECT * FROM (SELECT '/', '\\/') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.special_characters WHERE spec_char = '/' +); + +-- Verify the data has been inserted correctly +-- SELECT * FROM stixd_corpus.special_characters; diff --git a/info579/sql/seeds/seed_word_tags.sql b/info579/sql/seeds/seed_word_tags.sql new file mode 100644 index 0000000..2735a18 --- /dev/null +++ b/info579/sql/seeds/seed_word_tags.sql @@ -0,0 +1,165 @@ +-- Insert data into the word_tags table if it does not exist per ACE 6.7 Lexicon Specification +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adv', 'PositiveForm', 'LogicalSymbol', NULL, 'Adverbs') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adv' AND first_arg = 'PositiveForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Adverbs' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adv_comp', 'ComparativeForm', 'LogicalSymbol', NULL, 'Adverbs') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adv_comp' AND first_arg = 'ComparativeForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Adverbs' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adv_sup', 'SuperlativeForm', 'LogicalSymbol', NULL, 'Adverbs') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adv_sup' AND first_arg = 'SuperlativeForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Adverbs' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_itr', 'PositiveForm', 'LogicalSymbol', NULL, 'Intransitive adjectives') AS tmp +WHERE NOT 
EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_itr' AND first_arg = 'PositiveForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Intransitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_itr_comp', 'ComparativeForm', 'LogicalSymbol', NULL, 'Intransitive adjectives') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_itr_comp' AND first_arg = 'ComparativeForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Intransitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_itr_sup', 'SuperlativeForm', 'LogicalSymbol', NULL, 'Intransitive adjectives') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_itr_sup' AND first_arg = 'SuperlativeForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND word_class = 'Intransitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_tr', 'PositiveForm', 'LogicalSymbol', 'Preposition', 'Transitive adjectives') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_tr' AND first_arg = 'PositiveForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Preposition' AND word_class = 'Transitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_tr_comp', 'ComparativeForm', 'LogicalSymbol', 'Preposition', 'Transitive adjectives') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_tr_comp' AND first_arg = 'ComparativeForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Preposition' AND word_class = 'Transitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags 
(word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'adj_tr_sup', 'SuperlativeForm', 'LogicalSymbol', 'Preposition', 'Transitive adjectives') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'adj_tr_sup' AND first_arg = 'SuperlativeForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Preposition' AND word_class = 'Transitive adjectives' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'noun_sg', 'SingularForm', 'LogicalSymbol', 'Gender', 'Countable nouns') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'noun_sg' AND first_arg = 'SingularForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Gender' AND word_class = 'Countable nouns' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'noun_pl', 'PluralForm', 'LogicalSymbol', 'Gender', 'Countable nouns') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'noun_pl' AND first_arg = 'PluralForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Gender' AND word_class = 'Countable nouns' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'noun_mass', 'WordForm', 'LogicalSymbol', 'Gender', 'Mass nouns') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'noun_mass' AND first_arg = 'WordForm' AND second_arg = 'LogicalSymbol' AND third_arg = 'Gender' AND word_class = 'Mass nouns' +); + +INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class) +SELECT * FROM (SELECT 'mn_sg', 'SingularForm', 'LogicalSymbol', NULL, 'Measurement nouns') AS tmp +WHERE NOT EXISTS ( + SELECT 1 FROM stixd_corpus.word_tags WHERE word_tag = 'mn_sg' AND first_arg = 'SingularForm' AND second_arg = 'LogicalSymbol' AND third_arg IS NULL AND 
word_class = 'Measurement nouns'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'mn_pl', 'PluralForm', 'LogicalSymbol', NULL, 'Measurement nouns' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'mn_pl' AND wt.first_arg = 'PluralForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Measurement nouns'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'pn_sg', 'WordForm', 'LogicalSymbol', 'Gender', 'Proper names' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'pn_sg' AND wt.first_arg = 'WordForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Gender' AND wt.word_class = 'Proper names'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'pn_pl', 'WordForm', 'LogicalSymbol', 'Gender', 'Proper names' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'pn_pl' AND wt.first_arg = 'WordForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Gender' AND wt.word_class = 'Proper names'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'pndef_sg', 'WordForm', 'LogicalSymbol', 'Gender', 'Proper names' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'pndef_sg' AND wt.first_arg = 'WordForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Gender' AND wt.word_class = 'Proper names'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'pndef_pl', 'WordForm', 'LogicalSymbol', 'Gender', 'Proper names' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'pndef_pl' AND wt.first_arg = 'WordForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Gender' AND wt.word_class = 'Proper names'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'iv_finsg', 'ThirdSgForm', 'LogicalSymbol', NULL, 'Intransitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'iv_finsg' AND wt.first_arg = 'ThirdSgForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Intransitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'iv_infpl', 'InfForm', 'LogicalSymbol', NULL, 'Intransitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'iv_infpl' AND wt.first_arg = 'InfForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Intransitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'tv_finsg', 'ThirdSgForm', 'LogicalSymbol', NULL, 'Transitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'tv_finsg' AND wt.first_arg = 'ThirdSgForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Transitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'tv_infpl', 'InfForm', 'LogicalSymbol', NULL, 'Transitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'tv_infpl' AND wt.first_arg = 'InfForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Transitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'tv_pp', 'PastPartForm', 'LogicalSymbol', NULL, 'Transitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'tv_pp' AND wt.first_arg = 'PastPartForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Transitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'dv_finsg', 'ThirdSgForm', 'LogicalSymbol', 'Preposition', 'Ditransitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'dv_finsg' AND wt.first_arg = 'ThirdSgForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Preposition' AND wt.word_class = 'Ditransitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'dv_infpl', 'InfForm', 'LogicalSymbol', 'Preposition', 'Ditransitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'dv_infpl' AND wt.first_arg = 'InfForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Preposition' AND wt.word_class = 'Ditransitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'dv_pp', 'PastPartForm', 'LogicalSymbol', 'Preposition', 'Ditransitive verbs' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'dv_pp' AND wt.first_arg = 'PastPartForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg = 'Preposition' AND wt.word_class = 'Ditransitive verbs'
+);
+
+INSERT INTO stixd_corpus.word_tags (word_tag, first_arg, second_arg, third_arg, word_class)
+SELECT 'prep', 'WordForm', 'LogicalSymbol', NULL, 'Prepositions' FROM DUAL
+WHERE NOT EXISTS (
+    SELECT 1 FROM stixd_corpus.word_tags AS wt WHERE wt.word_tag = 'prep' AND wt.first_arg = 'WordForm' AND wt.second_arg = 'LogicalSymbol' AND wt.third_arg IS NULL AND wt.word_class = 'Prepositions'
+);
+
+-- Optional sanity check: uncomment to list the seeded rows
+-- SELECT * FROM stixd_corpus.word_tags;
diff --git a/info579/sql/setup/00_create_database.sql b/info579/sql/setup/create_database.sql
similarity index 100%
rename from info579/sql/setup/00_create_database.sql
rename to
info579/sql/setup/create_database.sql diff --git a/info579/sql/triggers/11_trg_lexicon_preposition_gender.sql b/info579/sql/triggers/11_trg_lexicon_preposition_gender.sql deleted file mode 100644 index ad19299..0000000 --- a/info579/sql/triggers/11_trg_lexicon_preposition_gender.sql +++ /dev/null @@ -1,14 +0,0 @@ --- Create a trigger to enforce the mutual exclusivity of preposition and gender --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_preposition_gender -DELIMITER // -CREATE TRIGGER stixd_corpus.trg_lexicon_preposition_gender -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - IF (NEW.preposition IS NOT NULL AND NEW.gender IS NOT NULL) THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'preposition and gender cannot both be NOT NULL'; - END IF; -END; -// -DELIMITER ; \ No newline at end of file diff --git a/info579/sql/triggers/12_trg_lexicon_preposition_check.sql b/info579/sql/triggers/12_trg_lexicon_preposition_check.sql deleted file mode 100644 index 0877ce0..0000000 --- a/info579/sql/triggers/12_trg_lexicon_preposition_check.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Create the trigger to enforce preposition must be from an entry with word_class = prep --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_preposition_check; -DELIMITER // -CREATE TRIGGER stixd_corpus.trg_lexicon_preposition_check -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - DECLARE word_class_check VARCHAR(31); - IF (NEW.preposition IS NOT NULL) THEN - SELECT word_class INTO word_class_check - FROM stixd_corpus.lexicon - WHERE lex_id = NEW.preposition; - - IF (word_class_check NOT LIKE 'prep%') THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'preposition must reference an entry with word_class starting with "prep"'; - END IF; - END IF; -END; -// -DELIMITER ; \ No newline at end of file diff --git a/info579/sql/triggers/16_trg_lexicon_prohibited_intersections.sql b/info579/sql/triggers/13_trg_check_prohib_intersections.sql similarity index 92% rename from 
info579/sql/triggers/16_trg_lexicon_prohibited_intersections.sql rename to info579/sql/triggers/13_trg_check_prohib_intersections.sql index 455fe62..3e699cc 100644 --- a/info579/sql/triggers/16_trg_lexicon_prohibited_intersections.sql +++ b/info579/sql/triggers/13_trg_check_prohib_intersections.sql @@ -1,4 +1,4 @@ --- Create the trigger to check for prohibited intersections (stix_corpus_17.sql) +-- Create the trigger to check for prohibited intersections of word_class and preposition -- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_prohibited_intersections; DELIMITER // CREATE TRIGGER stixd_corpus.trg_lexicon_prohibited_intersections diff --git a/info579/sql/triggers/13_trg_lexicon_word_class_prep.sql b/info579/sql/triggers/13_trg_lexicon_word_class_prep.sql deleted file mode 100644 index 9f8f3ec..0000000 --- a/info579/sql/triggers/13_trg_lexicon_word_class_prep.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Create trigger to ensure preposition is not null if word_class starts with adj_tr or dv_ --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_word_class_preposition -DELIMITER // -CREATE TRIGGER stixd_corpus.trg_lexicon_word_class_preposition -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - IF (NEW.word_class LIKE 'adj_tr%' OR NEW.word_class LIKE 'dv_%') THEN - IF (NEW.preposition IS NULL) THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'preposition must be NOT NULL if word_class starts with "adj_tr" or "dv_"'; - END IF; - END IF; -END; -// -DELIMITER ; \ No newline at end of file diff --git a/info579/sql/triggers/14_trg_lexicon_word_class_gender.sql b/info579/sql/triggers/14_trg_lexicon_word_class_gender.sql deleted file mode 100644 index 00b0ddd..0000000 --- a/info579/sql/triggers/14_trg_lexicon_word_class_gender.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Create trigger to ensure gender is not null if word_class starts with noun_ or pn --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_word_class_gender -DELIMITER // -CREATE TRIGGER
stixd_corpus.trg_lexicon_word_class_gender -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - IF (NEW.word_class LIKE 'noun_%' OR NEW.word_class LIKE 'pn%') THEN - IF (NEW.gender IS NULL) THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'gender must be NOT NULL if word_class starts with "noun_" or "pn"'; - END IF; - END IF; -END; -// -DELIMITER ; \ No newline at end of file diff --git a/info579/sql/triggers/15_trg_lexicon_prohibited_words.sql b/info579/sql/triggers/15_trg_lexicon_prohibited_words.sql deleted file mode 100644 index d2bfe54..0000000 --- a/info579/sql/triggers/15_trg_lexicon_prohibited_words.sql +++ /dev/null @@ -1,20 +0,0 @@ --- Create the trigger to check for prohibited words in the base_form --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_prohibited_words; -DELIMITER // -CREATE TRIGGER stixd_corpus.trg_lexicon_prohibited_words -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - DECLARE prohibited_word_found INT DEFAULT 0; - SELECT COUNT(*) - INTO prohibited_word_found - FROM stixd_corpus.prohibited_words - WHERE word = NEW.base_form; - - IF prohibited_word_found > 0 THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'base_form contains a prohibited word'; - END IF; -END; -// -DELIMITER ; \ No newline at end of file diff --git a/info579/sql/triggers/18_trg_lexicon_prolog_constraints.sql b/info579/sql/triggers/18_trg_lexicon_prolog_constraints.sql deleted file mode 100644 index c9dbe36..0000000 --- a/info579/sql/triggers/18_trg_lexicon_prolog_constraints.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Create the trigger to check for prolog constraints --- DROP TRIGGER IF EXISTS stixd_corpus.trg_lexicon_prolog_constraints; -DELIMITER // -CREATE TRIGGER stixd_corpus.trg_lexicon_prolog_constraints -BEFORE INSERT ON stixd_corpus.lexicon -FOR EACH ROW -BEGIN - CALL stixd_corpus.check_prolog_constraints(NEW.base_form); -END; -// -DELIMITER ; diff --git a/info579/sql/triggers/master_triggers_lexicon.sql 
b/info579/sql/triggers/master_triggers_lexicon.sql
new file mode 100644
index 0000000..01ef6ee
--- /dev/null
+++ b/info579/sql/triggers/master_triggers_lexicon.sql
@@ -0,0 +1,33 @@
+-- Master triggers for the stixd_corpus.lexicon table.
+--
+-- Both triggers hand the incoming row's base_form, word_class, preposition and
+-- gender to stixd_corpus.sp_check_lexicon_constraints, so INSERT and UPDATE
+-- share a single set of checks.
+--
+-- NOTE(review): databases built with the earlier per-rule scripts still carry
+-- the trg_lexicon_* triggers; nothing here drops them. Confirm whether a
+-- migration step is needed.
+
+-- Drop first so the script can be re-run: CREATE TRIGGER fails when a trigger
+-- with the same name already exists.
+DROP TRIGGER IF EXISTS stixd_corpus.master_trigger_before_insert;
+DROP TRIGGER IF EXISTS stixd_corpus.master_trigger_before_update;
+
+-- The trigger bodies contain semicolons, so switch the client delimiter.
+DELIMITER //
+
+CREATE TRIGGER stixd_corpus.master_trigger_before_insert
+BEFORE INSERT ON stixd_corpus.lexicon
+FOR EACH ROW
+BEGIN
+    CALL stixd_corpus.sp_check_lexicon_constraints(NEW.base_form, NEW.word_class, NEW.preposition, NEW.gender);
+END//
+
+CREATE TRIGGER stixd_corpus.master_trigger_before_update
+BEFORE UPDATE ON stixd_corpus.lexicon
+FOR EACH ROW
+BEGIN
+    CALL stixd_corpus.sp_check_lexicon_constraints(NEW.base_form, NEW.word_class, NEW.preposition, NEW.gender);
+END//
+
+DELIMITER ;