-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path5_build_cdm_pk_idx_fk.py
142 lines (135 loc) · 6.08 KB
/
5_build_cdm_pk_idx_fk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import time
import sys
import glob
import importlib.util
from importlib.machinery import SourceFileLoader  # retained for backward compatibility; no longer used below

# Load the shared helper module from the directory this script lives in, so the
# script works no matter what the current working directory is.
# NOTE: SourceFileLoader.load_module() was deprecated since Python 3.4 and
# removed in Python 3.12, so the importlib.util spec API is used instead.
_spec = importlib.util.spec_from_file_location(
    'mapping_util',
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping_util.py'))
mapping_util = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(mapping_util)
# ---------------------------------------------------------
def build_fk(dir_code):
    # ---------------------------------------------------------
    # Build the CDM foreign keys, running independent FK scripts in parallel.
    #
    # dir_code: directory holding the numbered SQL scripts (trailing separator
    #           included by the caller).
    # Returns True on success, False as soon as any batch fails.
    #
    # Scripts are grouped into batches such that no two scripts in the same
    # batch touch the same table, so each batch can run concurrently.
    # Reads the module-level global db_conf (set by main()).
    # ---------------------------------------------------------
    ret = True
    try:
        # Fixed batches for the smaller/dimension tables; order matters only
        # in that a failure stops all later work.
        batches = [
            ["5b_cdm_fk_care_site__concept.sql", "5b_cdm_fk_person__provider.sql"],
            ["5b_cdm_fk_care_site__location.sql", "5b_cdm_fk_provider__concept.sql", "5b_cdm_fk_observation_period__person.sql"],
            ["5b_cdm_fk_person__location.sql", "5b_cdm_fk_provider__care_site.sql", "5b_cdm_fk_observation_period__concept.sql"],
            ["5b_cdm_fk_person__care_site.sql", "5b_cdm_fk_death__concept.sql"],
            ["5b_cdm_fk_visit_occurrence__care_site.sql", "5b_cdm_fk_death__person.sql", "5b_cdm_fk_metadata__concept.sql"],
            ["5b_cdm_fk_person__concept.sql", "5b_cdm_fk_visit_detail__care_site.sql"],
            ["5b_cdm_fk_specimen__person.sql", "5b_cdm_fk_dose_era__concept.sql"],
            ["5b_cdm_fk_dose_era__person.sql", "5b_cdm_fk_specimen__concept.sql"],
        ]
        for batch in batches:
            ret = mapping_util.execute_sql_files_parallel(
                db_conf, [dir_code + f for f in batch], True)
            if not ret:
                break
        if ret:
            # Large event tables: pair child tables (list1) with referenced
            # tables (list2) on a rotating diagonal, so each round runs up to
            # len(list2) FK builds in parallel with no table appearing twice
            # in the same round; len(list1) rotations cover every combination.
            sql_file_list1 = sorted(glob.iglob(dir_code + '5b_cdm_fk_*.sql'))
            sql_file_list2 = sorted(glob.iglob(dir_code + '5b' + db_conf['cdm_version'][2] + '_cdm_fk_*.sql'))
            list1 = ['condition_occurrence','device_exposure','drug_exposure','measurement','note','observation','procedure_occurrence','visit_detail','visit_occurrence']
            list2 = ['concept','person','provider','visit_detail','visit_occurrence']
            for _ in range(len(list1)):
                plist = []
                for j in range(len(list2)):
                    # Prefer the version-independent script; fall back to the
                    # CDM-minor-version-specific one (e.g. '5b4_...') if present.
                    fname = dir_code + '5b_cdm_fk_' + list1[j] + '__' + list2[j] + '.sql'
                    if fname in sql_file_list1:
                        plist.append(fname)
                    else:
                        fname = dir_code + '5b' + db_conf['cdm_version'][2] + '_cdm_fk_' + list1[j] + '__' + list2[j] + '.sql'
                        if fname in sql_file_list2:
                            plist.append(fname)
                if plist:
                    ret = mapping_util.execute_sql_files_parallel(db_conf, plist, True)
                    if not ret:
                        break
                list1.append(list1.pop(0))  # rotate child tables for the next diagonal
        # Extra FKs that exist only in CDM v5.4.
        if ret and db_conf['cdm_version'][:3] == '5.4':
            fname = dir_code + "5c4_cdm_fk.sql"
            ret = mapping_util.execute_multiple_queries(db_conf, fname, None, None, True, True)
    except Exception:
        # A bare 'except:' here would also swallow KeyboardInterrupt/SystemExit.
        ret = False
        err = sys.exc_info()
        print("Function = {0}, Error = {1}, {2}".format("build_fk", err[0], err[1]))
    return ret
# ---------------------------------------------------------
# MAIN PROGRAM
# ---------------------------------------------------------
def main():
    # Entry point: create PKs/indexes and then FKs on all CDM tables,
    # asking the user for confirmation before each phase.
    ret = True
    global db_conf  # build_fk() reads the connection settings from this global
    try:
        (ret, dir_study, db_conf, debug) = mapping_util.get_parameters()
        if ret and dir_study != '':
            time0 = time.time()
            # os.path.join keeps the script portable; the original hard-coded
            # Windows '\\' separators and broke on POSIX systems. The trailing
            # '' yields a path ending in a separator, as downstream code expects.
            dir_sql = os.path.join(os.getcwd(), "sql_scripts", "")
            processed_folder = os.path.join(dir_sql, "processed", "")
            if not os.path.exists(processed_folder):
                os.makedirs(processed_folder)
            # ---------------------------------------------------------
            # Build PKs & IDXs
            # ---------------------------------------------------------
            qa = input('Are you sure you want to CREATE PK/IDX on all cdm tables (y/n):')
            while qa.lower() not in ['y', 'n', 'yes', 'no']:
                qa = input('I did not understand that. Are you sure you want to CREATE PK/IDX on all cdm tables (y/n):')
            if qa.lower() in ['y', 'yes']:
                print('Build PKs and IDXs ...')
                # Version-independent scripts plus any minor-version-specific ones
                # (e.g. '5a4_...' when cdm_version is '5.4').
                sql_file_list = sorted(glob.iglob(dir_sql + '5a_cdm_pk_idx_*.sql'))
                sql_file_list.extend(sorted(glob.iglob(dir_sql + '5a' + db_conf['cdm_version'][2] + '_cdm_pk_idx_*.sql')))
                if db_conf['source_schema'] == '':  # For data provided already mapped or data being merged
                    sql_file_list.extend(sorted(glob.iglob(dir_sql + '5a__cdm_pk_idx_*.sql')))
                ret = mapping_util.execute_sql_files_parallel(db_conf, sql_file_list, True)
                # ---------------------------------------------------------
                # Build FK
                # ---------------------------------------------------------
                if ret:
                    qa = input('Are you sure you want to CREATE FK on all cdm tables (y/n):')
                    while qa.lower() not in ['y', 'n', 'yes', 'no']:
                        qa = input('I did not understand that. Are you sure you want to CREATE FK on all cdm tables (y/n):')
                    if qa.lower() in ['y', 'yes']:
                        print('Build FKs ...')
                        ret = build_fk(dir_sql)
                        if ret:
                            print('Finished building FK')
            # ---------------------------------------------------------
            # Report total time
            # ---------------------------------------------------------
            if ret:
                process_finished = "{0} completed in {1}".format(os.path.basename(__file__), mapping_util.calc_time(time.time() - time0))
                print(process_finished)
    except Exception:
        # A bare 'except:' would also swallow KeyboardInterrupt/SystemExit.
        print(str(sys.exc_info()[1]))
# ---------------------------------------------------------
# Protect entry point
# ---------------------------------------------------------
# Run main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()