Skip to content

Commit 1e2e4ed

Browse files
authored
Merge pull request #30 from oxford-pharmacoepi/anto_server1
Anto server1
2 parents cae589c + bb8f9ee commit 1e2e4ed

12 files changed

+70
-13
lines changed

8_load_source_denominator.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def main():
3131
while qa.lower() not in ['y', 'n', 'yes', 'no']:
3232
qa = input('I did not understand that. Are you sure you want to DROP the ' + database_type.upper() + ' denominators tables (y/n):')
3333
if qa.lower() in ['y', 'yes']:
34-
fname = dir_sql + '7a_' + database_type + '_denom_drop.sql'
34+
fname = dir_sql + '8a_' + database_type + '_denom_drop.sql'
3535
print('Calling ' + fname + ' ...')
3636
ret = mapping_util.execute_sql_file_parallel(db_conf, fname, False)
3737
if ret == True:
@@ -45,7 +45,7 @@ def main():
4545
# ---------------------------------------------------------
4646
# Create denominators tables
4747
# ---------------------------------------------------------
48-
fname = dir_sql + '7b_' + database_type + '_denom_create.sql'
48+
fname = dir_sql + '8b_' + database_type + '_denom_create.sql'
4949
print('Calling ' + fname + ' ...')
5050
ret = mapping_util.execute_sql_file_parallel(db_conf, fname, False)
5151
# ---------------------------------------------------------
@@ -72,7 +72,7 @@ def main():
7272
# Build PKs & IDXs
7373
# ---------------------------------------------------------
7474
print('Build PKs and IDXs ...')
75-
sql_file_list = sorted(glob.iglob(dir_sql + '7c_' + database_type + '_denom_pk_idx_*.sql'))
75+
sql_file_list = sorted(glob.iglob(dir_sql + '8c_' + database_type + '_denom_pk_idx_*.sql'))
7676
ret = mapping_util.execute_sql_files_parallel(db_conf, sql_file_list, True)
7777
if ret == True:
7878
print('Finished adding ' + database_type.upper() + ' PKs/indexes')

9_load_achilles_dqd.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ def main():
5656
if ret == True:
5757
data_provider = db_conf['data_provider']
5858
prefix = ''
59+
with_quotes = False
60+
null_string = ''
5961
if data_provider == 'cprd':
6062
extension = '.txt'
6163
separator = ' '
@@ -65,6 +67,8 @@ def main():
6567
elif data_provider == 'thin':
6668
extension = '.csv'
6769
separator = ','
70+
with_quotes = True
71+
null_string = 'NA'
6872
elif data_provider == 'ukbiobank':
6973
extension = '.tsv'
7074
separator = ' '
@@ -73,7 +77,7 @@ def main():
7377
print(tbl_list_full)
7478
file_list = [[dir_data + '*' + tbl + '*' + extension] for tbl in tbl_list]
7579
print(file_list)
76-
ret = mapping_util.load_files_parallel(db_conf, result_schema, tbl_list, file_list, dir_data_processed, separator)
80+
ret = mapping_util.load_files_parallel(db_conf, result_schema, tbl_list, file_list, dir_data_processed, separator, with_quotes, null_string)
7781
if ret == True:
7882
print('Finished loading cdm vocabulary.')
7983
# ---------------------------------------------------------

mapping_util.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def does_tbl_exist(cnx, tbl_name):
158158
return(ret, exist)
159159

160160
# ---------------------------------------------------------
161-
def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, with_quotes):
161+
def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, with_quotes, null_string):
162162
"Load files into tables"
163163
# ---------------------------------------------------------
164164
ret = True
@@ -195,15 +195,17 @@ def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, w
195195
stream = StringIO()
196196
if data_provider == 'ukbiobank':
197197
stream.write(open(fname, encoding='cp1252', errors = 'ignore').read().replace('\\', ''))
198+
# elif data_provider == 'thin':
199+
# stream.write(open(fname, errors = 'ignore').read().replace('\\', '').replace(',NA,', ',,'))
198200
else:
199201
stream.write(open(fname, errors = 'ignore').read().replace('\\', ''))
200202
# stream.write(open(fname, errors = 'ignore').read().replace('\\', '').replace('\u0000', ''))
201203
stream.seek(0)
202204
stream.readline() #To avoid headers
203205
if with_quotes == False:
204-
cursor1.copy_from(stream, tbl_name, sep = separator, null = '')
206+
cursor1.copy_from(stream, tbl_name, sep = separator, null = null_string)
205207
else:
206-
cursor1.copy_expert("COPY " + tbl_name + " FROM STDIN WITH (FORMAT CSV, delimiter '" + separator + "', quote '\"')", stream)
208+
cursor1.copy_expert("COPY " + tbl_name + " FROM STDIN WITH (FORMAT CSV, delimiter '" + separator + "', quote '\"', NULL '" + null_string + "')", stream)
207209
# ---------------------------------------------------------
208210
# Move loaded file to PROCESSED directory
209211
# ---------------------------------------------------------
@@ -222,7 +224,7 @@ def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, w
222224
return(ret)
223225

224226
# ---------------------------------------------------------
225-
def load_files_parallel(db_conf, schema, tbl_list, file_list, dir_processed, separator = ' ', with_quotes = False):
227+
def load_files_parallel(db_conf, schema, tbl_list, file_list, dir_processed, separator = ' ', with_quotes = False, null_string = ''):
226228
"Load files into tables"
227229
# ---------------------------------------------------------
228230
ret = True
@@ -234,7 +236,7 @@ def load_files_parallel(db_conf, schema, tbl_list, file_list, dir_processed, sep
234236
# Load files in parallel (all tables), sequentially within each table
235237
# ---------------------------------------------------------
236238
with ProcessPoolExecutor(int(db_conf['max_workers'])) as executor:
237-
futures = [executor.submit(load_files, db_conf, schema, tbl_name, file_list[idx], dir_processed, separator, with_quotes) for idx, tbl_name in enumerate(tbl_list)]
239+
futures = [executor.submit(load_files, db_conf, schema, tbl_name, file_list[idx], dir_processed, separator, with_quotes, null_string) for idx, tbl_name in enumerate(tbl_list)]
238240
for future in as_completed(futures):
239241
if future.result() == False:
240242
ret = False

sql_scripts/7c_aurum_denom_pk_idx_practice.sql

-2
This file was deleted.
File renamed without changes.

sql_scripts/8a_gold_denom_drop.sql

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
drop table if exists {SOURCE_SCHEMA}.gold_acceptable_pats CASCADE;
2+
drop table if exists {SOURCE_SCHEMA}.gold_allpractices CASCADE;
3+
drop table if exists {SOURCE_SCHEMA}.gold_eligibility CASCADE;

sql_scripts/7b_aurum_denom_create.sql sql_scripts/8b_aurum_denom_create.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.aurum_acceptablepats (
1818
uts date,
1919
lcd date,
2020
region int
21-
);
21+
) TABLESPACE pg_default;
2222

2323
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.aurum_practices (
2424
pracid int,
2525
lcd date,
2626
uts date,
2727
region int
28-
);
28+
) TABLESPACE pg_default;

sql_scripts/8b_gold_denom_create.sql

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
CREATE SCHEMA IF NOT EXISTS {SOURCE_SCHEMA};
2+
CREATE SCHEMA IF NOT EXISTS {SOURCE_NOK_SCHEMA};
3+
CREATE SCHEMA IF NOT EXISTS temp;
4+
CREATE SCHEMA IF NOT EXISTS results;
5+
6+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gold_acceptable_pats (
7+
patid bigint,
8+
gender char(1),
9+
yob int,
10+
mob int,
11+
frd date,
12+
crd date,
13+
regstat int DEFAULT NULL,
14+
reggap int DEFAULT NULL,
15+
internal smallint DEFAULT NULL,
16+
tod date DEFAULT NULL,
17+
toreason smallint DEFAULT NULL,
18+
deathdate date
19+
) TABLESPACE pg_default;
20+
21+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gold_allpractices (
22+
pracid int,
23+
region int,
24+
lcd date,
25+
uts date
26+
) TABLESPACE pg_default;
27+
28+
patid pracid linkdate hes_apc_e ons_death_e lsoa_e sgss_e chess_e hes_op_e hes_ae_e hes_did_e cr_e sact_e rtds_e mhds_e icnarc_e
29+
30+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gold_eligibility (
31+
patid bigint,
32+
pracid integer,
33+
linkdare date,
34+
hes_apc_e smallint,
35+
ons_death_e smallint,
36+
lsoa_e smallint,
37+
sgss_e smallint,
38+
chess_e smallint,
39+
hes_op_e smallint,
40+
hes_ae_e smallint,
41+
hes_did_e smallint,
42+
cr_e smallint,
43+
sact_e smallint,
44+
rtds_e smallint,
45+
mhds_e smallint,
46+
icnarc_e smallint
47+
) TABLESPACE pg_default;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
alter table {SOURCE_SCHEMA}.aurum_practices add constraint pk_denom_prac primary key (pracid) USING INDEX TABLESPACE pg_default;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
alter table {SOURCE_SCHEMA}.gold_acceptable_pats add constraint pk_accept_pat primary key (patid, gender, yob) USING INDEX TABLESPACE pg_default;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
alter table {SOURCE_SCHEMA}.gold_allpractices add constraint pk_denom_prac primary key (pracid) USING INDEX TABLESPACE pg_default;

0 commit comments

Comments
 (0)