@@ -158,7 +158,7 @@ def does_tbl_exist(cnx, tbl_name):
158
158
return (ret , exist )
159
159
160
160
# ---------------------------------------------------------
161
- def load_files (db_conf , schema , tbl_name , file_list , dir_processed , separator , with_quotes ):
161
+ def load_files (db_conf , schema , tbl_name , file_list , dir_processed , separator , with_quotes , null_string ):
162
162
"Load files into tables"
163
163
# ---------------------------------------------------------
164
164
ret = True
@@ -195,15 +195,17 @@ def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, w
195
195
stream = StringIO ()
196
196
if data_provider == 'ukbiobank' :
197
197
stream .write (open (fname , encoding = 'cp1252' , errors = 'ignore' ).read ().replace ('\\ ' , '' ))
198
+ # elif data_provider == 'thin':
199
+ # stream.write(open(fname, errors = 'ignore').read().replace('\\', '').replace(',NA,', ',,'))
198
200
else :
199
201
stream .write (open (fname , errors = 'ignore' ).read ().replace ('\\ ' , '' ))
200
202
# stream.write(open(fname, errors = 'ignore').read().replace('\\', '').replace('\u0000', ''))
201
203
stream .seek (0 )
202
204
stream .readline () #To avoid headers
203
205
if with_quotes == False :
204
- cursor1 .copy_from (stream , tbl_name , sep = separator , null = '' )
206
+ cursor1 .copy_from (stream , tbl_name , sep = separator , null = null_string )
205
207
else :
206
- cursor1 .copy_expert ("COPY " + tbl_name + " FROM STDIN WITH (FORMAT CSV, delimiter '" + separator + "', quote '\" ')" , stream )
208
+ cursor1 .copy_expert ("COPY " + tbl_name + " FROM STDIN WITH (FORMAT CSV, delimiter '" + separator + "', quote '\" ', NULL '" + null_string + "' )" , stream )
207
209
# ---------------------------------------------------------
208
210
# Move loaded file to PROCESSED directory
209
211
# ---------------------------------------------------------
@@ -222,7 +224,7 @@ def load_files(db_conf, schema, tbl_name, file_list, dir_processed, separator, w
222
224
return (ret )
223
225
224
226
# ---------------------------------------------------------
225
- def load_files_parallel (db_conf , schema , tbl_list , file_list , dir_processed , separator = ' ' , with_quotes = False ):
227
+ def load_files_parallel (db_conf , schema , tbl_list , file_list , dir_processed , separator = ' ' , with_quotes = False , null_string = '' ):
226
228
"Load files into tables"
227
229
# ---------------------------------------------------------
228
230
ret = True
@@ -234,7 +236,7 @@ def load_files_parallel(db_conf, schema, tbl_list, file_list, dir_processed, sep
234
236
# Load files in parallel (all tables), sequentially within each table
235
237
# ---------------------------------------------------------
236
238
with ProcessPoolExecutor (int (db_conf ['max_workers' ])) as executor :
237
- futures = [executor .submit (load_files , db_conf , schema , tbl_name , file_list [idx ], dir_processed , separator , with_quotes ) for idx , tbl_name in enumerate (tbl_list )]
239
+ futures = [executor .submit (load_files , db_conf , schema , tbl_name , file_list [idx ], dir_processed , separator , with_quotes , null_string ) for idx , tbl_name in enumerate (tbl_list )]
238
240
for future in as_completed (futures ):
239
241
if future .result () == False :
240
242
ret = False
0 commit comments