-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path0_manage_gold_files.py
112 lines (108 loc) · 4.11 KB
/
0_manage_gold_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#--------------------------------------------------------------
# Author = A. Delmestri
# Version = 1.0
# Manage GOLD flat files
# -------------------------------------------------------------
import glob
import importlib.util
import os
import subprocess
import sys
import time
from importlib.machinery import SourceFileLoader
# ---------------------------------------------------------
# Load the sibling helper module 'mapping_util.py' from this script's own
# directory, whatever the current working directory is.
# Loader.load_module() is deprecated, so the module is built from a spec and
# executed explicitly instead.
_mapping_util_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping_util.py')
_mapping_util_spec = importlib.util.spec_from_file_location('mapping_util', _mapping_util_path)
mapping_util = importlib.util.module_from_spec(_mapping_util_spec)
# load_module() used to register the module in sys.modules; keep doing so in
# order that any lookup of 'mapping_util' by name still resolves to this object.
sys.modules['mapping_util'] = mapping_util
_mapping_util_spec.loader.exec_module(mapping_util)
# ---------------------------------------------------------
def sort_dirs(dir_study, dir_downloaded, dir_data, conf=None):
    """Create all the directories needed to unpack and process the files.

    The download directory must already exist; when it does not, a message is
    printed, nothing is created and False is returned. Otherwise the
    following directories are created if missing:

    * ``<dir_downloaded>/processed``
    * ``<dir_data>`` and, for every table in ``conf['tbl_gold']``,
      ``<dir_data>/<table>/processed``
    * ``<dir_study>/<table>`` for every table in ``conf['tbl_cprd']``, plus a
      ``processed`` subfolder for the ``denominators`` and ``lookups`` tables

    Args:
        dir_study: Root directory of the study.
        dir_downloaded: Directory holding the downloaded archives.
        dir_data: Directory receiving the per-table data folders.
        conf: Mapping providing the ``tbl_gold`` and ``tbl_cprd`` lists of
            table names. When omitted, the module-level ``db_conf`` is used.

    Returns:
        True on success; False when the download directory is missing or an
        error occurred (the error is printed, not raised).
    """
    ret = True
    try:
        print("sort_dirs 1")
        if not os.path.exists(dir_downloaded):
            print("Directory {0} does not exist: no data were downloaded".format(dir_downloaded))
            ret = False
        else:
            if conf is None:
                conf = db_conf
            os.makedirs(os.path.join(dir_downloaded, 'processed'), exist_ok=True)
            # ---------------------------------------------------------
            # Create the data directory and one folder per GOLD table
            # ---------------------------------------------------------
            os.makedirs(dir_data, exist_ok=True)
            for tbl in conf['tbl_gold']:
                # makedirs also creates the intermediate <table> folder.
                os.makedirs(os.path.join(dir_data, tbl, 'processed'), exist_ok=True)
            # ---------------------------------------------------------
            # Create the directories of the other files
            # ---------------------------------------------------------
            for tbl in conf['tbl_cprd']:
                folder = os.path.join(dir_study, tbl)
                os.makedirs(folder, exist_ok=True)
                # Compare the table name, not the full folder path: a path
                # can never be equal to one of these bare names.
                if tbl in ('denominators', 'lookups'):
                    os.makedirs(os.path.join(folder, 'processed'), exist_ok=True)
        print("sort_dirs 2")
    except Exception as err:
        ret = False
        print("Function = {0}, Error = {1}, {2}".format("sort_dirs", type(err), err))
    return ret
# ---------------------------------------------------------
# MAIN PROGRAM
# ---------------------------------------------------------
def main():
    """Prepare the study folders and unzip the downloaded 7z archives.

    Reads the parameters through ``mapping_util.get_parameters()``, publishes
    the returned configuration as the module-level ``db_conf`` (read by
    ``sort_dirs``), creates the folder tree and then hands every ``*.7z``
    archive found in the download folder to
    ``mapping_util.execute_unzip_parallel`` together with its extraction
    method and destination folder.

    Any exception is reported on standard output rather than propagated.
    """
    global db_conf
    try:
        # ---------------------------------------------------------
        # Define directories
        # ---------------------------------------------------------
        (ret, dir_study, db_conf, _debug) = mapping_util.get_parameters()
        if ret == True and dir_study != '':
            dir_study = dir_study + "\\"
            dir_downloaded = dir_study + '_downloaded\\'
            dir_data = dir_study + 'data\\'
            # ---------------------------------------------------------
            # Create all necessary folders
            # ---------------------------------------------------------
            ret = sort_dirs(dir_study, dir_downloaded, dir_data)
            # ---------------------------------------------------------
            # Unzip files in folders
            # ---------------------------------------------------------
            if ret == True:
                # ---------------------------------------------------------
                # 7zip command 'e' = Extract
                # 7zip command 'x' = eXtract with full paths
                # The three lists are parallel: entry i of each one
                # describes the same archive, so they are filled together.
                file_list = []
                extraction_method = []
                extraction_folder = []
                for fname in sorted(glob.iglob(dir_downloaded + '*.7z')):
                    name = os.path.splitext(os.path.basename(fname))[0].lower()
                    if name in db_conf['tbl_gold']:
                        method, folder = 'x', dir_data
                    elif name in db_conf['tbl_cprd']:
                        method, folder = 'x', dir_study
                    else:
                        # Archive named after a GOLD table plus a suffix:
                        # extract it flat into the folder of the first
                        # table whose name is a prefix of the archive name.
                        tbl = next((t for t in db_conf['tbl_gold'] if name.startswith(t)), None)
                        if tbl is None:
                            # Without a destination the archive cannot be
                            # extracted; skipping it keeps the lists aligned.
                            print("Archive {0} does not match any known table: skipped".format(fname))
                            continue
                        method, folder = 'e', dir_data + tbl
                    file_list.append(fname)
                    extraction_method.append(method)
                    extraction_folder.append(folder)
                ret = mapping_util.execute_unzip_parallel(file_list, extraction_method, extraction_folder)
    except Exception as err:
        print(str(err))
# ---------------------------------------------------------
# Protect entry point
# ---------------------------------------------------------
if __name__ == "__main__":
    # Run the workflow only when the file is executed as a script, not when
    # it is imported.
    main()