Skip to content

Commit 201c432

Browse files
authored
Merge pull request #130 from oxford-pharmacoepi/teen_dev
Teen dev
2 parents 0b404da + 7cd81e2 commit 201c432

27 files changed

+325
-384
lines changed

2_load_lookup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def main():
9292
try:
9393
(ret, dir_study, db_conf, debug) = mapping_util.get_parameters()
9494
if ret == True and dir_study != '':
95+
data_provider = db_conf['data_provider']
9596
database_type = db_conf['database_type']
9697
if database_type[:3].upper() == 'HES':
9798
database_type = 'hes'
@@ -135,7 +136,7 @@ def main():
135136
if ret == True:
136137
tbl_lookup = 'tbl_' + database_type + '_lookup'
137138
tbl_lookup_list = [tbl for tbl in db_conf[tbl_lookup]]
138-
if 'ukb' == database_type:
139+
if 'ukb' == data_provider:
139140
file_lookup_list = [[dir_lookup + '*' + tbl.replace("lookup", "coding") + '*.tsv'] for tbl in tbl_lookup_list]
140141
else:
141142
file_lookup_list = [[dir_lookup + '*' + tbl + '*.txt'] for tbl in tbl_lookup_list]

3_load_cdm_vocabulary.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ def main():
373373
data_provider = db_conf['data_provider']
374374
prefix = ''
375375
with_quotes = True
376-
if data_provider == 'cprd':
376+
if data_provider == 'cprd' or data_provider == 'ukb':
377377
extension = '.csv'
378378
separator = ' '
379379
with_quotes = False

__postgres_db_conf.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
'password': 'pwd',
44
'database': 'database_name',
55
'data_provider': 'name_of_provider', #e.g. 'cprd', 'thin', 'ukb'('ukbiobank' for the existing mapped one)
6-
'database_type': 'type of database', #e.g. 'gold, 'aurum', 'hesapc'
6+
'database_type': 'type of database', #e.g. 'gold', 'aurum', 'hesapc', 'ukb'(baseline), 'ukb_gp', 'ukb_hesin'
77
'source_release_date': 'date', #e.g. '2022-05-01'
88
'cdm_version': '5.3', #e.g. '5.3', '5.4'
99
'cdm_etl_reference': 'name of developer',
@@ -35,8 +35,10 @@
3535
'tbl_hesae': ['hesae_patient', 'hesae_attendance', 'hesae_diagnosis', 'hesae_investigation', 'hesae_treatment', 'hesae_hrg', 'hesae_pathway'],
3636
'tbl_ons': ['ons_death'],
3737
'tbl_ncras': ['tumour', 'treatment'],
38-
'tbl_ukb_lookup': ['lookup269', 'lookup270', 'lookup626', 'lookup10'],
39-
'tbl_ukb': ['baseline', 'death', 'death_cause', 'gp_clinical', 'gp_registrations', 'gp_scripts', 'hesin', 'hesin_critical', 'hesin_delivery', 'hesin_diag', 'hesin_maternity', 'hesin_oper', 'hesin_psych', 'cancer'],
38+
'tbl_ukb_gp_lookup': ['lookup626'],
39+
'tbl_ukb': ['baseline', 'death', 'death_cause', 'cancer'],
40+
'tbl_ukb_gp': ['baseline', 'death', 'death_cause', 'gp_clinical', 'gp_registrations', 'gp_scripts'],
41+
'tbl_ukb_hesin': ['baseline', 'death', 'death_cause', 'hesin', 'hesin_critical', 'hesin_delivery', 'hesin_diag', 'hesin_maternity', 'hesin_oper', 'hesin_psych'],
4042
'tbl_cprd': ['denominator', 'documentation', 'lookups', 'reference'],
4143
'tbl_cdm': ['care_site', 'condition_era', 'condition_occurrence', 'death', 'device_exposure', 'dose_era', 'drug_era', 'drug_exposure', 'location', 'measurement', 'observation', 'observation_period', 'person', 'procedure_occurrence', 'provider', 'visit_detail', 'visit_occurrence'],
4244
'tbl_cdm_voc': ['drug_strength', 'concept', 'concept_relationship', 'concept_ancestor', 'concept_synonym', 'vocabulary', 'relationship', 'concept_class', 'domain']

sql_scripts/1a_ukb_drop.sql

-15
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,4 @@
11
drop table if exists {SOURCE_SCHEMA}.baseline CASCADE;
22
drop table if exists {SOURCE_SCHEMA}.death CASCADE;
33
drop table if exists {SOURCE_SCHEMA}.death_cause CASCADE;
4-
drop table if exists {SOURCE_SCHEMA}.gp_registrations CASCADE;
5-
drop table if exists {SOURCE_SCHEMA}.gp_clinical CASCADE;
6-
drop table if exists {SOURCE_SCHEMA}.gp_scripts CASCADE;
7-
drop table if exists {SOURCE_SCHEMA}.hesin CASCADE;
8-
drop table if exists {SOURCE_SCHEMA}.hesin_critical CASCADE;
9-
drop table if exists {SOURCE_SCHEMA}.hesin_delivery CASCADE;
10-
drop table if exists {SOURCE_SCHEMA}.hesin_diag CASCADE;
11-
drop table if exists {SOURCE_SCHEMA}.hesin_maternity CASCADE;
12-
drop table if exists {SOURCE_SCHEMA}.hesin_oper CASCADE;
13-
drop table if exists {SOURCE_SCHEMA}.hesin_psych CASCADE;
144
drop table if exists {SOURCE_SCHEMA}.cancer CASCADE;
15-
drop table if exists {SOURCE_SCHEMA}.covid19_result_england CASCADE;
16-
drop table if exists {SOURCE_SCHEMA}.covid19_result_scotland CASCADE;
17-
drop table if exists {SOURCE_SCHEMA}.covid19_result_wales CASCADE;
18-
drop table if exists {SOURCE_SCHEMA}.covid19_vaccination CASCADE;
19-
drop table if exists {SOURCE_SCHEMA}.covid19_misc CASCADE;

sql_scripts/1a_ukb_gp_drop.sql

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
drop table if exists {SOURCE_SCHEMA}.baseline CASCADE;
2+
drop table if exists {SOURCE_SCHEMA}.death CASCADE;
3+
drop table if exists {SOURCE_SCHEMA}.death_cause CASCADE;
4+
drop table if exists {SOURCE_SCHEMA}.gp_registrations CASCADE;
5+
drop table if exists {SOURCE_SCHEMA}.gp_clinical CASCADE;
6+
drop table if exists {SOURCE_SCHEMA}.gp_scripts CASCADE;

sql_scripts/1a_ukb_hesin_drop.sql

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
drop table if exists {SOURCE_SCHEMA}.baseline CASCADE;
2+
drop table if exists {SOURCE_SCHEMA}.death CASCADE;
3+
drop table if exists {SOURCE_SCHEMA}.death_cause CASCADE;
4+
drop table if exists {SOURCE_SCHEMA}.hesin CASCADE;
5+
drop table if exists {SOURCE_SCHEMA}.hesin_critical CASCADE;
6+
drop table if exists {SOURCE_SCHEMA}.hesin_delivery CASCADE;
7+
drop table if exists {SOURCE_SCHEMA}.hesin_diag CASCADE;
8+
drop table if exists {SOURCE_SCHEMA}.hesin_maternity CASCADE;
9+
drop table if exists {SOURCE_SCHEMA}.hesin_oper CASCADE;
10+
drop table if exists {SOURCE_SCHEMA}.hesin_psych CASCADE;

sql_scripts/1b_ukb_create.sql

-176
Original file line numberDiff line numberDiff line change
@@ -40,182 +40,6 @@ CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.death_cause (
4040
cause_icd10 VARCHAR(8)
4141
)TABLESPACE pg_default;
4242

43-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_registrations (
44-
eid bigint,
45-
data_provider int,
46-
reg_date date,
47-
deduct_date date
48-
49-
)TABLESPACE pg_default;
50-
51-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_clinical (
52-
eid bigint,
53-
data_provider int,
54-
event_dt date,
55-
read_2 varchar(7),
56-
read_3 varchar(7),
57-
value1 VARCHAR(800),
58-
value2 VARCHAR(800),
59-
value3 VARCHAR(800)
60-
)TABLESPACE pg_default;
61-
62-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_scripts (
63-
eid bigint,
64-
data_provider int,
65-
issue_date date,
66-
read_2 varchar(7),
67-
bnf_code varchar(15),
68-
dmd_code varchar(20),
69-
drug_name varchar(600),
70-
quantity varchar(250)
71-
)TABLESPACE pg_default;
72-
73-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin (
74-
eid bigint,
75-
ins_index bigint,
76-
dsource varchar(10),
77-
source int,
78-
epistart date,
79-
epiend date,
80-
epidur int,
81-
bedyear int,
82-
epistat int,
83-
epitype int,
84-
epiorder int,
85-
spell_index int,
86-
spell_seq int,
87-
spelbgin int,
88-
spelend varchar(10),
89-
speldur int,
90-
pctcode varchar(10),
91-
gpprpct varchar(10),
92-
category int,
93-
elecdate date,
94-
elecdur int,
95-
admidate date,
96-
admimeth_uni int,
97-
admimeth varchar(4),
98-
admisorc_uni int,
99-
admisorc varchar(4),
100-
firstreg int,
101-
classpat_uni int,
102-
classpat varchar(4),
103-
intmanag_uni int,
104-
intmanag int,
105-
mainspef_uni int,
106-
mainspef varchar(10),
107-
tretspef_uni int,
108-
tretspef varchar(10),
109-
operstat int,
110-
disdate date,
111-
dismeth_uni int,
112-
dismeth int,
113-
disdest_uni int,
114-
disdest varchar(2),
115-
carersi int
116-
)TABLESPACE pg_default;
117-
118-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_critical (
119-
eid bigint,
120-
ins_index bigint,
121-
arr_index bigint,
122-
dsource varchar(10),
123-
source int,
124-
ccstartdate date,
125-
ccadmitype int,
126-
ccadmisorc int,
127-
ccsorcloc int,
128-
ccdisdate date,
129-
ccdisrdydate date,
130-
ccdisstat int,
131-
ccdisdest int,
132-
ccdisloc int,
133-
ccapcrel int,
134-
bressupdays int,
135-
aressupdays int,
136-
bcardsupdays int,
137-
acardsupdays int,
138-
rensupdays int,
139-
neurosupdays int,
140-
gisupdays int,
141-
dermsupdays int,
142-
liversupdays int,
143-
orgsupmax int,
144-
cclev2days int,
145-
cclev3days int,
146-
ccunitfun int,
147-
unitbedconfig int
148-
)TABLESPACE pg_default;
149-
150-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_delivery (
151-
eid bigint,
152-
ins_index bigint,
153-
arr_index bigint,
154-
gestat int,
155-
delplac int,
156-
delmeth varchar(10),
157-
birordr varchar(10),
158-
birstat int,
159-
biresus int,
160-
sexbaby int,
161-
birweight int,
162-
delstat int
163-
)TABLESPACE pg_default;
164-
165-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_diag (
166-
eid bigint,
167-
ins_index bigint,
168-
arr_index bigint,
169-
level int,
170-
diag_icd9 varchar(10),
171-
diag_icd9_nb varchar(10),
172-
diag_icd10 varchar(10),
173-
diag_icd10_nb varchar(10)
174-
)TABLESPACE pg_default;
175-
176-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_maternity (
177-
eid bigint,
178-
ins_index bigint,
179-
numbaby varchar(1),
180-
numpreg int,
181-
anasdate date,
182-
anagest int,
183-
antedur int,
184-
delinten int,
185-
delchang int,
186-
delprean int,
187-
delposan int,
188-
delonset int,
189-
postdur int,
190-
matage int
191-
)TABLESPACE pg_default;
192-
193-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_oper (
194-
eid bigint,
195-
ins_index bigint,
196-
arr_index bigint,
197-
level int,
198-
opdate date,
199-
oper3 varchar(10),
200-
oper3_nb varchar(10),
201-
oper4 varchar(10),
202-
oper4_nb varchar(10),
203-
posopdur int,
204-
preopdur int
205-
)TABLESPACE pg_default;
206-
207-
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.hesin_psych (
208-
eid bigint,
209-
ins_index bigint,
210-
detncat_uni int,
211-
detncat int,
212-
detndate date,
213-
mentcat int,
214-
admistat_uni int,
215-
admistat int,
216-
leglstat int
217-
)TABLESPACE pg_default;
218-
21943
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.cancer (
22044
eid bigint not null,
22145
p40005_i0 date,

sql_scripts/1b_ukb_gp_create.sql

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.baseline (
2+
eid bigint not null,
3+
p31 smallint,
4+
p34 smallint,
5+
p52 smallint,
6+
p53_i0 date,
7+
p53_i1 date,
8+
p53_i2 date,
9+
p53_i3 date,
10+
p54_i0 NUMERIC,
11+
p54_i1 NUMERIC,
12+
p54_i2 NUMERIC,
13+
p54_i3 NUMERIC,
14+
p200 date,
15+
p20143 date,
16+
p21000_i0 NUMERIC,
17+
p21000_i1 NUMERIC,
18+
p21000_i2 NUMERIC,
19+
p21000_i3 NUMERIC,
20+
p21022 int,
21+
p22189 NUMERIC,
22+
p26410 NUMERIC,
23+
p26426 NUMERIC,
24+
p26427 NUMERIC
25+
)TABLESPACE pg_default;
26+
27+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.death (
28+
eid bigint,
29+
ins_index int,
30+
dsource VARCHAR(4),
31+
source int,
32+
date_of_death date
33+
)TABLESPACE pg_default;
34+
35+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.death_cause (
36+
eid bigint,
37+
ins_index int,
38+
arr_index int,
39+
level int,
40+
cause_icd10 VARCHAR(8)
41+
)TABLESPACE pg_default;
42+
43+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_registrations (
44+
eid bigint,
45+
data_provider int,
46+
reg_date date,
47+
deduct_date date
48+
49+
)TABLESPACE pg_default;
50+
51+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_clinical (
52+
eid bigint,
53+
data_provider int,
54+
event_dt date,
55+
read_2 varchar(7),
56+
read_3 varchar(7),
57+
value1 VARCHAR(800),
58+
value2 VARCHAR(800),
59+
value3 VARCHAR(800)
60+
)TABLESPACE pg_default;
61+
62+
CREATE TABLE IF NOT EXISTS {SOURCE_SCHEMA}.gp_scripts (
63+
eid bigint,
64+
data_provider int,
65+
issue_date date,
66+
read_2 varchar(7),
67+
bnf_code varchar(15),
68+
dmd_code varchar(20),
69+
drug_name varchar(600),
70+
quantity varchar(250)
71+
)TABLESPACE pg_default;

0 commit comments

Comments
 (0)