1
+ -- ------------------------------
2
+ -- PERSON
3
+ -- ------------------------------
4
+ with cte1 as (
5
+ select distinct (eid) as eid
6
+ from {SOURCE_SCHEMA}.hesin as t1
7
+ ),
8
+ ukb AS (
9
+ select
10
+ t1 .concept_id as source_concept_id,
11
+ t1 .concept_code as source_code,
12
+ COALESCE(t2 .target_concept_id , 0 ) as target_concept_id,
13
+ t2 .target_domain_id
14
+ from {VOCABULARY_SCHEMA}.concept as t1
15
+ left join {VOCABULARY_SCHEMA}.source_to_standard_vocab_map as t2 on t1 .concept_id = t2 .source_concept_id and t2 .source_vocabulary_id = ' UK Biobank'
16
+ where t1 .vocabulary_id = ' UK Biobank' and (t1 .concept_code like ' 1001-%' or t1 .concept_code like ' 9-%' )
17
+ )
18
+ INSERT INTO {TARGET_SCHEMA}.person (
19
+ person_id ,
20
+ gender_concept_id ,
21
+ year_of_birth ,
22
+ month_of_birth ,
23
+ day_of_birth ,
24
+ birth_datetime ,
25
+ race_concept_id ,
26
+ ethnicity_concept_id ,
27
+ location_id ,
28
+ provider_id ,
29
+ care_site_id ,
30
+ person_source_value ,
31
+ gender_source_value ,
32
+ gender_source_concept_id ,
33
+ race_source_value ,
34
+ race_source_concept_id ,
35
+ ethnicity_source_value ,
36
+ ethnicity_source_concept_id
37
+ )
38
+ SELECT
39
+ t1 .eid AS person_id,
40
+ t3 .target_concept_id AS gender_concept_id,
41
+ t2 .p34 AS year_of_birth,
42
+ t2 .p52 AS month_of_birth,
43
+ NULL ::int AS day_of_birth,
44
+ NULL ::timestamp AS birth_datetime,
45
+ COALESCE(t4 .target_concept_id , 0 ) AS race_concept_id,
46
+ 0 AS ethnicity_concept_id,
47
+ NULL ::bigint AS location_id,
48
+ NULL ::bigint AS provider_id,
49
+ NULL ::int AS care_site_id,
50
+ t1 .eid ::varchar AS person_source_value,
51
+ CONCAT(' 9-' , t2 .p31 ) AS gender_source_value,
52
+ t3 .source_concept_id AS gender_source_concept_id,
53
+ CASE
54
+ WHEN t2 .p21000_i0 ::integer is not null then CONCAT(' 1001-' , t2 .p21000_i0 ::integer )
55
+ END
56
+ AS race_source_value,
57
+ t4 .source_concept_id AS race_source_concept_id,
58
+ NULL AS ethnicity_source_value,
59
+ NULL ::int AS ethnicity_source_concept_id
60
+ FROM cte1 as t1
61
+ INNER JOIN {SOURCE_SCHEMA}.baseline AS t2 ON t1 .eid = t2 .eid
62
+ JOIN ukb as t3 on CONCAT(' 9-' , t2 .p31 ) = t3 .source_code and t3 .target_domain_id = ' Gender'
63
+ LEFT JOIN ukb as t4 on CONCAT(' 1001-' , t2 .p21000_i0 ::integer ) = t4 .source_code ;
64
+
65
+ ALTER TABLE {TARGET_SCHEMA}.person ADD CONSTRAINT xpk_person PRIMARY KEY (person_id) USING INDEX TABLESPACE pg_default;
66
+ CREATE UNIQUE INDEX idx_person_id ON {TARGET_SCHEMA}.person (person_id ASC ) TABLESPACE pg_default;
67
+ CLUSTER {TARGET_SCHEMA}.person USING xpk_person;
68
+ CREATE INDEX idx_gender ON {TARGET_SCHEMA}.person (gender_concept_id ASC ) TABLESPACE pg_default;
69
+
70
+ -- ------------------------------
71
+ -- DEATH
72
+ -- ------------------------------
73
+ With ICD10_1 AS (
74
+ select source_concept_id, source_code
75
+ from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map
76
+ where source_vocabulary_id = ' ICD10'
77
+ group by source_concept_id, source_code
78
+ having count (* )= 1
79
+ ), ICD10 AS (
80
+ select t1 .source_concept_id , t1 .target_concept_id , t1 .source_vocabulary_id
81
+ from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map as t1
82
+ join ICD10_1 as t2 on t1 .source_concept_id = t2 .source_concept_id
83
+ where t1 .source_vocabulary_id = ' ICD10'
84
+
85
+ union
86
+
87
+ select source_concept_id, target_concept_id, source_vocabulary_id
88
+ from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map
89
+ where source_vocabulary_id = ' UKB_DEATH_CAUSE_STCM'
90
+ ), death_cause_1 AS (
91
+ select t1 .eid , t1 .ins_index , t1 .cause_icd10 , t2 .concept_id , t2 .concept_id as source_concept_id, t2 .concept_code as source_code
92
+ from {SOURCE_SCHEMA}.death_cause as t1
93
+ join {VOCABULARY_SCHEMA}.concept as t2 on t1 .cause_icd10 = t2 .concept_code or t1 .cause_icd10 = replace(t2 .concept_code , ' .' , ' ' )
94
+ where t2 .vocabulary_id = ' ICD10'
95
+ ), death_cause_2 AS (
96
+ select t1 .eid , t1 .ins_index , t1 .cause_icd10 , t3 .concept_id , 0 as source_concept_id, t1 .cause_icd10 as source_code
97
+ from {SOURCE_SCHEMA}.death_cause as t1
98
+ left join death_cause_1 as t2 on t1 .eid = t2 .eid and t1 .ins_index = t2 .ins_index
99
+ join {VOCABULARY_SCHEMA}.concept as t3 on left(t1 .cause_icd10 , 3 ) = t3 .concept_code
100
+ where t2 .eid is null and t3 .vocabulary_id = ' ICD10'
101
+ ), death_cause AS (
102
+ select * from death_cause_1
103
+ union
104
+ select * from death_cause_2
105
+ ), dead_patient AS (
106
+ select distinct t1 .eid , t1 .date_of_death , t2 .cause_icd10 , t2 .source_code , t2 .concept_id , t2 .source_concept_id
107
+ from {SOURCE_SCHEMA}.death as t1
108
+ left join death_cause as t2 on t1 .eid = t2 .eid and t1 .ins_index = t2 .ins_index
109
+ )
110
+ INSERT INTO {TARGET_SCHEMA}.death(
111
+ person_id,
112
+ death_date,
113
+ death_datetime,
114
+ death_type_concept_id,
115
+ cause_concept_id,
116
+ cause_source_value,
117
+ cause_source_concept_id
118
+ )
119
+ select
120
+ t1 .eid ,
121
+ t1 .date_of_death ,
122
+ t1 .date_of_death ,
123
+ 32879 , -- same as cdm_ukb_202003
124
+ CASE
125
+ WHEN t1 .cause_icd10 is not null THEN COALESCE(t2 .target_concept_id , 0 )
126
+ END,
127
+ t1 .source_code ,
128
+ t1 .source_concept_id
129
+ from dead_patient as t1
130
+ left join ICD10 as t2 on t1 .concept_id = t2 .source_concept_id
131
+ and (t2 .source_vocabulary_id = ' UKB_DEATH_CAUSE_STCM' or t2 .source_vocabulary_id = ' ICD10' );
132
+
133
+ -- ------------------------------
134
+ -- OBSERVATION_PERIOD --
135
+ -- ------------------------------
136
+ DROP SEQUENCE IF EXISTS {TARGET_SCHEMA}.observation_period_seq;
137
+
138
+ CREATE SEQUENCE {TARGET_SCHEMA}.observation_period_seq;
139
+
140
+ with cte as (
141
+ SELECT
142
+ eid,
143
+ LEAST(MIN (admidate), MIN (epistart),MIN (disdate), MIN (epiend)) AS min_date,
144
+ GREATEST(MAX (disdate), MAX (epiend), MAX (admidate), MAX (epistart)) AS max_date
145
+ FROM
146
+ {SOURCE_SCHEMA}.hesin
147
+ GROUP BY
148
+ eid
149
+ HAVING
150
+ (MIN (admidate) IS NOT NULL OR MIN (epistart) IS NOT NULL OR MIN (disdate) IS NOT NULL OR MIN (epiend) IS NOT NULL ) AND
151
+ (MAX (disdate) IS NOT NULL OR MAX (epiend) IS NOT NULL OR MAX (admidate) IS NOT NULL OR MAX (epistart) IS NOT NULL )
152
+ )
153
+ INSERT INTO {TARGET_SCHEMA}.OBSERVATION_PERIOD
154
+ (
155
+ observation_period_id,
156
+ person_id,
157
+ observation_period_start_date,
158
+ observation_period_end_date,
159
+ period_type_concept_id
160
+ )
161
+ select
162
+ nextval(' {TARGET_SCHEMA}.observation_period_seq' ),
163
+ eid,
164
+ min_date as observation_period_start_date,
165
+ max_date as observation_period_end_date,
166
+ 32880
167
+ from cte;
168
+
169
+ DROP SEQUENCE IF EXISTS {TARGET_SCHEMA}.observation_period_seq;
170
+
171
+ ALTER TABLE {TARGET_SCHEMA}.observation_period ADD CONSTRAINT xpk_observation_period PRIMARY KEY (observation_period_id);
172
+ CREATE INDEX idx_observation_period_id ON {TARGET_SCHEMA}.observation_period (person_id ASC );
173
+ CLUSTER {TARGET_SCHEMA}.observation_period USING idx_observation_period_id;
0 commit comments