Skip to content

Commit e5c1824

Browse files
authored
Merge pull request #135 from oxford-pharmacoepi/mk_dev1
UKB HESIN Dataset Mapping.
2 parents 9295819 + 0cc02f6 commit e5c1824

12 files changed

+1744
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
--baseline
12
alter table {SOURCE_SCHEMA}.baseline add constraint pk_baseline primary key (eid) USING INDEX TABLESPACE pg_default;
+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
alter table {SOURCE_SCHEMA}.death add constraint pk_death primary key (eid, ins_index) USING INDEX TABLESPACE pg_default;
1+
--death
2+
alter table {SOURCE_SCHEMA}.death add constraint pk_death primary key (eid, ins_index) USING INDEX TABLESPACE pg_default;

sql_scripts/1c_ukb_hesin_pk_idx_death_cause.sql

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
--death_cause
12
alter table {SOURCE_SCHEMA}.death_cause add constraint pk_death_cause primary key (eid, ins_index, arr_index) USING INDEX TABLESPACE pg_default;
23
create index idx_death_eid_ins_idx on {SOURCE_SCHEMA}.death_cause(eid, ins_index) TABLESPACE pg_default;
34
create index idx_death_cause_level on {SOURCE_SCHEMA}.death_cause(level) TABLESPACE pg_default;
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
--hesin_oper
2-
ALTER TABLE {SOURCE_SCHEMA}.hesin_oper ADD CONSTRAINT pk_hesin_oper PRIMARY KEY (eid,ins_index) USING INDEX TABLESPACE pg_default;
2+
ALTER TABLE {SOURCE_SCHEMA}.hesin_oper ADD CONSTRAINT pk_hesin_oper PRIMARY KEY (eid,ins_index,arr_index) USING INDEX TABLESPACE pg_default;
33
create index idx_hesin_oper_eid on {SOURCE_SCHEMA}.hesin_oper (eid) TABLESPACE pg_default;
+173
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
--------------------------------
2+
-- PERSON
3+
--------------------------------
4+
with cte1 as (
5+
select distinct(eid) as eid
6+
from {SOURCE_SCHEMA}.hesin as t1
7+
),
8+
ukb AS(
9+
select
10+
t1.concept_id as source_concept_id,
11+
t1.concept_code as source_code,
12+
COALESCE(t2.target_concept_id, 0) as target_concept_id,
13+
t2.target_domain_id
14+
from {VOCABULARY_SCHEMA}.concept as t1
15+
left join {VOCABULARY_SCHEMA}.source_to_standard_vocab_map as t2 on t1.concept_id = t2.source_concept_id and t2.source_vocabulary_id = 'UK Biobank'
16+
where t1.vocabulary_id = 'UK Biobank' and (t1.concept_code like '1001-%' or t1.concept_code like '9-%')
17+
)
18+
INSERT INTO {TARGET_SCHEMA}.person (
19+
person_id ,
20+
gender_concept_id ,
21+
year_of_birth ,
22+
month_of_birth ,
23+
day_of_birth ,
24+
birth_datetime ,
25+
race_concept_id ,
26+
ethnicity_concept_id ,
27+
location_id ,
28+
provider_id ,
29+
care_site_id ,
30+
person_source_value ,
31+
gender_source_value ,
32+
gender_source_concept_id ,
33+
race_source_value ,
34+
race_source_concept_id ,
35+
ethnicity_source_value ,
36+
ethnicity_source_concept_id
37+
)
38+
SELECT
39+
t1.eid AS person_id,
40+
t3.target_concept_id AS gender_concept_id,
41+
t2.p34 AS year_of_birth,
42+
t2.p52 AS month_of_birth,
43+
NULL::int AS day_of_birth,
44+
NULL::timestamp AS birth_datetime,
45+
COALESCE(t4.target_concept_id, 0) AS race_concept_id,
46+
0 AS ethnicity_concept_id,
47+
NULL::bigint AS location_id,
48+
NULL::bigint AS provider_id,
49+
NULL::int AS care_site_id,
50+
t1.eid::varchar AS person_source_value,
51+
CONCAT('9-', t2.p31) AS gender_source_value,
52+
t3.source_concept_id AS gender_source_concept_id,
53+
CASE
54+
WHEN t2.p21000_i0::integer is not null then CONCAT('1001-', t2.p21000_i0::integer)
55+
END
56+
AS race_source_value,
57+
t4.source_concept_id AS race_source_concept_id,
58+
NULL AS ethnicity_source_value,
59+
NULL::int AS ethnicity_source_concept_id
60+
FROM cte1 as t1
61+
INNER JOIN {SOURCE_SCHEMA}.baseline AS t2 ON t1.eid = t2.eid
62+
JOIN ukb as t3 on CONCAT('9-', t2.p31) = t3.source_code and t3.target_domain_id = 'Gender'
63+
LEFT JOIN ukb as t4 on CONCAT('1001-', t2.p21000_i0::integer) = t4.source_code;
64+
65+
ALTER TABLE {TARGET_SCHEMA}.person ADD CONSTRAINT xpk_person PRIMARY KEY (person_id) USING INDEX TABLESPACE pg_default;
66+
CREATE UNIQUE INDEX idx_person_id ON {TARGET_SCHEMA}.person (person_id ASC) TABLESPACE pg_default;
67+
CLUSTER {TARGET_SCHEMA}.person USING xpk_person;
68+
CREATE INDEX idx_gender ON {TARGET_SCHEMA}.person (gender_concept_id ASC) TABLESPACE pg_default;
69+
70+
--------------------------------
71+
-- DEATH
72+
--------------------------------
73+
With ICD10_1 AS(
74+
select source_concept_id, source_code
75+
from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map
76+
where source_vocabulary_id = 'ICD10'
77+
group by source_concept_id, source_code
78+
having count(*)=1
79+
), ICD10 AS(
80+
select t1.source_concept_id, t1.target_concept_id, t1.source_vocabulary_id
81+
from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map as t1
82+
join ICD10_1 as t2 on t1.source_concept_id = t2.source_concept_id
83+
where t1.source_vocabulary_id = 'ICD10'
84+
85+
union
86+
87+
select source_concept_id, target_concept_id, source_vocabulary_id
88+
from {VOCABULARY_SCHEMA}.source_to_standard_vocab_map
89+
where source_vocabulary_id = 'UKB_DEATH_CAUSE_STCM'
90+
), death_cause_1 AS(
91+
select t1.eid, t1.ins_index, t1.cause_icd10, t2.concept_id, t2.concept_id as source_concept_id, t2.concept_code as source_code
92+
from {SOURCE_SCHEMA}.death_cause as t1
93+
join {VOCABULARY_SCHEMA}.concept as t2 on t1.cause_icd10 = t2.concept_code or t1.cause_icd10 = replace(t2.concept_code, '.', '')
94+
where t2.vocabulary_id = 'ICD10'
95+
), death_cause_2 AS(
96+
select t1.eid, t1.ins_index, t1.cause_icd10, t3.concept_id, 0 as source_concept_id, t1.cause_icd10 as source_code
97+
from {SOURCE_SCHEMA}.death_cause as t1
98+
left join death_cause_1 as t2 on t1.eid = t2.eid and t1.ins_index = t2.ins_index
99+
join {VOCABULARY_SCHEMA}.concept as t3 on left(t1.cause_icd10, 3) = t3.concept_code
100+
where t2.eid is null and t3.vocabulary_id = 'ICD10'
101+
), death_cause AS(
102+
select * from death_cause_1
103+
union
104+
select * from death_cause_2
105+
), dead_patient AS(
106+
select distinct t1.eid, t1.date_of_death, t2.cause_icd10, t2.source_code, t2.concept_id, t2.source_concept_id
107+
from {SOURCE_SCHEMA}.death as t1
108+
left join death_cause as t2 on t1.eid = t2.eid and t1.ins_index = t2.ins_index
109+
)
110+
INSERT INTO {TARGET_SCHEMA}.death(
111+
person_id,
112+
death_date,
113+
death_datetime,
114+
death_type_concept_id,
115+
cause_concept_id,
116+
cause_source_value,
117+
cause_source_concept_id
118+
)
119+
select
120+
t1.eid,
121+
t1.date_of_death,
122+
t1.date_of_death,
123+
32879, --same as cdm_ukb_202003
124+
CASE
125+
WHEN t1.cause_icd10 is not null THEN COALESCE(t2.target_concept_id, 0)
126+
END,
127+
t1.source_code,
128+
t1.source_concept_id
129+
from dead_patient as t1
130+
left join ICD10 as t2 on t1.concept_id = t2.source_concept_id
131+
and (t2.source_vocabulary_id = 'UKB_DEATH_CAUSE_STCM' or t2.source_vocabulary_id = 'ICD10');
132+
133+
--------------------------------
134+
-- OBSERVATION_PERIOD --
135+
--------------------------------
136+
DROP SEQUENCE IF EXISTS {TARGET_SCHEMA}.observation_period_seq;
137+
138+
CREATE SEQUENCE {TARGET_SCHEMA}.observation_period_seq;
139+
140+
with cte as (
141+
SELECT
142+
eid,
143+
LEAST(MIN(admidate), MIN(epistart),MIN(disdate), MIN(epiend)) AS min_date,
144+
GREATEST(MAX(disdate), MAX(epiend), MAX(admidate), MAX(epistart)) AS max_date
145+
FROM
146+
{SOURCE_SCHEMA}.hesin
147+
GROUP BY
148+
eid
149+
HAVING
150+
(MIN(admidate) IS NOT NULL OR MIN(epistart) IS NOT NULL OR MIN(disdate) IS NOT NULL OR MIN(epiend) IS NOT NULL) AND
151+
(MAX(disdate) IS NOT NULL OR MAX(epiend) IS NOT NULL OR MAX(admidate) IS NOT NULL OR MAX(epistart) IS NOT NULL)
152+
)
153+
INSERT INTO {TARGET_SCHEMA}.OBSERVATION_PERIOD
154+
(
155+
observation_period_id,
156+
person_id,
157+
observation_period_start_date,
158+
observation_period_end_date,
159+
period_type_concept_id
160+
)
161+
select
162+
nextval('{TARGET_SCHEMA}.observation_period_seq'),
163+
eid,
164+
min_date as observation_period_start_date,
165+
max_date as observation_period_end_date,
166+
32880
167+
from cte;
168+
169+
DROP SEQUENCE IF EXISTS {TARGET_SCHEMA}.observation_period_seq;
170+
171+
ALTER TABLE {TARGET_SCHEMA}.observation_period ADD CONSTRAINT xpk_observation_period PRIMARY KEY (observation_period_id);
172+
CREATE INDEX idx_observation_period_id ON {TARGET_SCHEMA}.observation_period (person_id ASC);
173+
CLUSTER {TARGET_SCHEMA}.observation_period USING idx_observation_period_id;

0 commit comments

Comments
 (0)