-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget-blood-gasses-and-define-pf-cohort.R
executable file
·111 lines (95 loc) · 2.17 KB
/
get-blood-gasses-and-define-pf-cohort.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
library(RPostgres)
library(dplyr)
library(readr)
library(lubridate)
library(tidyr)
source("scripts/common/logger-setup.R")
source("scripts/common/setup-argparse.R")
# Register this script's own CLI option on `parser` (presumably created by the
# sourced scripts/common/setup-argparse.R -- confirm), then parse the command
# line. The option value is read later as args$blood_gasses_query.
parser$add_argument("--blood-gasses-query")
args <- parser$parse_args()

# Open the connection to the local MIMIC database.
# The user name and password are taken from the standard libpq environment
# variables when they are set, so credentials do not have to live in the
# source file; the fallbacks are the values that used to be hard-coded, so an
# environment without PGUSER / PGPASSWORD behaves exactly as before.
mimic <- dbConnect(
  RPostgres::Postgres(),
  dbname = "mimic",
  host = "localhost",
  port = 5432,
  user = Sys.getenv("PGUSER", unset = "amanderson"),
  password = Sys.getenv("PGPASSWORD", unset = "postgres"),
  timezone = "US/Eastern"
)
# Log the step, then run the SQL stored in the file named by
# --blood-gasses-query against the MIMIC connection.
flog.info("mimic-example: querying blood gasses", name = base_filename)
res <- args$blood_gasses_query %>%
  read_file() %>%
  dbGetQuery(conn = mimic, statement = .)

# Keep only the identifier, time, specimen and PaO2/FiO2 columns (the latter
# renamed to `pf`), restrict to specimens predicted to be arterial ("ART")
# with a non-missing pf, and finally drop any row that still has an NA in one
# of the remaining columns.
tbl1 <- res %>%
  select(
    subject_id, hadm_id, icustay_id,
    charttime, specimen_pred,
    pf = pao2fio2
  ) %>%
  as_tibble() %>%
  filter(specimen_pred == "ART" & !is.na(pf)) %>%
  drop_na()
# Fetch the identifiers and ICU admission time (`intime`) of every row in
# mimiciii.icustays; `intime` is used afterwards to express each measurement
# time relative to ICU admission.
icustays <- dbGetQuery(
  mimic,
  statement =
    "SELECT
subject_id,
hadm_id,
icustay_id,
intime
FROM
mimiciii.icustays;"
)

# This is the last statement in the script that uses the connection, so
# release it here instead of leaving it open until the R process exits.
DBI::dbDisconnect(mimic)
# Require the first n observations to be above the threshold; this avoids
# the case where the first or second observation is an outlier / clearly wrong.
#
# x: vector of observations, in the order in which they should be checked.
# n: number of leading observations to test.
# k: threshold that each of those observations must strictly exceed.
#
# Returns a single TRUE when x holds at least n observations and the first n
# are all > k, and a single FALSE otherwise. A vector shorter than n, or an NA
# among the leading values, yields FALSE (it used to yield NA, which
# dplyr::filter() also treats as "drop"), so the result is always usable in
# `if` as well as in filter().
first_n_greater_than_k <- function(x, n, k) {
  # seq_len(n) instead of 1:n: for n = 0, 1:n is c(1, 0) and would silently
  # test x[1] rather than nothing.
  length(x) >= n && isTRUE(all(x[seq_len(n)] > k))
}
# Check that every gap between consecutive entries of x is below gap_days.
#
# x: numeric vector of measurement times, expressed in the same unit as
#    gap_days. Gaps are taken between neighbouring elements as given, so x
#    is assumed to already be in chronological order -- TODO confirm at the
#    call site.
# gap_days: exclusive upper bound on the largest allowed gap (default 2).
#
# Returns TRUE when the largest consecutive difference is < gap_days, FALSE
# otherwise. With fewer than two measurements there is no gap at all and the
# result is TRUE.
time_between_measurements_less_than <- function(x, gap_days = 2) {
  gaps <- diff(x)
  # max(numeric(0)) is -Inf with a warning; answer the same TRUE directly so
  # single-measurement groups do not flood the log with warnings.
  if (length(gaps) == 0) {
    return(TRUE)
  }
  max(gaps) < gap_days
}
# Attach the ICU admission time to every blood-gas measurement and express the
# measurement time as days since ICU admission.
# The join keys are spelled out: they are exactly the columns tbl1 and
# icustays have in common (what the implicit natural join used), but naming
# them keeps the join stable if either query gains a column and silences the
# "Joining with `by = ...`" message.
# The result is left grouped by icustay_id, as before.
tbl2 <- tbl1 %>%
  left_join(icustays, by = c("subject_id", "hadm_id", "icustay_id")) %>%
  group_by(icustay_id) %>%
  mutate(
    time_since_icu_adm = time_length(charttime - intime, unit = "day")
  )
# Cheap pre-filter: keep only ICU stays with at least `min_obs` measurements
# so the per-stay checks that follow run on fewer groups. Possibly no longer
# needed, because the cohort selection further down already requires at
# least 12 retained measurements per stay.
min_obs <- 5

# One row per ICU stay with its measurement count `n`, restricted to stays
# that reach the minimum.
cohort_with_min_obs <- tbl2 %>%
  group_by(icustay_id) %>%
  count() %>%
  filter(min_obs <= n)

# All measurements belonging to those stays.
tbl3 <- filter(
  tbl2,
  icustay_id %in% pull(cohort_with_min_obs, icustay_id)
)
# Select the ICU stays that make up the cohort.
# All three conditions sit in ONE filter() call on purpose: filter() combines
# them with `&`, and each is evaluated on the complete per-stay group. The two
# helper checks therefore see every measurement of the stay (including any
# with pf >= 600), while `pf < 600` removes individual rows. Splitting the
# call would change which rows the helpers see.
# Afterwards the retained rows are counted per stay and only stays with
# between 12 and 500 (inclusive) retained measurements are kept.
tbl4 <- tbl3 %>%
  group_by(icustay_id) %>%
  filter(
    first_n_greater_than_k(pf, n = 6, k = 350),
    time_between_measurements_less_than(time_since_icu_adm, gap_days = 2),
    pf < 600 # infeasible pf ratios
  ) %>%
  count(icustay_id, sort = TRUE) %>%
  ungroup() %>%
  filter(n >= 12, n <= 500)

# Final data: every measurement in tbl2 that belongs to a selected stay.
# NOTE(review): because rows are taken from tbl2, measurements with pf >= 600
# are still present here even though they were excluded when counting above
# -- confirm this is intended.
pf_tbl <- filter(tbl2, icustay_id %in% tbl4$icustay_id)
# Log the step and serialise the cohort data to the path in args$output
# (an option presumably declared by the shared argparse setup -- confirm).
# pf_tbl is written as-is, i.e. still grouped by icustay_id.
flog.info("mimic-example: writing pf data", name = base_filename)
saveRDS(pf_tbl, file = args$output)