-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.R
157 lines (128 loc) · 4.79 KB
/
functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
between <- function(x, left, right) {
x >= left & x <= right
}
diagnose_diabetes <- function(Q029, Q030, Z034) {
stopifnot(length(Q029) == length(Q030), length(Q030) == length(Z034))
# Q030: Did any doctor diagnose you as having diabetes?
# 1) yes; 2) only during pregnancy; 3) No
reported <- Q030 == 1
# Q029: when was the last time you had your blood glucose measured?
# 1 through 5) various time frames; 6) never
reported[is.na(reported) & Q029 == 6] <- FALSE
# Z034: glycated hemoglobin
measured <- Z034 >= 6.5
# When either the reported or the measured values are not missing,
# use what we have.
res <- rep(NA, times = length(Q029))
res <- measured %in% TRUE | reported %in% TRUE
res[is.na(measured) & is.na(reported)] <- NA
return(as.integer(res))
}
impute_bp_meds <- function(Q001, Q002, Q006) {
stopifnot(length(Q001) == length(Q002), length(Q002) == length(Q006))
# Q001: When was your blood pressure last measured?
# 6 is never; the questionnaire skips the other questions.
# Q002: Did any doctor diagnose you as having hypertension?
# 1) yes, 2) only during pregnancy, 3) no
# if 2 or 3 skip the other question.
# Q006: During the last two weeks, did you take any medicines
# because of hypertension?
# 1) yes, 2) no
res <- 2L - as.integer(Q006)
res[is.na(res) & Q002 > 1] <- 0
res[is.na(res) & Q001 == 6] <- 0
return(res)
}
tabulate_fig3 <- function(from, to, weight = 1) {
if (!("ggalluvial" %in% installed.packages())) {
stop("Function tabulate_fig3() requires installing package \"ggalluvial\"")
}
stopifnot(is.ordered(from), is.ordered(to))
stopifnot(length(from) == length(to))
if (length(weight) != length(from)) {
stop("weight must be missing, length 1, or as long as from and to")
}
xtabs(weight ~ to + from) |>
as.data.frame() |>
transform(Freq = Freq / sum(Freq),
fill = to) |>
ggalluvial::to_lodes_form(res, axes = c("from", "to")) |>
transform(stratum = ordered(stratum, risk_levels),
fill = ordered(fill, risk_levels))
}
# Function improved from: https://doi.org/10.5281/zenodo.7121199
weighted.quantile <- function(x, w = 1, probs = seq(0, 1, 0.25),
na.rm = FALSE, names = TRUE) {
if (any(probs > 1) | any(probs < 0)) stop("'probs' outside [0,1]")
if (length(w) == 1) w <- rep(w, length(x))
if (length(w) != length(x)) stop("w must have length 1 or be as long as x")
not.missing <- !is.na(w) & !is.na(x)
if (isTRUE(na.rm)) {
if (!identical(is.na(x), is.na(w))) warning(paste(
"One or more observations with missing x but not missing w",
"or vice-versa! Using only observations with non-missing values",
"for both."
))
w <- w[not.missing]
x <- x[not.missing]
} else if (any(is.na(x) | is.na(w))) {
warning("One or more missing values in x or w.")
return(rep(NA_real_, length(x)))
}
w <- w[order(x)] / sum(w)
x <- x[order(x)]
if (length(x) == 1) {
res <- rep(x, length(probs))
} else {
cum_w <- cumsum(w) - w * (1 - (seq_along(w) - 1) / (length(w) - 1))
res <- approx(x = cum_w, y = x, xout = probs, ties = "ordered")$y
}
# Accessing non-exported functions is not great practice, generally,
# but we want to ensure the labels are identical to those of
# non-weighted quantiles.
if (isTRUE(names)) names(res) <- stats:::format_perc(probs, "%")
res
}
#' Gwet's generalized first-order agreement coefficient (AC1)
#' but with survey weights
#'
#' @param a assessment of the first rater
#' @param b assessment of the second rater
#' @param w survey weights
#'
#' @return Gwet's AC1
#'
weighted.ac1 <- function(a, b, w = 1) {
stopifnot(length(a) == length(b))
if (length(w) == 1) w <- rep(w, length(a))
stopifnot(length(a) == length(w))
ok <- !is.na(a) & !is.na(b) & !is.na(w)
if (any(!ok)) {
warning("Removing observations with missing values")
a <- a[ok]
b <- b[ok]
w <- w[ok]
}
if (is.factor(a) & is.factor(b)) {
stopifnot(levels(a) == levels(b))
ll <- levels(a)
} else {
stopifnot(is.factor(a) == is.factor(b))
ll <- unique(c(a, b))
}
# Observed agreement
pa <- weighted.mean(a == b, w)
# Expected chance agreement by category
pihat_k <- sapply(ll, \(x) weighted.mean(((a==x) + (b==x)) / 2, w))
# Overall expected chance agreement
pe <- sum(pihat_k * (1 - pihat_k)) / (length(ll) - 1)
(pa - pe) / (1 - pe)
}
weighted.var <- function(x, w) {
# Implementation rom MSWD_w in:
# https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
# Also see:
# https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Frequency_weights
sum(w * (x - weighted.mean(x, w))^2) * sum(w) / (sum(w)^2 - sum(w^2))
}
weighted.sd <- function(x, w) sqrt(weighted.var(x, w))