@@ -65,41 +65,46 @@ check_tidy <- function(x, std_col = FALSE) {
65
65
if (std_col ) {
66
66
std_candidates <- colnames(x ) %in% std_exp
67
67
std_candidates <- colnames(x )[std_candidates ]
68
+ re_name <- list (std_err = std_candidates )
68
69
if (has_id ) {
69
70
x <-
70
- dplyr :: select(x , term , estimate , id , tidyselect :: one_of(std_candidates )) %> %
71
- mutate(id = (id == " Apparent" )) %> %
72
- setNames(c(" term" , " estimate" , " orig" , " std_err" ))
71
+ dplyr :: select(x , term , estimate , id , tidyselect :: one_of(std_candidates ),
72
+ dplyr :: starts_with(" ." )) %> %
73
+ mutate(orig = (id == " Apparent" )) %> %
74
+ dplyr :: rename(!!! re_name )
73
75
} else {
74
76
x <-
75
- dplyr :: select(x , term , estimate , tidyselect :: one_of(std_candidates )) %> %
76
- setNames(c(" term" , " estimate" , " std_err" ))
77
+ dplyr :: select(x , term , estimate , tidyselect :: one_of(std_candidates ),
78
+ dplyr :: starts_with(" ." )) %> %
79
+ dplyr :: rename(!!! re_name )
77
80
}
78
81
} else {
79
82
if (has_id ) {
80
83
x <-
81
- dplyr :: select(x , term , estimate , id ) %> %
84
+ dplyr :: select(x , term , estimate , id , dplyr :: starts_with( " . " ) ) %> %
82
85
mutate(orig = (id == " Apparent" )) %> %
83
86
dplyr :: select(- id )
84
87
} else {
85
- x <- dplyr :: select(x , term , estimate )
88
+ x <- dplyr :: select(x , term , estimate , dplyr :: starts_with( " . " ) )
86
89
}
87
90
}
88
91
89
92
x
90
93
}
91
94
92
95
93
- get_p0 <- function (x , alpha = 0.05 ) {
96
+ get_p0 <- function (x , alpha = 0.05 , groups ) {
97
+ group_sym <- rlang :: syms(groups )
98
+
94
99
orig <- x %> %
95
- group_by(term ) %> %
100
+ group_by(!!! group_sym ) %> %
96
101
dplyr :: filter(orig ) %> %
97
- dplyr :: select(term , theta_0 = estimate ) %> %
102
+ dplyr :: select(!!! group_sym , theta_0 = estimate ) %> %
98
103
ungroup()
99
104
x %> %
100
105
dplyr :: filter(! orig ) %> %
101
- inner_join(orig , by = " term " ) %> %
102
- group_by(term ) %> %
106
+ inner_join(orig , by = groups ) %> %
107
+ group_by(!!! group_sym ) %> %
103
108
summarize(p0 = mean(estimate < = theta_0 , na.rm = TRUE )) %> %
104
109
mutate(
105
110
Z0 = stats :: qnorm(p0 ),
@@ -172,9 +177,10 @@ pctl_single <- function(stats, alpha = 0.05) {
172
177
# ' @param statistics An unquoted column name or `dplyr` selector that identifies
173
178
# ' a single column in the data set containing the individual bootstrap
174
179
# ' estimates. This must be a list column of tidy tibbles (with columns
175
- # ' `term` and `estimate`). For t-intervals, a
176
- # ' standard tidy column (usually called `std.err`) is required.
177
- # ' See the examples below.
180
+ # ' `term` and `estimate`). Optionally, users can include columns whose names
181
+ # ' begin with a period and the intervals will be created for each combination
182
+ # ' of these variables and the `term` column. For t-intervals, a standard tidy
183
+ # ' column (usually called `std.err`) is required. See the examples below.
178
184
# ' @param alpha Level of significance.
179
185
# ' @param .fn A function to calculate statistic of interest. The
180
186
# ' function should take an `rsplit` as the first argument and the `...` are
@@ -200,12 +206,15 @@ pctl_single <- function(stats, alpha = 0.05) {
200
206
# ' Application_. Cambridge: Cambridge University Press.
201
207
# ' doi:10.1017/CBO9780511802843
202
208
# '
203
- # ' @examplesIf rlang::is_installed("broom")
209
+ # ' @examplesIf rlang::is_installed("broom") & rlang::is_installed("modeldata")
204
210
# ' \donttest{
205
211
# ' library(broom)
206
212
# ' library(dplyr)
207
213
# ' library(purrr)
208
214
# ' library(tibble)
215
+ # ' library(tidyr)
216
+ # '
217
+ # ' # ------------------------------------------------------------------------------
209
218
# '
210
219
# ' lm_est <- function(split, ...) {
211
220
# ' lm(mpg ~ disp + hp, data = analysis(split)) %>%
@@ -221,6 +230,8 @@ pctl_single <- function(stats, alpha = 0.05) {
221
230
# ' int_t(car_rs, results)
222
231
# ' int_bca(car_rs, results, .fn = lm_est)
223
232
# '
233
+ # ' # ------------------------------------------------------------------------------
234
+ # '
224
235
# ' # putting results into a tidy format
225
236
# ' rank_corr <- function(split) {
226
237
# ' dat <- analysis(split)
@@ -237,6 +248,31 @@ pctl_single <- function(stats, alpha = 0.05) {
237
248
# ' bootstraps(Sacramento, 1000, apparent = TRUE) %>%
238
249
# ' mutate(correlations = map(splits, rank_corr)) %>%
239
250
# ' int_pctl(correlations)
251
+ # '
252
+ # ' # ------------------------------------------------------------------------------
253
+ # ' # An example of computing the interval for each value of a custom grouping
254
+ # ' # factor (type of house in this example)
255
+ # '
256
+ # ' # Get regression estimates for each house type
257
+ # ' lm_est <- function(split, ...) {
258
+ # ' analysis(split) %>%
259
+ # ' tidyr::nest(.by = c(type)) %>%
260
+ # ' # Compute regression estimates for each house type
261
+ # ' mutate(
262
+ # ' betas = purrr::map(data, ~ lm(log10(price) ~ sqft, data = .x) %>% tidy())
263
+ # ' ) %>%
264
+ # ' # Convert the column name to begin with a period
265
+ # ' rename(.type = type) %>%
266
+ # ' select(.type, betas) %>%
267
+ # ' unnest(cols = betas)
268
+ # ' }
269
+ # '
270
+ # ' set.seed(52156)
271
+ # ' house_rs <-
272
+ # ' bootstraps(Sacramento, 1000, apparent = TRUE) %>%
273
+ # ' mutate(results = map(splits, lm_est))
274
+ # '
275
+ # ' int_pctl(house_rs, results)
240
276
# ' }
241
277
# ' @export
242
278
int_pctl <- function (.data , ... ) {
@@ -263,8 +299,11 @@ int_pctl.bootstraps <- function(.data, statistics, alpha = 0.05, ...) {
263
299
264
300
check_num_resamples(stats , B = 1000 )
265
301
302
+ stat_groups <- c(" term" , grep(" ^\\ ." , names(stats ), value = TRUE ))
303
+ stat_groups <- rlang :: syms(stat_groups )
304
+
266
305
vals <- stats %> %
267
- dplyr :: group_by(term ) %> %
306
+ dplyr :: group_by(!!! stat_groups ) %> %
268
307
dplyr :: do(pctl_single(. $ estimate , alpha = alpha )) %> %
269
308
dplyr :: ungroup()
270
309
vals
@@ -343,9 +382,10 @@ int_t.bootstraps <- function(.data, statistics, alpha = 0.05, ...) {
343
382
344
383
check_num_resamples(stats , B = 500 )
345
384
346
- vals <-
347
- stats %> %
348
- dplyr :: group_by(term ) %> %
385
+ stat_groups <- c(" term" , grep(" ^\\ ." , names(stats ), value = TRUE ))
386
+ stat_groups <- rlang :: syms(stat_groups )
387
+ vals <- stats %> %
388
+ dplyr :: group_by(!!! stat_groups ) %> %
349
389
dplyr :: do(t_single(. $ estimate , . $ std_err , . $ orig , alpha = alpha )) %> %
350
390
dplyr :: ungroup()
351
391
vals
@@ -361,8 +401,11 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) {
361
401
cli_abort(" All statistics have missing values." )
362
402
}
363
403
404
+ stat_groups_chr <- c(" term" , grep(" ^\\ ." , names(stats ), value = TRUE ))
405
+ stat_groups_sym <- rlang :: syms(stat_groups_chr )
406
+
364
407
# ## Estimating Z0 bias-correction
365
- bias_corr_stats <- get_p0(stats , alpha = alpha )
408
+ bias_corr_stats <- get_p0(stats , alpha = alpha , groups = stat_groups_chr )
366
409
367
410
# need the original data frame here
368
411
loo_rs <- loo_cv(orig_data )
@@ -380,16 +423,16 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) {
380
423
381
424
loo_estimate <-
382
425
loo_res %> %
383
- dplyr :: group_by(term ) %> %
426
+ dplyr :: group_by(!!! stat_groups_sym ) %> %
384
427
dplyr :: summarize(loo = mean(estimate , na.rm = TRUE )) %> %
385
- dplyr :: inner_join(loo_res , by = " term " , multiple = " all" ) %> %
386
- dplyr :: group_by(term ) %> %
428
+ dplyr :: inner_join(loo_res , by = stat_groups_chr , multiple = " all" ) %> %
429
+ dplyr :: group_by(!!! stat_groups_sym ) %> %
387
430
dplyr :: summarize(
388
431
cubed = sum((loo - estimate )^ 3 ),
389
432
squared = sum((loo - estimate )^ 2 )
390
433
) %> %
391
434
dplyr :: ungroup() %> %
392
- dplyr :: inner_join(bias_corr_stats , by = " term " ) %> %
435
+ dplyr :: inner_join(bias_corr_stats , by = stat_groups_chr ) %> %
393
436
dplyr :: mutate(
394
437
a = cubed / (6 * (squared ^ (3 / 2 ))),
395
438
Zu = (Z0 + Za ) / (1 - a * (Z0 + Za )) + Z0 ,
@@ -400,21 +443,25 @@ bca_calc <- function(stats, orig_data, alpha = 0.05, .fn, ...) {
400
443
401
444
terms <- loo_estimate $ term
402
445
stats <- stats %> % dplyr :: filter(! orig )
403
- for (i in seq_along(terms )) {
404
- tmp <- new_stats(stats $ estimate [stats $ term == terms [i ]],
405
- lo = loo_estimate $ lo [i ],
406
- hi = loo_estimate $ hi [i ]
407
- )
408
- tmp $ term <- terms [i ]
446
+
447
+ keys <- stats %> % dplyr :: distinct(!!! stat_groups_sym )
448
+ for (i in seq_len(nrow(keys ))) {
449
+ tmp_stats <- dplyr :: inner_join(stats , keys [i ,], by = stat_groups_chr )
450
+ tmp_loo <- dplyr :: inner_join(loo_estimate , keys [i ,], by = stat_groups_chr )
451
+
452
+ tmp <- new_stats(tmp_stats $ estimate ,
453
+ lo = tmp_loo $ lo ,
454
+ hi = tmp_loo $ hi )
455
+ tmp <- dplyr :: bind_cols(tmp , keys [i ,])
409
456
if (i == 1 ) {
410
457
ci_bca <- tmp
411
458
} else {
412
- ci_bca <- bind_rows(ci_bca , tmp )
459
+ ci_bca <- dplyr :: bind_rows(ci_bca , tmp )
413
460
}
414
461
}
415
462
ci_bca <-
416
463
ci_bca %> %
417
- dplyr :: select(term , .lower , .estimate , .upper ) %> %
464
+ dplyr :: select(!!! stat_groups_sym , .lower , .estimate , .upper ) %> %
418
465
dplyr :: mutate(
419
466
.alpha = alpha ,
420
467
.method = " BCa"
@@ -441,7 +488,7 @@ int_bca.bootstraps <- function(.data, statistics, alpha = 0.05, .fn, ...) {
441
488
if (length(column_name ) != 1 ) {
442
489
cli_abort(stat_fmt_err )
443
490
}
444
- stats <- .data %> % dplyr :: select(!! column_name , id )
491
+ stats <- .data %> % dplyr :: select(!! column_name , id , dplyr :: starts_with( " . " ) )
445
492
stats <- check_tidy(stats )
446
493
447
494
check_num_resamples(stats , B = 1000 )
0 commit comments