Merge pull request #281 from tidymodels/t-test-levels

ismayc · web-flow · commit 8041866771c3 · 2020-01-31T17:03:10.000-08:00
specifying order in difference-based tests (#275)
diff --git a/R/calculate.R b/R/calculate.R
@@ -100,7 +100,7 @@ calculate <- function(x,
       (attr(x, "theory_type") %in% c("Two sample props z", "Two sample t"))
     )
   ) {
-    check_order(x, explanatory_variable(x), order)
+    order <- check_order(x, explanatory_variable(x), order)
   }
 
   if (!(
diff --git a/R/utils.R b/R/utils.R
@@ -121,19 +121,25 @@ null_transformer <- function(text, envir) {
 }
 
 check_order <- function(x, explanatory_variable, order) {
-  unique_explanatory_variable <- unique(explanatory_variable)
-  if (length(unique_explanatory_variable) != 2) {
+  unique_ex <- sort(unique(explanatory_variable))
+  if (length(unique_ex) != 2) {
     stop_glue(
       "Statistic is based on a difference; the explanatory variable should ",
       "have two levels."
     )
   }
   if (is.null(order)) {
-    stop_glue(
-      "Statistic is based on a difference; specify the `order` in which to ",
-      "subtract the levels of the explanatory variable. ",
-      '`order = c("first", "second")` means `("first" - "second")`. ',
-      "Check `?calculate` for details."
+    # Default to subtracting the second (in sorted order) level from the first,
+    # i.e. "first" - "second"; for a factor, sort() follows the level order
+    # rather than alphabetical order. Warn because the order was implicit.
+    order <- as.character(unique_ex)
+    warning_glue(
+      "The statistic is based on a difference; by default, the ",
+      "explanatory variable has been subtracted in the order ", 
+      "\"{unique_ex[1]}\" - \"{unique_ex[2]}\". To specify the ",
+      "order yourself, provide `order = c(\"{unique_ex[1]}\", ",
+      "\"{unique_ex[2]}\")` (to subtract in the order ",
+      "\"{unique_ex[1]}\" - \"{unique_ex[2]}\") to the calculate() function."
     )
   } else {
     if (xor(is.na(order[1]), is.na(order[2]))) {
@@ -144,13 +150,15 @@ check_order <- function(x, explanatory_variable, order) {
     if (length(order) > 2) {
       stop_glue("`order` is expecting only two entries.")
     }
-    if (order[1] %in% unique_explanatory_variable == FALSE) {
+    if (order[1] %in% unique_ex == FALSE) {
       stop_glue("{order[1]} is not a level of the explanatory variable.")
     }
-    if (order[2] %in% unique_explanatory_variable == FALSE) {
+    if (order[2] %in% unique_ex == FALSE) {
       stop_glue("{order[2]} is not a level of the explanatory variable.")
     }
   }
+  # Return the validated order, or the sorted default when order was NULL.
+  order
 }
 
 check_args_and_attr <- function(x, explanatory_variable, response_variable,
diff --git a/R/wrappers.R b/R/wrappers.R
@@ -80,7 +80,7 @@ t_test <- function(x, formula,
     #                                                                  order[2]),
     #                                                       ordered = TRUE)
     # }
-    check_order(x, explanatory_variable(x), order)
+    order <- check_order(x, explanatory_variable(x), order)
     prelim <- stats::t.test(formula = as.formula(paste0(attr(x, "response"),
                                                         " ~ ",
                                                         attr(x, "explanatory"))),
@@ -189,7 +189,7 @@ t_stat <- function(x, formula,
     #                                                                  order[2]),
     #                                                       ordered = TRUE)
     # }
-    check_order(x, explanatory_variable(x), order)
+    order <- check_order(x, explanatory_variable(x), order)
     prelim <- stats::t.test(formula = as.formula(paste0(attr(x, "response"),
                                                         " ~ ",
                                                         attr(x, "explanatory"))),
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -2,3 +2,5 @@ library(testthat)
 library(infer)
 
 test_check("infer")
+
+
diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R
@@ -125,7 +125,7 @@ test_that("response variable is a factor (two var problems)", {
   expect_silent(
     calculate(gen_iris4a, stat = "z", order = c("large", "small"))
   )
-  expect_error(calculate(gen_iris4a, stat = "z"))
+  expect_warning(calculate(gen_iris4a, stat = "z"))
 })
 
 gen_iris5 <- iris %>%
@@ -144,11 +144,11 @@ test_that("two sample mean-type problems are working", {
     specify(Sepal.Width ~ Sepal.Length.Group) %>%
     hypothesize(null = "independence") %>%
     generate(reps = 10, type = "permute")
-  expect_error(calculate(gen_iris5a, stat = "diff in means"))
+  expect_warning(calculate(gen_iris5a, stat = "diff in means"))
   expect_silent(
     calculate(gen_iris5a, stat = "diff in means", order = c(">5", "<=5"))
   )
-  expect_error(calculate(gen_iris5a, stat = "t"))
+  expect_warning(calculate(gen_iris5a, stat = "t"))
   expect_silent(calculate(gen_iris5a, stat = "t", order = c(">5", "<=5")))
 })
 
@@ -270,7 +270,8 @@ test_that("`order` is working", {
     calculate(gen_iris11, stat = "diff in means", order = c(">5", "<=4", ">4"))
   )
   # order not given
-  expect_error(calculate(gen_iris11, stat = "diff in means"))
+  expect_warning(calculate(gen_iris11, stat = "diff in means"),
+                 "by default, the explanatory variable has been subtracted")
 })
 
 gen_iris12 <- iris %>%
diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
@@ -11,7 +11,7 @@ iris3 <- iris %>%
 
 test_that("t_test works", {
   # Two Sample
-  expect_error(iris2 %>% t_test(Sepal.Width ~ Species))
+  expect_warning(iris2 %>% t_test(Sepal.Width ~ Species))
 
   expect_error(
     iris2 %>% t_test(response = "Sepal.Width", explanatory = "Species")

Original file line number	Diff line number	Diff line change
`@@ -100,7 +100,7 @@ calculate <- function(x,`
`100`	`100`	`(attr(x, "theory_type") %in% c("Two sample props z", "Two sample t"))`
`101`	`101`	`)`
`102`	`102`	`) {`
`103`		`- check_order(x, explanatory_variable(x), order)`
	`103`	`+ order <- check_order(x, explanatory_variable(x), order)`
`104`	`104`	`}`
`105`	`105`
`106`	`106`	`if (!(`
Original file line number	Diff line number	Diff line change
`@@ -121,19 +121,25 @@ null_transformer <- function(text, envir) {`
`121`	`121`	`}`
`122`	`122`
`123`	`123`	`check_order <- function(x, explanatory_variable, order) {`
`124`		`- unique_explanatory_variable <- unique(explanatory_variable)`
`125`		`- if (length(unique_explanatory_variable) != 2) {`
	`124`	`+ unique_ex <- sort(unique(explanatory_variable))`
	`125`	`+ if (length(unique_ex) != 2) {`
`126`	`126`	`stop_glue(`
`127`	`127`	`"Statistic is based on a difference; the explanatory variable should ",`
`128`	`128`	`"have two levels."`
`129`	`129`	`)`
`130`	`130`	`}`
`131`	`131`	`if (is.null(order)) {`
`132`		`- stop_glue(`
`133`		- "Statistic is based on a difference; specify the `order` in which to ",
`134`		`- "subtract the levels of the explanatory variable. ",`
`135`		- '`order = c("first", "second")` means `("first" - "second")`. ',
`136`		- "Check `?calculate` for details."
	`132`	`+ # Default to subtracting the second (in sorted order) level from the first,`
	`133`	`+ # i.e. "first" - "second"; for a factor, sort() follows the level order`
	`134`	`+ # rather than alphabetical order. Warn because the order was implicit.`
	`135`	`+ order <- as.character(unique_ex)`
	`136`	`+ warning_glue(`
	`137`	`+ "The statistic is based on a difference; by default, the ",`
	`138`	`+ "explanatory variable has been subtracted in the order ",`
	`139`	`+ "\"{unique_ex[1]}\" - \"{unique_ex[2]}\". To specify the ",`
	`140`	+ "order yourself, provide `order = c(\"{unique_ex[1]}\", ",
	`141`	+ "\"{unique_ex[2]}\")` (to subtract in the order ",
	`142`	`+ "\"{unique_ex[1]}\" - \"{unique_ex[2]}\") to the calculate() function."`
`137`	`143`	`)`
`138`	`144`	`} else {`
`139`	`145`	`if (xor(is.na(order[1]), is.na(order[2]))) {`
`@@ -144,13 +150,15 @@ check_order <- function(x, explanatory_variable, order) {`
`144`	`150`	`if (length(order) > 2) {`
`145`	`151`	stop_glue("`order` is expecting only two entries.")
`146`	`152`	`}`
`147`		`- if (order[1] %in% unique_explanatory_variable == FALSE) {`
	`153`	`+ if (order[1] %in% unique_ex == FALSE) {`
`148`	`154`	`stop_glue("{order[1]} is not a level of the explanatory variable.")`
`149`	`155`	`}`
`150`		`- if (order[2] %in% unique_explanatory_variable == FALSE) {`
	`156`	`+ if (order[2] %in% unique_ex == FALSE) {`
`151`	`157`	`stop_glue("{order[2]} is not a level of the explanatory variable.")`
`152`	`158`	`}`
`153`	`159`	`}`
	`160`	`+ # Return the validated order, or the sorted default when order was NULL.`
	`161`	`+ order`
`154`	`162`	`}`
`155`	`163`
`156`	`164`	`check_args_and_attr <- function(x, explanatory_variable, response_variable,`