mlr-org
diff --git a/‎.ignore
+5 b/‎.ignore
+5
diff --git a/‎DESCRIPTION
+3-3 b/‎DESCRIPTION
+3-3
diff --git a/‎NEWS.md
+7 b/‎NEWS.md
+7
diff --git a/‎R/bibentries.R
+63 b/‎R/bibentries.R
+63
diff --git a/‎R/helpers.R
+37 b/‎R/helpers.R
+37
diff --git a/‎R/learner_obliqueRSF_surv_obliqueRSF.R
+8-5 b/‎R/learner_obliqueRSF_surv_obliqueRSF.R
+8-5
diff --git a/‎R/learner_partykit_classif_cforest.R
+9-9 b/‎R/learner_partykit_classif_cforest.R
+9-9
diff --git a/‎R/learner_partykit_regr_cforest.R
+5-9 b/‎R/learner_partykit_regr_cforest.R
+5-9
diff --git a/‎R/learner_partykit_surv_cforest.R
+5-9 b/‎R/learner_partykit_surv_cforest.R
+5-9
diff --git a/‎R/learner_randomForestSRC_classif_rfsrc.R
+13-2 b/‎R/learner_randomForestSRC_classif_rfsrc.R
+13-2
diff --git a/‎R/learner_randomForestSRC_regr_rfsrc.R
+6-7 b/‎R/learner_randomForestSRC_regr_rfsrc.R
+6-7
@@ -0,0 +1,5 @@
+man/
+attic/
+pkgdown/
+revdep/
+docs/
@@ -1,6 +1,6 @@
 Package: mlr3extralearners
 Title: Extra Learners For mlr3
-Version: 0.5.6
+Version: 0.5.7
 Authors@R:
     c(person(given = "Raphael",
              family = "Sonabend",
@@ -44,7 +44,7 @@ Imports:
     data.table,
     methods,
     mlr3 (>= 0.6.0),
-    mlr3misc,
+    mlr3misc (>= 0.9.4),
     paradox,
     R6
 Suggests:
@@ -108,4 +108,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 NeedsCompilation: no
 Roxygen: list(markdown = TRUE, r6 = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
@@ -1,7 +1,14 @@
+# mlr3extralearners 0.5.7
+
+* Introduced new custom hyperparameters for `randomForestSRC::rfsrc()`,
+  `partykit::cforest()` and `obliqueRSF::ORSF()` to conveniently tune
+  hyperparameters whose upper limit depends on data dimensions.
+  
 # mlr3extralearners 0.5.6
 
 * Fix learners requiring distr6. distr6 1.6.0 now forced and param6 added to suggests
 
+
 # mlr3extralearners 0.5.5
 
 * Bugfix `regr.gausspr`
 
@@ -0,0 +1,63 @@
+# Bibliography entries; roxygen blocks cite them by key through `format_bib()`.
+bibentries = c( # nolint start
+  breiman_2001  = bibentry("article",
+    title       = "Random Forests",
+    author      = "Breiman, Leo",
+    year        = "2001",
+    journal     = "Machine Learning",
+    volume      = "45",
+    number      = "1",
+    pages       = "5--32",
+    doi         = "10.1023/A:1010933404324",
+    issn        = "1573-0565"
+  ),
+
+  ishwaran_2008 = bibentry("article",
+    doi         = "10.1214/08-aoas169",
+    url         = "https://doi.org/10.1214/08-aoas169",
+    year        = "2008",
+    month       = "9",
+    publisher   = "Institute of Mathematical Statistics",
+    volume      = "2",
+    number      = "3",
+    author      = "Hemant Ishwaran and Udaya B. Kogalur and Eugene H. Blackstone and Michael S. Lauer",
+    title       = "Random survival forests",
+    journal     = "The Annals of Applied Statistics"
+  ),
+
+  hothorn_2015  = bibentry("article",
+    author      = "Torsten Hothorn and Achim Zeileis",
+    title       = "partykit: A Modular Toolkit for Recursive Partytioning in R",
+    journal     = "Journal of Machine Learning Research",
+    year        = "2015",
+    volume      = "16",
+    number      = "118",
+    pages       = "3905--3909",
+    url         = "http://jmlr.org/papers/v16/hothorn15a.html"
+  ),
+
+  hothorn_2006  = bibentry("article",
+    doi         = "10.1198/106186006x133933",
+    url         = "https://doi.org/10.1198/106186006x133933",
+    year        = "2006",
+    month       = "9",
+    publisher   = "Informa {UK} Limited",
+    volume      = "15",
+    number      = "3",
+    pages       = "651--674",
+    author      = "Torsten Hothorn and Kurt Hornik and Achim Zeileis",
+    title       = "Unbiased Recursive Partitioning: A Conditional Inference Framework",
+    journal     = "Journal of Computational and Graphical Statistics"
+  ),
+
+  jaeger_2019   = bibentry("article",
+    doi         = "10.1214/19-aoas1261",
+    year        = "2019",
+    month       = "9",
+    publisher   = "Institute of Mathematical Statistics",
+    volume      = "13",
+    number      = "3",
+    pages       = "1847--1883",
+    author      = "Byron C. Jaeger and D. Leann Long and Dustin M. Long and Mario Sims and Jeff M. Szychowski and Yuan-I Min and Leslie A. Mcclure and George Howard and Noah Simon",
+    title       = "Oblique random survival forests",
+    journal     = "The Annals of Applied Statistics"
+  )
+) # nolint end
@@ -57,3 +57,40 @@ pprob_to_matrix <- function(pp, task) {
   colnames(y) <- task$class_names
   y
 }
+
+#' @title Convert a Ratio Hyperparameter
+#'
+#' @description
+#' Given the named list `pv` (values of a [ParamSet]), replaces a hyperparameter named `ratio`,
+#' if present, by the hyperparameter `target = max(ceiling(pv[[ratio]] * n), 1)`.
+#' If both `target` and `ratio` are found in `pv`, an exception is thrown.
+#'
+#' @param pv (named `list()`)\cr
+#'   Hyperparameter values.
+#' @param target (`character(1)`)\cr
+#'   Name of the hyperparameter that receives the converted value.
+#' @param ratio (`character(1)`)\cr
+#'   Name of the ratio hyperparameter.
+#' @param n (`integer(1)`)\cr
+#'   Count the ratio is multiplied with; callers pass the number of features or observations.
+#'
+#' @return (named `list()`) with new hyperparameter settings.
+#' @noRd
+convert_ratio = function(pv, target, ratio, n) {
+  has_target = target %in% names(pv)
+  has_ratio = ratio %in% names(pv)
+
+  # both given: the two ways of setting the value contradict each other
+  if (has_target && has_ratio) {
+    stopf("Hyperparameters '%s' and '%s' are mutually exclusive", target, ratio)
+  }
+
+  # no ratio given: nothing to convert, `target` (if any) is kept as is
+  if (!has_ratio) {
+    return(pv)
+  }
+
+  # only the ratio given: scale by `n`, round up, never go below 1
+  pv[[target]] = max(ceiling(pv[[ratio]] * n), 1)
+  remove_named(pv, ratio)
+}
@@ -12,12 +12,13 @@
 #'   - Actual default: `TRUE`
 #'   - Adjusted default: `FALSE`
 #'   - Reason for change: mlr3 already has it's own verbose set to `TRUE` by default
+#' - `mtry`:
+#'   - This hyperparameter can alternatively be set via the added hyperparameter `mtry_ratio`
+#'     as `mtry = max(ceiling(mtry_ratio * n_features), 1)`.
+#'     Note that `mtry` and `mtry_ratio` are mutually exclusive.
 #'
 #' @references
-#' Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min Y, Mcclure LA, Howard G, Simon N (2019).
-#' “Oblique random survival forests.” The Annals of Applied Statistics, 13(3), 1847–1883.
-#' ISSN 1932-6157, 1941-7330, doi: 10.1214/19-AOAS1261,
-#' https://projecteuclid.org/euclid.aoas/1571277776.
+#' `r format_bib("jaeger_2019")`
 #'
 #' @template seealso_learner
 #' @template example
@@ -42,6 +43,7 @@ LearnerSurvObliqueRSF = R6Class("LearnerSurvObliqueRSF",
           max_pval_to_split_node = p_dbl(lower = 0, upper = 1, default = 0.5,
             tags = "train"),
           mtry = p_int(lower = 1, tags = "train"),
+          mtry_ratio = p_dbl(0, 1, tags = "train"),
           dfmax = p_int(lower = 1, tags = "train"),
           use.cv = p_lgl(default = FALSE, tags = "train"),
           verbose = p_lgl(default = TRUE, tags = "train"),
@@ -76,11 +78,12 @@ LearnerSurvObliqueRSF = R6Class("LearnerSurvObliqueRSF",
   private = list(
     .train = function(task) {
       pv = self$param_set$get_values(tags = "train")
+      pv = convert_ratio(pv, "mtry", "mtry_ratio", length(task$feature_names))
       targets = task$target_names
 
       mlr3misc::invoke(
         obliqueRSF::ORSF,
-        data     = as.data.frame(task$data()),
+        data     = data.table::setDF(task$data()),
         time     = targets[1L],
         status   = targets[2L],
         .args    = pv
 
@@ -6,16 +6,14 @@
 #' @templateVar id classif.cforest
 #' @templateVar caller cforest
 #'
-#' @references
-#' Hothorn T, Zeileis A (2015).
-#' “partykit: A Modular Toolkit for Recursive Partytioning in R.”
-#' Journal of Machine Learning Research, 16(118), 3905-3909.
-#' \url{http://jmlr.org/papers/v16/hothorn15a.html}
+#' @section Custom mlr3 defaults:
+#' - `mtry`:
+#'   - This hyperparameter can alternatively be set via the added hyperparameter `mtryratio`
+#'     as `mtry = max(ceiling(mtryratio * n_features), 1)`.
+#'     Note that `mtry` and `mtryratio` are mutually exclusive.
 #'
-#' Hothorn T, Hornik K, Zeileis A (2006).
-#' “Unbiased Recursive Partitioning: A Conditional Inference Framework.”
-#' Journal of Computational and Graphical Statistics, 15(3), 651–674.
-#' \doi{10.1198/106186006x133933}
+#' @references
+#' `r format_bib("hothorn_2015", "hothorn_2006")`
 #'
 #' @export
 #' @template seealso_learner
@@ -37,6 +35,7 @@ LearnerClassifCForest = R6Class("LearnerClassifCForest",
           tags = "train"),
         mtry = p_int(lower = 0L, special_vals = list(Inf),
           tags = "train"), # default actually "ceiling(sqrt(nvar))"
+        mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"),
         applyfun = p_uty(tags = c("train", "importance")),
         cores = p_int(default = NULL, special_vals = list(NULL),
           tags = c("train", "importance")),
@@ -167,6 +166,7 @@ LearnerClassifCForest = R6Class("LearnerClassifCForest",
     .train = function(task) {
 
       pars = self$param_set$get_values(tags = "train")
+      pars = convert_ratio(pars, "mtry", "mtryratio", length(task$feature_names))
       pars_control = pars[which(names(pars) %in%
         setdiff(methods::formalArgs(partykit::ctree_control),
           c("mtry", "applyfun", "cores")
 
@@ -6,16 +6,10 @@
 #' @templateVar id regr.cforest
 #' @templateVar caller cforest
 #'
-#' @references
-#' Hothorn T, Zeileis A (2015).
-#' “partykit: A Modular Toolkit for Recursive Partytioning in R.”
-#' Journal of Machine Learning Research, 16(118), 3905-3909.
-#' \url{http://jmlr.org/papers/v16/hothorn15a.html}
+#' @inheritSection mlr_learners_classif.cforest Custom mlr3 defaults
 #'
-#' Hothorn T, Hornik K, Zeileis A (2006).
-#' “Unbiased Recursive Partitioning: A Conditional Inference Framework.”
-#' Journal of Computational and Graphical Statistics, 15(3), 651–674.
-#' \doi{10.1198/106186006x133933}
+#' @references
+#' `r format_bib("hothorn_2015", "hothorn_2006")`
 #'
 #' @export
 #' @template seealso_learner
@@ -37,6 +31,7 @@ LearnerRegrCForest = R6Class("LearnerRegrCForest",
           tags = "train"),
         mtry = p_int(lower = 0L, special_vals = list(Inf),
           tags = "train"), # default actually "ceiling(sqrt(nvar))"
+        mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"),
         applyfun = p_uty(tags = c("train", "importance")),
         cores = p_int(default = NULL, special_vals = list(NULL),
           tags = c("train", "importance")),
@@ -163,6 +158,7 @@ LearnerRegrCForest = R6Class("LearnerRegrCForest",
     .train = function(task) {
 
       pars = self$param_set$get_values(tags = "train")
+      pars = convert_ratio(pars, "mtry", "mtryratio", length(task$feature_names))
       pars_control = pars[which(names(pars) %in%
         setdiff(methods::formalArgs(partykit::ctree_control),
           c("mtry", "applyfun", "cores")
 
@@ -6,16 +6,10 @@
 #' @templateVar id surv.cforest
 #' @templateVar caller cforest
 #'
-#' @references
-#' Hothorn T, Zeileis A (2015).
-#' “partykit: A Modular Toolkit for Recursive Partytioning in R.”
-#' Journal of Machine Learning Research, 16(118), 3905-3909.
-#' \url{http://jmlr.org/papers/v16/hothorn15a.html}
+#' @inheritSection mlr_learners_classif.cforest Custom mlr3 defaults
 #'
-#' Hothorn T, Hornik K, Zeileis A (2006).
-#' “Unbiased Recursive Partitioning: A Conditional Inference Framework.”
-#' Journal of Computational and Graphical Statistics, 15(3), 651–674.
-#' \doi{10.1198/106186006x133933}
+#' @references
+#' `r format_bib("hothorn_2015", "hothorn_2006")`
 #'
 #' @export
 #' @template seealso_learner
@@ -34,6 +28,7 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest",
           tags = c("train", "perturb")),
         mtry = p_int(lower = 0L, special_vals = list(Inf),
           tags = "train"), # default actually "ceiling(sqrt(nvar))"
+        mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"),
         applyfun = p_uty(tags = c("train", "importance")),
         cores = p_int(default = NULL, special_vals = list(NULL),
           tags = c("train", "importance")),
@@ -127,6 +122,7 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest",
     .train = function(task) {
 
       pars = self$param_set$get_values(tags = "train")
+      pars = convert_ratio(pars, "mtry", "mtryratio", length(task$feature_names))
 
       if ("weights" %in% task$properties) {
         pars$weights = task$weights$weight
 
@@ -11,10 +11,17 @@
 #'   - Actual default: Auto-detecting the number of cores
 #'   - Adjusted default: 1
 #'   - Reason for change: Threading conflicts with explicit parallelization via \CRANpkg{future}.
+#' - `mtry`:
+#'   - This hyperparameter can alternatively be set via the added hyperparameter `mtry.ratio`
+#'     as `mtry = max(ceiling(mtry.ratio * n_features), 1)`.
+#'     Note that `mtry` and `mtry.ratio` are mutually exclusive.
+#' - `sampsize`:
+#'   - This hyperparameter can alternatively be set via the added hyperparameter `sampsize.ratio`
+#'     as `sampsize = max(ceiling(sampsize.ratio * n_obs), 1)`.
+#'     Note that `sampsize` and `sampsize.ratio` are mutually exclusive.
 #'
 #' @references
-#' Breiman L (2001). “Random Forests.”
-#' Machine Learning, 45(1), 5–32. ISSN 1573-0565, doi: 10.1023/A:1010933404324.
+#' `r format_bib("breiman_2001")`
 #'
 #' @template seealso_learner
 #' @template example
@@ -29,6 +36,7 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC",
       ps = ps(
           ntree = p_int(default = 1000, lower = 1L, tags = c("train", "predict")),
           mtry = p_int(lower = 1L, tags = "train"),
+          mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"),
           nodesize = p_int(default = 15L, lower = 1L, tags = "train"),
           nodedepth = p_int(lower = 1L, tags = "train"),
           splitrule = p_fct(
@@ -52,6 +60,7 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC",
           samp = p_uty(tags = "train"),
           membership = p_lgl(default = FALSE, tags = c("train", "predict")),
           sampsize = p_uty(tags = "train"),
+          sampsize.ratio = p_dbl(0, 1, tags = "train"),
           na.action = p_fct(
             default = "na.omit", levels = c("na.omit", "na.impute"),
             tags = c("train", "predict")),
@@ -140,6 +149,8 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC",
   private = list(
     .train = function(task) {
       pv = self$param_set$get_values(tags = "train")
+      pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
+      pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
       cores = pv$cores %??% 1L
 
       if ("weights" %in% task$properties) {
 
@@ -6,15 +6,10 @@
 #' @templateVar id regr.rfsrc
 #' @templateVar caller rfsrc
 #'
-#' @section Custom mlr3 defaults:
-#' - `cores`:
-#'   - Actual default: Auto-detecting the number of cores
-#'   - Adjusted default: 1
-#'   - Reason for change: Threading conflicts with explicit parallelization via \CRANpkg{future}.
+#' @inheritSection mlr_learners_classif.rfsrc Custom mlr3 defaults
 #'
 #' @references
-#' Breiman L (2001). “Random Forests.”
-#' Machine Learning, 45(1), 5–32. ISSN 1573-0565, \doi{10.1023/A:1010933404324}
+#' `r format_bib("breiman_2001")`
 #'
 #' @template seealso_learner
 #' @template example
@@ -29,6 +24,7 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC",
       ps = ps(
           ntree = p_int(default = 1000, lower = 1L, tags = c("train", "predict")),
           mtry = p_int(lower = 1L, tags = "train"),
+          mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"),
           nodesize = p_int(default = 15L, lower = 1L, tags = "train"),
           nodedepth = p_int(lower = 1L, tags = "train"),
           splitrule = p_fct(
@@ -52,6 +48,7 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC",
           samp = p_uty(tags = "train"),
           membership = p_lgl(default = FALSE, tags = c("train", "predict")),
           sampsize = p_uty(tags = "train"),
+          sampsize.ratio = p_dbl(0, 1, tags = "train"),
           na.action = p_fct(
             default = "na.omit", levels = c("na.omit", "na.impute"),
             tags = c("train", "predict")),
@@ -137,6 +134,8 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC",
   private = list(
     .train = function(task) {
       pv = self$param_set$get_values(tags = "train")
+      pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
+      pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
       cores = pv$cores %??% 1L
 
       if ("weights" %in% task$properties) {
-Original file line number
+Diff line change
 +man/
 +attic/
 +pkgdown/
 +revdep/
 +docs/