
Commit 166a169

feat(learner): Priority Lasso (#266)
* priority lasso regr, classif and surv learners added
* fix: add catboost to DESCRIPTION. The package installation in the CI failed when catboost was in the DESCRIPTION because of a bug in pak; with pak 4.0.0 this should be resolved (r-lib/pak#385).
* chore(tests): remove unneeded setup and teardown files. Initially the idea was to enable an additional check for partial argument matching, but this throws too many warnings as the dependencies violate it.
* build(remotes): Update remotes for aorsf and catboost
* Fix catboost remote to the latest release
* Use the CRAN version of aorsf again
* ci(cron): Run rcmdcheck every week. This is useful because it prepares the cache.
* fix(vignette): fix error
* fix(priority_lasso): fix learner bugs and improve tests. The previous tests from the author were too weak and did not catch the errors in the implementation. With a workaround to provide task-dependent parameters (quite hacky), we could still run the autotests and correct the errors in the implementation.
* style(prioritylasso): address linting issues
* fix(syntax): solve merge conflict
* tests(priority_lasso): add missing parameter
* feat(priority_lasso): add prob response type to classif
* chore(tests): correct comments in parameter tests
* fix(tests): minor correction
* fix(priority_lasso): fix test

Co-authored-by: HarutyunyanLiana <lh.lianaharutyunyan@gmail.com>
1 parent a0910f8 · commit 166a169

15 files changed: +1124 −5 lines
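
To illustrate what this commit enables, here is a minimal usage sketch (not part of the commit) for the new classification learner. It assumes `mlr3`, `mlr3extralearners`, and `prioritylasso` are installed; the three-way split of the `sonar` task's 60 features into blocks is arbitrary and only for demonstration, since in practice the blocks encode the practitioner's priority order over data types.

```r
library(mlr3)
library(mlr3extralearners)

task = tsk("sonar")  # two-class task with 60 numeric features

learner = lrn("classif.priority_lasso",
  # `blocks` is required: a named list of feature-column indices, ordered by
  # priority; later blocks are fitted using earlier blocks' predictions as an offset
  blocks = list(bp1 = 1:20, bp2 = 21:40, bp3 = 41:60),
  type.measure = "class"
)
learner$predict_type = "prob"  # the "prob" predict type added in this commit

learner$train(task)
learner$selected_features()    # features with non-zero coefficients
learner$predict(task)
```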

NAMESPACE

+3 lines

```diff
@@ -27,6 +27,7 @@ export(LearnerClassifLightGBM)
 export(LearnerClassifMob)
 export(LearnerClassifOneR)
 export(LearnerClassifPART)
+export(LearnerClassifPriorityLasso)
 export(LearnerClassifRandomForest)
 export(LearnerClassifRandomForestSRC)
 export(LearnerDensKDEks)
@@ -59,6 +60,7 @@ export(LearnerRegrLmer)
 export(LearnerRegrM5Rules)
 export(LearnerRegrMars)
 export(LearnerRegrMob)
+export(LearnerRegrPriorityLasso)
 export(LearnerRegrRSM)
 export(LearnerRegrRVM)
 export(LearnerRegrRandomForest)
@@ -87,6 +89,7 @@ export(LearnerSurvObliqueRSF)
 export(LearnerSurvPCHazard)
 export(LearnerSurvParametric)
 export(LearnerSurvPenalized)
+export(LearnerSurvPriorityLasso)
 export(LearnerSurvRandomForestSRC)
 export(LearnerSurvRanger)
 export(LearnerSurvSVM)
```

NEWS.md

+2 −4 lines

```diff
@@ -1,13 +1,11 @@
-# mlr3extralearners 0.6.1
-
-
 # mlr3extralearners 0.6.0-9000
 
 * BREAKING CHANGE: lightgbm's early stopping mechanism now uses the task's test set.
 * feat: Add two new learners `regr.abess` and `classif.abess` (thanks to @bbayukari)
 * feat: Added learner `LearnerClassifImbalancedRandomForestSRC` (thanks to
 @HarutyunyanLiana)
-
+* feat: Added learners `LearnerClassifPriorityLasso`, `LearnerRegrPriorityLasso`, `LearnerSurvPriorityLasso` (thanks to
+@HarutyunyanLiana)
 
 # mlr3extralearners 0.6.0
```

R/bibentries.R

+9 −1 lines

```diff
@@ -550,6 +550,14 @@ bibentries = c( # nolint start
     pages = "1--17",
     year = "2010"
   ),
+  klau2018priolasso = bibentry("article",
+    title = "Priority-Lasso: a simple hierarchical approach to the prediction of clinical outcome using multi-omics data",
+    author = "Klau Simon, Jurinovic Vindi, Hornung Roman, Herold Tobias, Boulesteix Anne-Laure",
+    journal = "BMC Bioinformatics",
+    volume = "19",
+    year = "2018",
+    doi = "10.1186/s12859-018-2344-6"
+  ),
   obrien2019imbrfsrc = bibentry("article",
     title = "A random forests quantile classifier for class imbalanced data",
     author = "Robert O\xe2\x80\x99Brien and Hemant Ishwaran",
@@ -566,4 +574,4 @@ bibentries = c( # nolint start
     month = "01",
     journal = "University of California, Berkeley"
   )
-) # nolint end
+) # nolint end
```
New file — classification learner (+138 lines)

```r
#' @title Classification Priority Lasso Learner
#' @author HarutyunyanLiana
#' @name mlr_learners_classif.priority_lasso
#'
#' @description
#' Patient outcome prediction based on multi-omics data taking practitioners’ preferences into account.
#' Calls [prioritylasso::prioritylasso()] from \CRANpkg{prioritylasso}.
#'
#' @templateVar id classif.priority_lasso
#' @template learner
#'
#' @references
#' `r format_bib("klau2018priolasso")`
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerClassifPriorityLasso = R6Class("LearnerClassifPriorityLasso",
  inherit = LearnerClassif,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        blocks = p_uty(default = NULL, tags = c("train", "required")),
        type.measure = p_fct(default = "class", levels = c("class", "auc"), tags = c("train", "required")),
        max.coef = p_uty(default = NULL, tags = "train"),
        block1.penalization = p_lgl(default = TRUE, tags = "train"),
        lambda.type = p_fct(default = "lambda.min", levels = c("lambda.min", "lambda.1se"), tags = c("train", "predict")), #nolint
        standardize = p_lgl(default = TRUE, tags = "train"),
        nfolds = p_int(default = 5L, lower = 1L, tags = "train"),
        foldid = p_uty(default = NULL, tags = "train"),
        cvoffset = p_lgl(default = FALSE, tags = "train"),
        cvoffsetnfolds = p_int(default = 10, lower = 1L, tags = "train"),

        # params from cv.glmnet
        alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"),
        alpha = p_dbl(0, 1, default = 1, tags = "train"),
        big = p_dbl(default = 9.9e35, tags = "train"),
        devmax = p_dbl(0, 1, default = 0.999, tags = "train"),
        dfmax = p_int(0L, tags = "train"),
        eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"),
        epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"),
        exclude = p_uty(tags = "train"),
        exmx = p_dbl(default = 250.0, tags = "train"),
        fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"),
        gamma = p_uty(tags = "train"),
        grouped = p_lgl(default = TRUE, tags = "train"),
        intercept = p_lgl(default = TRUE, tags = "train"),
        keep = p_lgl(default = FALSE, tags = "train"),
        lambda = p_uty(tags = "train"),
        lambda.min.ratio = p_dbl(0, 1, tags = "train"),
        lower.limits = p_uty(default = -Inf, tags = "train"),
        maxit = p_int(1L, default = 100000L, tags = "train"),
        mnlam = p_int(1L, default = 5L, tags = "train"),
        mxit = p_int(1L, default = 100L, tags = "train"),
        mxitnr = p_int(1L, default = 25L, tags = "train"),
        nlambda = p_int(1L, default = 100L, tags = "train"),
        offset = p_uty(default = NULL, tags = "train"),
        parallel = p_lgl(default = FALSE, tags = "train"),
        penalty.factor = p_uty(tags = "train"),
        pmax = p_int(0L, tags = "train"),
        pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"),
        prec = p_dbl(default = 1e-10, tags = "train"),
        predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), #nolint
        relax = p_lgl(default = FALSE, tags = "train"),
        s = p_dbl(0, 1, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"), #nolint
        standardize.response = p_lgl(default = FALSE, tags = "train"),
        thresh = p_dbl(0, default = 1e-07, tags = "train"),
        trace.it = p_int(0, 1, default = 0, tags = "train"),
        type.gaussian = p_fct(c("covariance", "naive"), tags = "train"),
        type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"),
        type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"),
        upper.limits = p_uty(default = Inf, tags = "train")
      )

      super$initialize(
        id = "classif.priority_lasso",
        packages = "prioritylasso",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("response", "prob"),
        param_set = param_set,
        properties = c("weights", "selected_features", "twoclass"),
        man = "mlr3extralearners::mlr_learners_classif.priority_lasso",
        label = "Priority Lasso"
      )
    },

    #' @description
    #' Selected features, i.e. those where the coefficient is non-zero.
    #' @return `character()`.
    selected_features = function() {
      if (is.null(self$model)) {
        stopf("No model stored")
      }
      coefs = self$model$coefficients
      coefs = coefs[coefs != 0]
      names(coefs)
    }
  ),
  private = list(
    .train = function(task) {
      # get parameters for training
      pars = self$param_set$get_values(tags = "train")
      pars$family = "binomial"

      if ("weights" %in% task$properties) {
        pars$weights = task$weights$weight
      }
      data = as_numeric_matrix(task$data(cols = task$feature_names))
      target = task$truth()
      invoke(prioritylasso::prioritylasso,
        X = data, Y = target,
        .args = pars)
    },
    .predict = function(task) {
      newdata = as_numeric_matrix(ordered_features(task, self))
      pv = self$param_set$get_values(tags = "predict")
      pv = rename(pv, "predict.gamma", "gamma")

      p = invoke(predict, self$model,
        newdata = newdata, type = "response",
        .args = pv)
      p = drop(p)
      classnames = self$model$glmnet.fit[[1L]]$classnames
      if (self$predict_type == "response") {
        response = ifelse(p <= 0.5, classnames[1L], classnames[2L])
        list(response = drop(response))
      } else {
        prob = cbind(1 - p, p)
        colnames(prob) = classnames
        list(prob = prob)
      }
    }
  )
)

.extralrns_dict$add("classif.priority_lasso", LearnerClassifPriorityLasso)
```
New file — regression learner (+132 lines)

```r
#' @title Regression Priority Lasso Learner
#' @author HarutyunyanLiana
#' @name mlr_learners_regr.priority_lasso
#'
#' @description
#' Patient outcome prediction based on multi-omics data taking practitioners’ preferences into account.
#' Calls [prioritylasso::prioritylasso()] from \CRANpkg{prioritylasso}.
#'
#'
#' @templateVar id regr.priority_lasso
#' @template learner
#'
#' @references
#' `r format_bib("klau2018priolasso")`
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerRegrPriorityLasso = R6Class("LearnerRegrPriorityLasso",
  inherit = LearnerRegr,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        blocks = p_uty(default = NULL, tags = c("train", "required")),
        max.coef = p_uty(default = NULL, tags = "train"),
        block1.penalization = p_lgl(default = TRUE, tags = "train"),
        lambda.type = p_fct(default = "lambda.min", levels = c("lambda.min", "lambda.1se"), tags = c("train", "predict")),
        standardize = p_lgl(default = TRUE, tags = "train"),
        nfolds = p_int(default = 5L, lower = 1L, tags = "train"),
        foldid = p_uty(default = NULL, tags = "train"),
        cvoffset = p_lgl(default = FALSE, tags = "train"),
        cvoffsetnfolds = p_int(default = 10, lower = 1L, tags = "train"),

        # params from cv.glmnet
        alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"),
        alpha = p_dbl(0, 1, default = 1, tags = "train"),
        big = p_dbl(default = 9.9e35, tags = "train"),
        devmax = p_dbl(0, 1, default = 0.999, tags = "train"),
        dfmax = p_int(0L, tags = "train"),
        eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"),
        epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"),
        exclude = p_uty(tags = "train"),
        exmx = p_dbl(default = 250.0, tags = "train"),
        fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"),
        gamma = p_uty(tags = "train"),
        grouped = p_lgl(default = TRUE, tags = "train"),
        intercept = p_lgl(default = TRUE, tags = "train"),
        keep = p_lgl(default = FALSE, tags = "train"),
        lambda = p_uty(tags = "train"),
        lambda.min.ratio = p_dbl(0, 1, tags = "train"),
        lower.limits = p_uty(default = -Inf, tags = "train"),
        maxit = p_int(1L, default = 100000L, tags = "train"),
        mnlam = p_int(1L, default = 5L, tags = "train"),
        mxit = p_int(1L, default = 100L, tags = "train"),
        mxitnr = p_int(1L, default = 25L, tags = "train"),
        nlambda = p_int(1L, default = 100L, tags = "train"),
        offset = p_uty(default = NULL, tags = "train"),
        parallel = p_lgl(default = FALSE, tags = "train"),
        penalty.factor = p_uty(tags = "train"),
        pmax = p_int(0L, tags = "train"),
        pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"),
        prec = p_dbl(default = 1e-10, tags = "train"),
        predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"),
        relax = p_lgl(default = FALSE, tags = "train"),
        s = p_dbl(0, 1, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"),
        standardize.response = p_lgl(default = FALSE, tags = "train"),
        thresh = p_dbl(0, default = 1e-07, tags = "train"),
        trace.it = p_int(0, 1, default = 0, tags = "train"),
        type.gaussian = p_fct(c("covariance", "naive"), tags = "train"),
        type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"),
        type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"),
        upper.limits = p_uty(default = Inf, tags = "train")
      )

      super$initialize(
        id = "regr.priority_lasso",
        packages = "prioritylasso",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "response",
        param_set = param_set,
        properties = c("weights", "selected_features"),
        man = "mlr3extralearners::mlr_learners_regr.priority_lasso",
        label = "Priority Lasso"
      )
    },

    #' @description
    #' Selected features, i.e. those where the coefficient is non-zero.
    #' @return `character()`.
    selected_features = function() {
      if (is.null(self$model)) {
        stopf("No model stored")
      }
      coefs = self$model$coefficients
      coefs = coefs[coefs != 0]
      names(coefs)
    }
  ),
  private = list(
    .train = function(task) {
      # get parameters for training
      pars = self$param_set$get_values(tags = "train")
      pars$family = "gaussian"
      pars$type.measure = "mse"

      if ("weights" %in% task$properties) {
        pars$weights = as.numeric(task$weights$weight)
      }
      data = as.matrix(task$data(cols = task$feature_names))
      target = task$truth()
      invoke(prioritylasso::prioritylasso,
        X = data, Y = target,
        .args = pars)
    },
    .predict = function(task) {
      # get parameters with tag "predict"
      pars = self$param_set$get_values(tags = "predict")
      pars = rename(pars, "predict.gamma", "gamma")

      # get newdata and ensure same ordering in train and predict
      newdata = as.matrix(ordered_features(task, self))

      pred = invoke(predict, self$model, newdata = newdata, type = "response", .args = pars)

      list(response = pred)
    }
  )
)

.extralrns_dict$add("regr.priority_lasso", LearnerRegrPriorityLasso)
```
