#' @title Multi-Objective Bayesian Optimization via ParEGO
#'
#' @include mlr_loop_functions.R
#' @name mlr_loop_functions_parego
#'
#' @description
#' Loop function for multi-objective Bayesian Optimization via ParEGO.
#' Normally used inside an [OptimizerMbo].
#'
#' In each iteration after the initial design, the observed objective function values are normalized, and `q` candidates
#' are obtained by scalarizing these values via the augmented Tchebycheff function, updating the surrogate with respect
#' to these scalarized values, and optimizing the acquisition function.
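#'
#' For a random weight vector \eqn{\lambda}, the augmented Tchebycheff scalarization of the normalized
#' objective values \eqn{y} (Equation 2 in Knowles (2006)) is
#' \deqn{\max_j (\lambda_j y_j) + \rho \sum_j \lambda_j y_j .}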
#'
#' @param instance ([bbotk::OptimInstanceBatchMultiCrit])\cr
#' The [bbotk::OptimInstanceBatchMultiCrit] to be optimized.
#' @param init_design_size (`NULL` | `integer(1)`)\cr
#' Size of the initial design.
#' If `NULL` and the [bbotk::ArchiveBatch] contains no evaluations, \code{4 * d} is used with \code{d} being the
#' dimensionality of the search space.
#' Points are generated via a Sobol sequence.
#' @param surrogate ([SurrogateLearner])\cr
#' [SurrogateLearner] to be used as a surrogate.
#' @param acq_function ([AcqFunction])\cr
#' [AcqFunction] to be used as acquisition function.
#' @param acq_optimizer ([AcqOptimizer])\cr
#' [AcqOptimizer] to be used as acquisition function optimizer.
#' @param q (`integer(1)`)\cr
#' Batch size, i.e., the number of candidates to be obtained for a single batch.
#' Default is `1`.
#' @param s (`integer(1)`)\cr
#' \eqn{s} in Equation 1 in Knowles (2006).
#' Determines the total number of possible random weight vectors, which is
#' \eqn{\binom{s+k-1}{k-1}}{choose(s+k-1, k-1)} for \eqn{k} objectives.
#' Default is `100`.
#' @param rho (`numeric(1)`)\cr
#' \eqn{\rho} in Equation 2 in Knowles (2006) scaling the linear part of the augmented Tchebycheff function.
#' Default is `0.05`.
#' @param random_interleave_iter (`integer(1)`)\cr
#' Every `random_interleave_iter` iteration (starting after the initial design), a point is
#' sampled uniformly at random and evaluated (instead of a model-based proposal).
#' For example, if `random_interleave_iter = 2`, random interleaving is performed in the second,
#' fourth, sixth, ... iteration.
#' Default is `0`, i.e., no random interleaving is performed at all.
#'
#' @note
#' * The `acq_function$surrogate`, even if already populated, will always be overwritten by the `surrogate`.
#' * The `acq_optimizer$acq_function`, even if already populated, will always be overwritten by `acq_function`.
#' * The `surrogate$archive`, even if already populated, will always be overwritten by the [bbotk::ArchiveBatch] of the [bbotk::OptimInstanceBatchMultiCrit].
#' * The scalarizations of the objective function values are stored as the `y_scal` column in the
#' [bbotk::ArchiveBatch] of the [bbotk::OptimInstanceBatchMultiCrit].
#' * To make use of parallel evaluations in the case of `q > 1`, the objective
#' function of the [bbotk::OptimInstanceBatchMultiCrit] must be implemented accordingly, i.e., it must be able to
#' evaluate a whole batch of candidate points (potentially in parallel).
#'
#' @return invisible(instance)\cr
#' The original instance is modified in-place and returned invisibly.
#'
#' @references
#' * `r format_bib("knowles_2006")`
#'
#' @family Loop Function
#' @export
#' @examples
#' \donttest{
#' if (requireNamespace("mlr3learners") &
#' requireNamespace("DiceKriging") &
#' requireNamespace("rgenoud")) {
#'
#' library(bbotk)
#' library(paradox)
#' library(mlr3learners)
#'
#' fun = function(xs) {
#' list(y1 = xs$x^2, y2 = (xs$x - 2) ^ 2)
#' }
#' domain = ps(x = p_dbl(lower = -10, upper = 10))
#' codomain = ps(y1 = p_dbl(tags = "minimize"), y2 = p_dbl(tags = "minimize"))
#' objective = ObjectiveRFun$new(fun = fun, domain = domain, codomain = codomain)
#'
#' instance = OptimInstanceBatchMultiCrit$new(
#' objective = objective,
#' terminator = trm("evals", n_evals = 5))
#'
#' surrogate = default_surrogate(instance, n_learner = 1)
#'
#' acq_function = acqf("ei")
#'
#' acq_optimizer = acqo(
#' optimizer = opt("random_search", batch_size = 100),
#' terminator = trm("evals", n_evals = 100))
#'
#' optimizer = opt("mbo",
#' loop_function = bayesopt_parego,
#' surrogate = surrogate,
#' acq_function = acq_function,
#' acq_optimizer = acq_optimizer)
#'
#' optimizer$optimize(instance)
#' }
#' }
bayesopt_parego = function(
instance,
surrogate,
acq_function,
acq_optimizer,
init_design_size = NULL,
q = 1L,
s = 100L,
rho = 0.05,
random_interleave_iter = 0L
) {
# assertions
assert_r6(instance, "OptimInstanceBatchMultiCrit")
assert_r6(surrogate, classes = "SurrogateLearner")
assert_r6(acq_function, classes = "AcqFunction")
assert_r6(acq_optimizer, classes = "AcqOptimizer")
assert_int(init_design_size, lower = 1L, null.ok = TRUE)
assert_int(q, lower = 1L)
assert_int(s, lower = 1L)
assert_number(rho, lower = 0, upper = 1)
assert_int(random_interleave_iter, lower = 0L)
# initial design
search_space = instance$search_space
if (is.null(init_design_size) && instance$archive$n_evals == 0L) {
init_design_size = 4L * search_space$length
}
if (!is.null(init_design_size) && instance$archive$n_evals == 0L) {
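# generate a space-filling initial design via a Sobol sequence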
design = generate_design_sobol(search_space, n = init_design_size)$data
instance$eval_batch(design)
}
# completing initialization
surrogate$archive = instance$archive
surrogate$cols_y = "y_scal"
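# the surrogate models the scalarized objective values stored in the "y_scal" column of the archive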
acq_function$surrogate = surrogate
acq_optimizer$acq_function = acq_function
k = length(instance$archive$cols_y) # codomain may contain non-target columns since commit 08116aa02204980f87c8c08841176ae8f664980a
lambdas = calculate_parego_weights(s, k = k)
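# each row of lambdas is one weight vector with non-negative entries summing to 1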
qs = seq_len(q)
# actual loop
repeat {
data = instance$archive$data
ydt = data[, instance$archive$cols_y, with = FALSE]
ydt = Map("*", ydt, mult_max_to_min(instance$archive$codomain)) # we always assume minimization
ydt = Map(function(y) (y - min(y, na.rm = TRUE)) / diff(range(y, na.rm = TRUE)), ydt) # scale y to [0, 1]
xdt = map_dtr(qs, function(q) {
# scalarize y
lambda = lambdas[sample.int(nrow(lambdas), 1L), , drop = TRUE]
mult = Map("*", ydt, lambda)
y_scal = Reduce("+", mult)
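# elementwise max over the weighted objectives plus the rho-scaled linear sum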
y_scal = do.call(pmax, mult) + rho * y_scal # augmented Tchebycheff function
set(data, j = "y_scal", value = y_scal)
tryCatch({
# random interleaving is handled here
if (isTRUE((instance$archive$n_evals - init_design_size + 1L) %% random_interleave_iter == 0)) {
stop(set_class(list(message = "Random interleaving", call = NULL), classes = c("random_interleave", "mbo_error", "error", "condition")))
}
acq_function$surrogate$update()
acq_function$update()
acq_optimizer$optimize()
}, mbo_error = function(mbo_error_condition) {
lg$info(paste0(class(mbo_error_condition), collapse = " / "))
lg$info("Proposing a randomly sampled point")
generate_design_random(search_space, n = 1L)$data
})
}, .fill = TRUE)
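# evaluate all q candidates as a single batch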
instance$eval_batch(xdt)
if (instance$is_terminated) break
}
return(invisible(instance))
}
class(bayesopt_parego) = "loop_function"
attr(bayesopt_parego, "id") = "bayesopt_parego"
attr(bayesopt_parego, "label") = "ParEGO"
attr(bayesopt_parego, "instance") = "multi-crit"
attr(bayesopt_parego, "man") = "mlr3mbo::mlr_loop_functions_parego"
mlr_loop_functions$add("bayesopt_parego", bayesopt_parego)