diff --git a/experiments/001_reproduce_lmm_cma_es/README.md b/experiments/001_reproduce_lmm_cma_es/README.md
new file mode 100644
index 0000000..fb0d754
--- /dev/null
+++ b/experiments/001_reproduce_lmm_cma_es/README.md
@@ -0,0 +1,50 @@
+# Name of the experiment
+
+## 1. Introduction
+
+- brief introduction
+- motivation - why this experiment is done
+- what it contributes to the project
+- what will be done
+- what will be produced as a result of this experiment
+- what would be considered a success
+
+## 2. Problem Definition and Algorithm
+
+### 2.1 Task Definition
+
+Precisely define the problem you are addressing (i.e. formally specify the inputs and outputs). Elaborate on why this is an interesting and important problem.
+
+### 2.2 Algorithm Definition
+
+Describe in reasonable detail the algorithm you are using to address this problem. A pseudocode description of the algorithm you are using is frequently useful. Trace through a concrete example, showing how your algorithm processes this example. The example should be complex enough to illustrate all of the important aspects of the problem but simple enough to be easily understood. If possible, an intuitively meaningful example is better than one with meaningless symbols.
+
+## 3. Experimental Evaluation
+
+### 3.1 Methodology
+
+What are the criteria you are using to evaluate your method? What specific hypotheses does your experiment test? Describe the experimental methodology that you used. What are the dependent and independent variables? What is the training/test data that was used, and why is it realistic or interesting? Exactly what performance data did you collect and how are you presenting and analyzing it? Comparisons to competing methods that address the same problem are particularly useful.
+
+### 3.2 Results
+
+Present the quantitative results of your experiments. Graphical data presentation such as graphs and histograms are frequently better than tables.
What are the basic differences revealed in the data? Are they statistically significant?
+
+### 3.3 Discussion
+
+Is your hypothesis supported? What conclusions do the results support about the strengths and weaknesses of your method compared to other methods? How can the results be explained in terms of the underlying properties of the algorithm and/or the data?
+
+## 4. Related Work
+
+Answer the following questions for each piece of related work that addresses the same or a similar problem. What is their problem and method? How is your problem and method different? Why is your problem and method better?
+
+## 5. Future Work
+
+What are the major shortcomings of your current method? For each shortcoming, propose additions or enhancements that would help overcome it.
+
+## 6. Conclusion
+
+Briefly summarize the important results and conclusions presented in the paper. What are the most important points illustrated by your work? How will your results improve future research and applications in the area?
+
+## Bibliography
+
+Be sure to include a standard, well-formatted, comprehensive bibliography with citations from the text referring to previously published papers in the scientific literature that you utilized or are related to your work.
\ No newline at end of file
diff --git a/experiments/001_reproduce_lmm_cma_es/cmaes_variations.py b/experiments/001_reproduce_lmm_cma_es/cmaes_variations.py
new file mode 100644
index 0000000..d9cb2f5
--- /dev/null
+++ b/experiments/001_reproduce_lmm_cma_es/cmaes_variations.py
@@ -0,0 +1,108 @@
+"""
+Variations of CMA-ES encapsulated in easy to call functions.
+""" + +# pylint: disable=too-many-arguments, too-many-locals +import cma + +from optilab.data_classes import Bounds, PointList +from optilab.functions import ObjectiveFunction +from optilab.functions.surrogate import ( + LocallyWeightedPolynomialRegression, + SurrogateObjectiveFunction, +) +from optilab.metamodels import ApproximateRankingMetamodel + + +def cma_es( + function: ObjectiveFunction, + population_size: int, + call_budget: int, + bounds: Bounds, + *, + sigma0: float = 1, + target: float = 0.0, + tolerance: float = 1e-8, +) -> PointList: + """ + Run optimization with regular CMA-ES. + """ + + x0 = bounds.random_point(function.dim).x + + res_log = PointList(points=[]) + + es = cma.CMAEvolutionStrategy( + x0, + sigma0, + { + "popsize": population_size, + "bounds": bounds.to_list(), + "maxfevals": call_budget, + "ftarget": target, + "verbose": -9, + "tolfun": tolerance, + }, + ) + + while not es.stop(): + solutions = PointList.from_list(es.ask()) + results = PointList(points=[function(x) for x in solutions.points]) + res_log.extend(results) + x, y = results.pairs() + es.tell(x, y) + + return res_log + + +def lmm_cma_es( + function: ObjectiveFunction, + population_size: int, + call_budget: int, + bounds: Bounds, + *, + sigma0: float = 1, + target: float = 0.0, + tolerance: float = 1e-8, + polynomial_dim: int = 2, +) -> PointList: + """ + Run optimization with LMM-CMA-ES + """ + + n = function.dim + num_neighbors = n * (n+3) + 2 + + metamodel = ApproximateRankingMetamodel( + population_size, + population_size // 2, + function, + LocallyWeightedPolynomialRegression(polynomial_dim, num_neighbors), + ) + + x0 = bounds.random_point(function.dim).x + + es = cma.CMAEvolutionStrategy( + x0, + sigma0, + { + "popsize": population_size, + "bounds": bounds.to_list(), + "maxfevals": call_budget, + "ftarget": target, + "verbose": -9, + "tolfun": tolerance, + }, + ) + + while ( + metamodel.get_log().best_y() > 1e-8 and len(metamodel.get_log()) <= call_budget + ): + 
solutions = PointList.from_list(es.ask()) + metamodel.surrogate_function.set_covariance_matrix(es.C) + metamodel.adapt(solutions) + xy_pairs = metamodel(solutions) + x, y = xy_pairs.pairs() + es.tell(x, y) + + return metamodel.get_log() diff --git a/experiments/001_reproduce_lmm_cma_es/main.py b/experiments/001_reproduce_lmm_cma_es/main.py new file mode 100644 index 0000000..8a2de31 --- /dev/null +++ b/experiments/001_reproduce_lmm_cma_es/main.py @@ -0,0 +1,95 @@ +""" +Benchmarking CMA-ES algorithm on CEC 2017 +""" + +# pylint: disable=import-error + +from cmaes_variations import cma_es, lmm_cma_es +from tqdm import tqdm +import numpy as np + +from optilab.data_classes import Bounds +from optilab.functions.surrogate import ( + KNNSurrogateObjectiveFunction, + PolynomialRegression, +) +from optilab.functions.unimodal import CumulativeSquaredSums, SphereFunction +from optilab.functions.multimodal import RosenbrockFunction +from optilab.functions import NoisyFunction +from optilab.metamodels import IEPolationSurrogate +from optilab.plotting import plot_ecdf_curves +from optilab.data_classes import OptimizationRun, OptimizerMetadata +from optilab.utils import dump_to_pickle + +if __name__ == "__main__": + # hyperparams: + DIM = 2 + POPSIZE = 6 + NUM_RUNS = 20 + CALL_BUDGET = 1e4 * DIM + TOL = 1e-10 + + # optimized problem + BOUNDS = Bounds(-10, 10) + FUNC = CumulativeSquaredSums(DIM) + + # perform optimization with vanilla cmaes + cmaes_logs = [ + cma_es(FUNC, POPSIZE, CALL_BUDGET, BOUNDS, tolerance=TOL) + for _ in tqdm(range(NUM_RUNS), unit="run") + ] + + cmaes_run = OptimizationRun( + model_metadata=OptimizerMetadata( + name='CMA_ES', + population_size=POPSIZE, + hyperparameters={ + 'sigma0': 1 + } + ), + function_metadata=FUNC.get_metadata(), + bounds=BOUNDS, + tolerance=TOL, + logs=cmaes_logs + ) + + # perform optimization with lmm cmaes + lmm_cmaes_logs = [ + lmm_cma_es(FUNC, POPSIZE, CALL_BUDGET, BOUNDS, tolerance=TOL) + for _ in tqdm(range(NUM_RUNS), unit="run") 
+ ] + + lmm_cmaes_run = OptimizationRun( + model_metadata=OptimizerMetadata( + name='LMM_CMA_ES', + population_size=POPSIZE, + hyperparameters={ + 'sigma0': 1, + 'degree': 2, + 'num_neighbor': DIM * (DIM + 3) + 2 + } + ), + function_metadata=FUNC.get_metadata(), + bounds=BOUNDS, + tolerance=TOL, + logs=lmm_cmaes_logs + ) + + # print stats + vanilla_times = [len(log.slice_to_best()) for log in cmaes_logs] + lmm_times = [len(log.slice_to_best()) for log in lmm_cmaes_logs] + + print(f'vanilla {np.average(vanilla_times)} {np.std(vanilla_times)}') + print(f'lmm {np.average(lmm_times)} {np.std(lmm_times)}') + + # plot results + plot_ecdf_curves( + { + "cma-es": cmaes_logs, + "lmm-cma-es": lmm_cmaes_logs, + }, + n_dimensions=DIM, + savepath="ecdf.png", + ) + + dump_to_pickle([cmaes_run, lmm_cmaes_run], f'lmm_reproduction_{FUNC.name}_{DIM}.pkl')