Skip to content

Commit

Permalink
Add option to substitute values below tolerance with tolerance threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
mlojek committed Feb 25, 2025
1 parent 37a98f7 commit d7a6856
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 12 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Learn how to use optilab by using our demo notebook. See `demo/tutorial.ipynb`.
## CLI tool
Optilab comes with a powerful CLI tool to easily summarize your experiments. It allows for plotting the results and performing statistical testing to check for statistical significance in optimization results.
```
usage: Optilab CLI utility. [-h] [--hide_plots] [--test_y] [--test_evals] [--entries ENTRIES [ENTRIES ...]] pickle_path
usage: Optilab CLI utility. [-h] [--hide_plots] [--test_y] [--test_evals] [--entries ENTRIES [ENTRIES ...]] [--raw_values] pickle_path
positional arguments:
pickle_path Path to pickle file or directory with optimization runs.
Expand All @@ -40,6 +40,7 @@ options:
--test_evals Perform Mann-Whitney U test on eval values.
--entries ENTRIES [ENTRIES ...]
Space separated list of indexes of entries to include in analysis.
--raw_values If specified, y values below tolerance are not substituted by tolerance value.
```

## Docker
Expand Down
13 changes: 11 additions & 2 deletions src/optilab/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def mann_whitney_u_test_grid(data_lists: List[List[float]]) -> str:
type=int,
help="Space separated list of indexes of entries to include in analysis.",
)
parser.add_argument(
"--raw_values",
action="store_true",
help="If specified, y values below tolerance are not substituted by tolerance value.",
)
args = parser.parse_args()

file_path_list = []
Expand Down Expand Up @@ -110,14 +115,18 @@ def mann_whitney_u_test_grid(data_lists: List[List[float]]) -> str:
)

plot_box_plot(
data={run.model_metadata.name: run.bests_y() for run in data},
data={
run.model_metadata.name: run.bests_y(args.raw_values) for run in data
},
savepath=f"{filename_stem}.box_plot.png",
show=not args.hide_plots,
function_name=data[0].function_metadata.name,
)

# stats
stats = pd.concat([run.stats() for run in data], ignore_index=True)
stats = pd.concat(
[run.stats(args.raw_values) for run in data], ignore_index=True
)
stats_evals = stats.filter(like="evals_", axis=1)
stats_y = stats.filter(like="y_", axis=1)
stats_df = stats.drop(columns=stats_evals.columns.union(stats_y.columns))
Expand Down
27 changes: 18 additions & 9 deletions src/optilab/data_classes/optimization_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,19 @@ class OptimizationRun:
logs: List[PointList]
"Logs of points from the optimization runs."

def bests_y(self) -> List[float]:
def bests_y(self, raw_values: bool = False) -> List[float]:
"""
Get a list of best y values from each log.
Args:
raw_values (bool): If False, y values below the tolerance are replaced with the tolerance;
otherwise the raw y values are returned. Defaults to False.
Returns:
List[float]: List of the best values from each log.
"""
return [log.best_y() for log in self.logs]
tolerance = -np.inf if raw_values else self.tolerance
return [max(log.best_y(), tolerance) for log in self.logs]

def log_lengths(self) -> List[float]:
"""
Expand All @@ -54,10 +59,14 @@ def log_lengths(self) -> List[float]:
"""
return [len(log) for log in self.logs]

def stats(self) -> pd.DataFrame:
def stats(self, raw_values: bool = False) -> pd.DataFrame:
"""
Make a summary of the run.
Args:
raw_values (bool): If False, y values below the tolerance are replaced with the tolerance;
otherwise the raw y values are returned. Defaults to False.
Returns:
pd.DataFrame: Dataframe containing stats and summary of the run.
"""
Expand All @@ -74,11 +83,11 @@ def stats(self) -> pd.DataFrame:
"evals_max": [max(self.log_lengths())],
"evals_mean": [np.mean(self.log_lengths())],
"evals_std": [np.std(self.log_lengths())],
"y_min": [min(self.bests_y())],
"y_max": [max(self.bests_y())],
"y_mean": [np.mean(self.bests_y())],
"y_std": [np.std(self.bests_y())],
"y_median": [np.median(self.bests_y())],
"y_iqr": [scipy.stats.iqr(self.bests_y())],
"y_min": [min(self.bests_y(raw_values))],
"y_max": [max(self.bests_y(raw_values))],
"y_mean": [np.mean(self.bests_y(raw_values))],
"y_std": [np.std(self.bests_y(raw_values))],
"y_median": [np.median(self.bests_y(raw_values))],
"y_iqr": [scipy.stats.iqr(self.bests_y(raw_values))],
}
)

0 comments on commit d7a6856

Please sign in to comment.