From 83d50bfed8ce7b6709632b9148b31a70715a3321 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Wed, 15 May 2024 15:16:02 +0200 Subject: [PATCH 001/190] Add changelog (#28) Add changelog of develops so far including those since last tagged commit (`v0.1.0`) --- CHANGELOG.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..19ecdd41 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,51 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + +## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) + +### Added + +- new metrics (`nll` and `crps_gauss`) and `metrics` submodule, stddiv output option + [c14b6b4](https://github.com/joeloskarsson/neural-lam/commit/c14b6b4323e6b56f1f18632b6ca8b0d65c3ce36a) + @joeloskarsson + +- ability to "watch" metrics and log + [c14b6b4](https://github.com/joeloskarsson/neural-lam/commit/c14b6b4323e6b56f1f18632b6ca8b0d65c3ce36a) + @joeloskarsson + +- pre-commit setup for linting and formatting + [\#6](https://github.com/joeloskarsson/neural-lam/pull/6), [\#8](https://github.com/joeloskarsson/neural-lam/pull/8) + @sadamov, @joeloskarsson + +### Changed + +- moved batch-static features ("water cover") into forcing component return by `WeatherDataset` + [\#13](https://github.com/joeloskarsson/neural-lam/pull/13) + @joeloskarsson + +- change validation metric from `mae` to `rmse` + [c14b6b4](https://github.com/joeloskarsson/neural-lam/commit/c14b6b4323e6b56f1f18632b6ca8b0d65c3ce36a) + @joeloskarsson + +- change RMSE definition to compute sqrt after all averaging + [\#10](https://github.com/joeloskarsson/neural-lam/pull/10) + @joeloskarsson + +### Removed + +- `WeatherDataset(torch.Dataset)` no longer returns "batch-static" component of + training item (only `prev_state`, `target_state` and `forcing`), the batch static features are + instead included in forcing + [\#13](https://github.com/joeloskarsson/neural-lam/pull/13) + @joeloskarsson + + +## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) + +First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication +(https://arxiv.org/abs/2309.17370) From 4a97a1209e8fceadef8162f13db9e4805681d22e Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Wed, 22 May 2024 10:22:16 +0200 Subject: [PATCH 002/190] Replace constants.py with data_config.yaml (#31) **Summary** This PR replaces the `constants.py` file with a `data_config.yaml` file. Dataset related settings can be defined by the user in the new yaml file. Training specific settings were added as additional flags to the `train_model.py` routine. All respective calls to the old files were replaced. **Rationale** - Using a Yaml file for data config gives much more flexibility for various datasets used in the community. It also facilitates the future use of forcing and boundary datasets. In a follow-up PR the dataset paths will be defined in the yaml file, removing the dependency on a pre-structured `/data` folder. - It is best practice to define user input in a yaml file, the usage of python scripts for that purpose is not common. 
- The old `constants.py` actually combined both constants and variables, many "constants" should rather be flags to `train_models.py` - The introduction of a new ConfigClass in `utils.py` allows for very specific queries of the yaml and calculations based thereon. This branch shows future possibilities of such a class https://github.com/joeloskarsson/neural-lam/tree/feature_dataset_yaml **Testing** Both training and evaluation of the model were succesfully tested with the `meps_example` dataset. **Note** @leifdenby Could you invite Thomas R. to this repo, in case he wanted to give his input on the yaml file? This PR should mostly serve as a basis for discussion. Maybe we should add more information to the yaml file as you outline in https://github.com/mllam/mllam-data-prep. I think we should always keep in mind how the repository will look like with realistic boundary conditions and zarr-archives as data-input. This PR solves parts of https://github.com/joeloskarsson/neural-lam/issues/23 --------- Co-authored-by: Simon Adamov --- .gitignore | 1 + CHANGELOG.md | 16 ++++- README.md | 5 +- create_grid_features.py | 12 ++-- create_mesh.py | 13 ++-- create_parameter_weights.py | 20 +++--- neural_lam/config.py | 62 ++++++++++++++++++ neural_lam/constants.py | 120 ---------------------------------- neural_lam/data_config.yaml | 64 ++++++++++++++++++ neural_lam/models/ar_model.py | 68 ++++++++++--------- neural_lam/utils.py | 7 +- neural_lam/vis.py | 30 +++++---- neural_lam/weather_dataset.py | 6 +- plot_graph.py | 11 ++-- train_model.py | 51 +++++++++++---- 15 files changed, 274 insertions(+), 212 deletions(-) create mode 100644 neural_lam/config.py delete mode 100644 neural_lam/constants.py create mode 100644 neural_lam/data_config.yaml diff --git a/.gitignore b/.gitignore index 7bb826a2..c9d914c2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ graphs *.sif sweeps test_*.sh +.vscode ### Python ### # Byte-compiled / optimized / DLL files diff --git a/CHANGELOG.md b/CHANGELOG.md index 19ecdd41..823ac8b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added +- Replaced `constants.py` with `data_config.yaml` for data configuration management + [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) + @sadamov + - new metrics (`nll` and `crps_gauss`) and `metrics` submodule, stddiv output option [c14b6b4](https://github.com/joeloskarsson/neural-lam/commit/c14b6b4323e6b56f1f18632b6ca8b0d65c3ce36a) @joeloskarsson @@ -24,6 +27,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Updated scripts and modules to use `data_config.yaml` instead of `constants.py` + [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) + @sadamov + +- Added new flags in `train_model.py` for configuration previously in `constants.py` + [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) + @sadamov + - moved batch-static features ("water cover") into forcing component return by `WeatherDataset` [\#13](https://github.com/joeloskarsson/neural-lam/pull/13) @joeloskarsson @@ -44,8 +55,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#13](https://github.com/joeloskarsson/neural-lam/pull/13) @joeloskarsson - ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication -(https://arxiv.org/abs/2309.17370) +() diff --git a/README.md b/README.md index 67d9d9b1..ba0bb3fe 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Still, some restrictions are inevitable: ## A note on the limited area setting Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](https://arxiv.org/abs/2309.17370)). There are still some parts of the code that is quite specific for the MEPS area use case. -This is in particular true for the mesh graph creation (`create_mesh.py`) and some of the constants used (`neural_lam/constants.py`). +This is in particular true for the mesh graph creation (`create_mesh.py`) and some of the constants set in a `data_config.yaml` file (path specified in `train_model.py --data_config` ). If there is interest to use Neural-LAM for other areas it is not a substantial undertaking to refactor the code to be fully area-agnostic. We would be happy to support such enhancements. See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://github.com/joeloskarsson/neural-lam/issues/3 and https://github.com/joeloskarsson/neural-lam/issues/4 for some initial ideas on how this could be done. @@ -104,13 +104,12 @@ The graph-related files are stored in a directory called `graphs`. ### Create remaining static features To create the remaining static files run the scripts `create_grid_features.py` and `create_parameter_weights.py`. -The main option to set for these is just which dataset to use. ## Weights & Biases Integration The project is fully integrated with [Weights & Biases](https://www.wandb.ai/) (W&B) for logging and visualization, but can just as easily be used without it. When W&B is used, training configuration, training/test statistics and plots are sent to the W&B servers and made available in an interactive web interface. If W&B is turned off, logging instead saves everything locally to a directory like `wandb/dryrun...`. -The W&B project name is set to `neural-lam`, but this can be changed in `neural_lam/constants.py`. 
+The W&B project name is set to `neural-lam`, but this can be changed in the flags of `train_model.py` (using argsparse). See the [W&B documentation](https://docs.wandb.ai/) for details. If you would like to login and use W&B, run: diff --git a/create_grid_features.py b/create_grid_features.py index c9038103..c3714368 100644 --- a/create_grid_features.py +++ b/create_grid_features.py @@ -6,6 +6,9 @@ import numpy as np import torch +# First-party +from neural_lam import config + def main(): """ @@ -13,14 +16,15 @@ def main(): """ parser = ArgumentParser(description="Training arguments") parser.add_argument( - "--dataset", + "--data_config", type=str, - default="meps_example", - help="Dataset to compute weights for (default: meps_example)", + default="neural_lam/data_config.yaml", + help="Path to data config file (default: neural_lam/data_config.yaml)", ) args = parser.parse_args() + config_loader = config.Config.from_file(args.data_config) - static_dir_path = os.path.join("data", args.dataset, "static") + static_dir_path = os.path.join("data", config_loader.dataset.name, "static") # -- Static grid node features -- grid_xy = torch.tensor( diff --git a/create_mesh.py b/create_mesh.py index cb524cd6..f04b4d4b 100644 --- a/create_mesh.py +++ b/create_mesh.py @@ -12,6 +12,9 @@ import torch_geometric as pyg from torch_geometric.utils.convert import from_networkx +# First-party +from neural_lam import config + def plot_graph(graph, title=None): fig, axis = plt.subplots(figsize=(8, 8), dpi=200) # W,H @@ -153,11 +156,10 @@ def prepend_node_index(graph, new_index): def main(): parser = ArgumentParser(description="Graph generation arguments") parser.add_argument( - "--dataset", + "--data_config", type=str, - default="meps_example", - help="Dataset to load grid point coordinates from " - "(default: meps_example)", + default="neural_lam/data_config.yaml", + help="Path to data config file (default: neural_lam/data_config.yaml)", ) parser.add_argument( "--graph", @@ -187,7 +189,8 @@ def main(): args = parser.parse_args() # Load grid positions - static_dir_path = os.path.join("data", args.dataset, "static") + config_loader = config.Config.from_file(args.data_config) + static_dir_path = os.path.join("data", config_loader.dataset.name, "static") graph_dir_path = os.path.join("graphs", args.graph) os.makedirs(graph_dir_path, exist_ok=True) diff --git a/create_parameter_weights.py b/create_parameter_weights.py index 494a5e81..cae1ae3e 100644 --- a/create_parameter_weights.py +++ b/create_parameter_weights.py @@ -8,7 +8,7 @@ from tqdm import tqdm # First-party -from neural_lam import constants +from neural_lam import config from neural_lam.weather_dataset import WeatherDataset @@ -18,10 +18,10 @@ def main(): """ parser = ArgumentParser(description="Training arguments") parser.add_argument( - "--dataset", + "--data_config", type=str, - default="meps_example", - help="Dataset to compute weights for (default: meps_example)", + default="neural_lam/data_config.yaml", + help="Path to data config file (default: neural_lam/data_config.yaml)", ) parser.add_argument( "--batch_size", @@ -43,7 +43,8 @@ def main(): ) args = parser.parse_args() - static_dir_path = os.path.join("data", args.dataset, "static") + config_loader = config.Config.from_file(args.data_config) + static_dir_path = os.path.join("data", config_loader.dataset.name, "static") # Create parameter weights based on height # based on fig A.1 in graph cast paper @@ -56,7 +57,10 @@ def main(): "500": 0.03, } w_list = np.array( - [w_dict[par.split("_")[-2]] for 
par in constants.PARAM_NAMES] + [ + w_dict[par.split("_")[-2]] + for par in config_loader.dataset.var_longnames + ] ) print("Saving parameter weights...") np.save( @@ -66,7 +70,7 @@ def main(): # Load dataset without any subsampling ds = WeatherDataset( - args.dataset, + config_loader.dataset.name, split="train", subsample_step=1, pred_length=63, @@ -113,7 +117,7 @@ def main(): # Compute mean and std.-dev. of one-step differences across the dataset print("Computing mean and std.-dev. for one-step differences...") ds_standard = WeatherDataset( - args.dataset, + config_loader.dataset.name, split="train", subsample_step=1, pred_length=63, diff --git a/neural_lam/config.py b/neural_lam/config.py new file mode 100644 index 00000000..5891ea74 --- /dev/null +++ b/neural_lam/config.py @@ -0,0 +1,62 @@ +# Standard library +import functools +from pathlib import Path + +# Third-party +import cartopy.crs as ccrs +import yaml + + +class Config: + """ + Class for loading configuration files. + + This class loads a configuration file and provides a way to access its + values as attributes. + """ + + def __init__(self, values): + self.values = values + + @classmethod + def from_file(cls, filepath): + """Load a configuration file.""" + if filepath.endswith(".yaml"): + with open(filepath, encoding="utf-8", mode="r") as file: + return cls(values=yaml.safe_load(file)) + else: + raise NotImplementedError(Path(filepath).suffix) + + def __getattr__(self, name): + keys = name.split(".") + value = self.values + for key in keys: + if key in value: + value = value[key] + else: + return None + if isinstance(value, dict): + return Config(values=value) + return value + + def __getitem__(self, key): + value = self.values[key] + if isinstance(value, dict): + return Config(values=value) + return value + + def __contains__(self, key): + return key in self.values + + def num_data_vars(self): + """Return the number of data variables for a given key.""" + return len(self.dataset.var_names) + + @functools.cached_property + def coords_projection(self): + """Return the projection.""" + proj_config = self.values["projection"] + proj_class_name = proj_config["class"] + proj_class = getattr(ccrs, proj_class_name) + proj_params = proj_config.get("kwargs", {}) + return proj_class(**proj_params) diff --git a/neural_lam/constants.py b/neural_lam/constants.py deleted file mode 100644 index 527c31d8..00000000 --- a/neural_lam/constants.py +++ /dev/null @@ -1,120 +0,0 @@ -# Third-party -import cartopy -import numpy as np - -WANDB_PROJECT = "neural-lam" - -SECONDS_IN_YEAR = ( - 365 * 24 * 60 * 60 -) # Assuming no leap years in dataset (2024 is next) - -# Log prediction error for these lead times -VAL_STEP_LOG_ERRORS = np.array([1, 2, 3, 5, 10, 15, 19]) - -# Log these metrics to wandb as scalar values for -# specific variables and lead times -# List of metrics to watch, including any prefix (e.g. 
val_rmse) -METRICS_WATCH = [] -# Dict with variables and lead times to log watched metrics for -# Format is a dictionary that maps from a variable index to -# a list of lead time steps -VAR_LEADS_METRICS_WATCH = { - 6: [2, 19], # t_2 - 14: [2, 19], # wvint_0 - 15: [2, 19], # z_1000 -} - -# Variable names -PARAM_NAMES = [ - "pres_heightAboveGround_0_instant", - "pres_heightAboveSea_0_instant", - "nlwrs_heightAboveGround_0_accum", - "nswrs_heightAboveGround_0_accum", - "r_heightAboveGround_2_instant", - "r_hybrid_65_instant", - "t_heightAboveGround_2_instant", - "t_hybrid_65_instant", - "t_isobaricInhPa_500_instant", - "t_isobaricInhPa_850_instant", - "u_hybrid_65_instant", - "u_isobaricInhPa_850_instant", - "v_hybrid_65_instant", - "v_isobaricInhPa_850_instant", - "wvint_entireAtmosphere_0_instant", - "z_isobaricInhPa_1000_instant", - "z_isobaricInhPa_500_instant", -] - -PARAM_NAMES_SHORT = [ - "pres_0g", - "pres_0s", - "nlwrs_0", - "nswrs_0", - "r_2", - "r_65", - "t_2", - "t_65", - "t_500", - "t_850", - "u_65", - "u_850", - "v_65", - "v_850", - "wvint_0", - "z_1000", - "z_500", -] -PARAM_UNITS = [ - "Pa", - "Pa", - "W/m\\textsuperscript{2}", - "W/m\\textsuperscript{2}", - "-", # unitless - "-", - "K", - "K", - "K", - "K", - "m/s", - "m/s", - "m/s", - "m/s", - "kg/m\\textsuperscript{2}", - "m\\textsuperscript{2}/s\\textsuperscript{2}", - "m\\textsuperscript{2}/s\\textsuperscript{2}", -] - -# Projection and grid -# Hard coded for now, but should eventually be part of dataset desc. files -GRID_SHAPE = (268, 238) # (y, x) - -LAMBERT_PROJ_PARAMS = { - "a": 6367470, - "b": 6367470, - "lat_0": 63.3, - "lat_1": 63.3, - "lat_2": 63.3, - "lon_0": 15.0, - "proj": "lcc", -} - -GRID_LIMITS = [ # In projection - -1059506.5523409774, # min x - 1310493.4476590226, # max x - -1331732.4471934352, # min y - 1338267.5528065648, # max y -] - -# Create projection -LAMBERT_PROJ = cartopy.crs.LambertConformal( - central_longitude=LAMBERT_PROJ_PARAMS["lon_0"], - central_latitude=LAMBERT_PROJ_PARAMS["lat_0"], - standard_parallels=( - LAMBERT_PROJ_PARAMS["lat_1"], - LAMBERT_PROJ_PARAMS["lat_2"], - ), -) - -# Data dimensions -GRID_FORCING_DIM = 5 * 3 + 1 # 5 feat. 
for 3 time-step window + 1 batch-static -GRID_STATE_DIM = 17 diff --git a/neural_lam/data_config.yaml b/neural_lam/data_config.yaml new file mode 100644 index 00000000..f16a4a30 --- /dev/null +++ b/neural_lam/data_config.yaml @@ -0,0 +1,64 @@ +dataset: + name: meps_example + var_names: + - pres_0g + - pres_0s + - nlwrs_0 + - nswrs_0 + - r_2 + - r_65 + - t_2 + - t_65 + - t_500 + - t_850 + - u_65 + - u_850 + - v_65 + - v_850 + - wvint_0 + - z_1000 + - z_500 + var_units: + - Pa + - Pa + - r"$\mathrm{W}/\mathrm{m}^2$" + - r"$\mathrm{W}/\mathrm{m}^2$" + - "" + - "" + - K + - K + - K + - K + - m/s + - m/s + - m/s + - m/s + - r"$\mathrm{kg}/\mathrm{m}^2$" + - r"$\mathrm{m}^2/\mathrm{s}^2$" + - r"$\mathrm{m}^2/\mathrm{s}^2$" + var_longnames: + - pres_heightAboveGround_0_instant + - pres_heightAboveSea_0_instant + - nlwrs_heightAboveGround_0_accum + - nswrs_heightAboveGround_0_accum + - r_heightAboveGround_2_instant + - r_hybrid_65_instant + - t_heightAboveGround_2_instant + - t_hybrid_65_instant + - t_isobaricInhPa_500_instant + - t_isobaricInhPa_850_instant + - u_hybrid_65_instant + - u_isobaricInhPa_850_instant + - v_hybrid_65_instant + - v_isobaricInhPa_850_instant + - wvint_entireAtmosphere_0_instant + - z_isobaricInhPa_1000_instant + - z_isobaricInhPa_500_instant + num_forcing_features: 16 +grid_shape_state: [268, 238] +projection: + class: LambertConformal + kwargs: + central_longitude: 15.0 + central_latitude: 63.3 + standard_parallels: [63.3, 63.3] diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 7d0a8320..9cda9fc2 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -9,7 +9,7 @@ import wandb # First-party -from neural_lam import constants, metrics, utils, vis +from neural_lam import config, metrics, utils, vis class ARModel(pl.LightningModule): @@ -24,10 +24,13 @@ class ARModel(pl.LightningModule): def __init__(self, args): super().__init__() self.save_hyperparameters() - self.lr = args.lr + self.args = args + self.config_loader = config.Config.from_file(args.data_config) # Load static features for grid/data - static_data_dict = utils.load_static_data(args.dataset) + static_data_dict = utils.load_static_data( + self.config_loader.dataset.name + ) for static_data_name, static_data_tensor in static_data_dict.items(): self.register_buffer( static_data_name, static_data_tensor, persistent=False @@ -36,14 +39,11 @@ def __init__(self, args): # Double grid output dim. to also output std.-dev. self.output_std = bool(args.output_std) if self.output_std: - self.grid_output_dim = ( - 2 * constants.GRID_STATE_DIM - ) # Pred. dim. in grid cell + # Pred. dim. in grid cell + self.grid_output_dim = 2 * self.config_loader.num_data_vars() else: - self.grid_output_dim = ( - constants.GRID_STATE_DIM - ) # Pred. dim. in grid cell - + # Pred. dim. in grid cell + self.grid_output_dim = self.config_loader.num_data_vars() # Store constant per-variable std.-dev. 
weighting # Note that this is the inverse of the multiplicative weighting # in wMSE/wMAE @@ -57,11 +57,11 @@ def __init__(self, args): ( self.num_grid_nodes, grid_static_dim, - ) = self.grid_static_features.shape # 63784 = 268x238 + ) = self.grid_static_features.shape self.grid_dim = ( - 2 * constants.GRID_STATE_DIM + 2 * self.config_loader.num_data_vars() + grid_static_dim - + constants.GRID_FORCING_DIM + + self.config_loader.dataset.num_forcing_features ) # Instantiate loss function @@ -95,7 +95,7 @@ def __init__(self, args): def configure_optimizers(self): opt = torch.optim.AdamW( - self.parameters(), lr=self.lr, betas=(0.9, 0.95) + self.parameters(), lr=self.args.lr, betas=(0.9, 0.95) ) if self.opt_state: opt.load_state_dict(self.opt_state) @@ -246,7 +246,7 @@ def validation_step(self, batch, batch_idx): # Log loss per time step forward and mean val_log_dict = { f"val_loss_unroll{step}": time_step_loss[step - 1] - for step in constants.VAL_STEP_LOG_ERRORS + for step in self.args.val_steps_to_log } val_log_dict["val_mean_loss"] = mean_loss self.log_dict( @@ -294,7 +294,7 @@ def test_step(self, batch, batch_idx): # Log loss per time step forward and mean test_log_dict = { f"test_loss_unroll{step}": time_step_loss[step - 1] - for step in constants.VAL_STEP_LOG_ERRORS + for step in self.args.val_steps_to_log } test_log_dict["test_mean_loss"] = mean_loss @@ -328,7 +328,9 @@ def test_step(self, batch, batch_idx): spatial_loss = self.loss( prediction, target, pred_std, average_grid=False ) # (B, pred_steps, num_grid_nodes) - log_spatial_losses = spatial_loss[:, constants.VAL_STEP_LOG_ERRORS - 1] + log_spatial_losses = spatial_loss[ + :, [step - 1 for step in self.args.val_steps_to_log] + ] self.spatial_loss_maps.append(log_spatial_losses) # (B, N_log, num_grid_nodes) @@ -399,14 +401,15 @@ def plot_examples(self, batch, n_examples, prediction=None): pred_t[:, var_i], target_t[:, var_i], self.interior_mask[:, 0], + self.config_loader, title=f"{var_name} ({var_unit}), " - f"t={t_i} ({self.step_length*t_i} h)", + f"t={t_i} ({self.step_length * t_i} h)", vrange=var_vrange, ) for var_i, (var_name, var_unit, var_vrange) in enumerate( zip( - constants.PARAM_NAMES_SHORT, - constants.PARAM_UNITS, + self.config_loader.dataset.var_names, + self.config_loader.dataset.var_units, var_vranges, ) ) @@ -417,7 +420,7 @@ def plot_examples(self, batch, n_examples, prediction=None): { f"{var_name}_example_{example_i}": wandb.Image(fig) for var_name, fig in zip( - constants.PARAM_NAMES_SHORT, var_figs + self.config_loader.dataset.var_names, var_figs ) } ) @@ -453,7 +456,7 @@ def create_metric_log_dict(self, metric_tensor, prefix, metric_name): """ log_dict = {} metric_fig = vis.plot_error_map( - metric_tensor, step_length=self.step_length + metric_tensor, self.config_loader, step_length=self.step_length ) full_log_name = f"{prefix}_{metric_name}" log_dict[full_log_name] = wandb.Image(metric_fig) @@ -471,14 +474,14 @@ def create_metric_log_dict(self, metric_tensor, prefix, metric_name): ) # Check if metrics are watched, log exact values for specific vars - if full_log_name in constants.METRICS_WATCH: - for var_i, timesteps in constants.VAR_LEADS_METRICS_WATCH.items(): - var = constants.PARAM_NAMES_SHORT[var_i] + if full_log_name in self.args.metrics_watch: + for var_i, timesteps in self.args.var_leads_metrics_watch.items(): + var = self.config_loader.dataset.var_nums[var_i] log_dict.update( { f"{full_log_name}_{var}_step_{step}": metric_tensor[ step - 1, var_i - ] # 1-indexed in constants + ] # 1-indexed in 
data_config for step in timesteps } ) @@ -542,10 +545,11 @@ def on_test_epoch_end(self): vis.plot_spatial_error( loss_map, self.interior_mask[:, 0], - title=f"Test loss, t={t_i} ({self.step_length*t_i} h)", + self.config_loader, + title=f"Test loss, t={t_i} ({self.step_length * t_i} h)", ) for t_i, loss_map in zip( - constants.VAL_STEP_LOG_ERRORS, mean_spatial_loss + self.args.val_steps_to_log, mean_spatial_loss ) ] @@ -555,14 +559,14 @@ def on_test_epoch_end(self): # also make without title and save as pdf pdf_loss_map_figs = [ - vis.plot_spatial_error(loss_map, self.interior_mask[:, 0]) + vis.plot_spatial_error( + loss_map, self.interior_mask[:, 0], self.config_loader + ) for loss_map in mean_spatial_loss ] pdf_loss_maps_dir = os.path.join(wandb.run.dir, "spatial_loss_maps") os.makedirs(pdf_loss_maps_dir, exist_ok=True) - for t_i, fig in zip( - constants.VAL_STEP_LOG_ERRORS, pdf_loss_map_figs - ): + for t_i, fig in zip(self.args.val_steps_to_log, pdf_loss_map_figs): fig.savefig(os.path.join(pdf_loss_maps_dir, f"loss_t{t_i}.pdf")) # save mean spatial loss as .pt file also torch.save( diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 31715502..836b04ed 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -7,9 +7,6 @@ from torch import nn from tueplots import bundles, figsizes -# First-party -from neural_lam import constants - def load_dataset_stats(dataset_name, device="cpu"): """ @@ -263,11 +260,11 @@ def fractional_plot_bundle(fraction): return bundle -def init_wandb_metrics(wandb_logger): +def init_wandb_metrics(wandb_logger, val_steps): """ Set up wandb metrics to track """ experiment = wandb_logger.experiment experiment.define_metric("val_mean_loss", summary="min") - for step in constants.VAL_STEP_LOG_ERRORS: + for step in val_steps: experiment.define_metric(f"val_loss_unroll{step}", summary="min") diff --git a/neural_lam/vis.py b/neural_lam/vis.py index cef34a84..2b6abf15 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -4,11 +4,11 @@ import numpy as np # First-party -from neural_lam import constants, utils +from neural_lam import utils @matplotlib.rc_context(utils.fractional_plot_bundle(1)) -def plot_error_map(errors, title=None, step_length=3): +def plot_error_map(errors, data_config, title=None, step_length=3): """ Plot a heatmap of errors of different variables at different predictions horizons @@ -51,7 +51,7 @@ def plot_error_map(errors, title=None, step_length=3): y_ticklabels = [ f"{name} ({unit})" for name, unit in zip( - constants.PARAM_NAMES_SHORT, constants.PARAM_UNITS + data_config.dataset.var_names, data_config.dataset.var_units ) ] ax.set_yticklabels(y_ticklabels, rotation=30, size=label_size) @@ -63,7 +63,9 @@ def plot_error_map(errors, title=None, step_length=3): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) -def plot_prediction(pred, target, obs_mask, title=None, vrange=None): +def plot_prediction( + pred, target, obs_mask, data_config, title=None, vrange=None +): """ Plot example prediction and grond truth. 
Each has shape (N_grid,) @@ -76,23 +78,25 @@ def plot_prediction(pred, target, obs_mask, title=None, vrange=None): vmin, vmax = vrange # Set up masking of border region - mask_reshaped = obs_mask.reshape(*constants.GRID_SHAPE) + mask_reshaped = obs_mask.reshape(*data_config.grid_shape_state) pixel_alpha = ( mask_reshaped.clamp(0.7, 1).cpu().numpy() ) # Faded border region fig, axes = plt.subplots( - 1, 2, figsize=(13, 7), subplot_kw={"projection": constants.LAMBERT_PROJ} + 1, + 2, + figsize=(13, 7), + subplot_kw={"projection": data_config.coords_projection()}, ) # Plot pred and target for ax, data in zip(axes, (target, pred)): ax.coastlines() # Add coastline outlines - data_grid = data.reshape(*constants.GRID_SHAPE).cpu().numpy() + data_grid = data.reshape(*data_config.grid_shape_state).cpu().numpy() im = ax.imshow( data_grid, origin="lower", - extent=constants.GRID_LIMITS, alpha=pixel_alpha, vmin=vmin, vmax=vmax, @@ -112,7 +116,7 @@ def plot_prediction(pred, target, obs_mask, title=None, vrange=None): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) -def plot_spatial_error(error, obs_mask, title=None, vrange=None): +def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None): """ Plot errors over spatial map Error and obs_mask has shape (N_grid,) @@ -125,22 +129,22 @@ def plot_spatial_error(error, obs_mask, title=None, vrange=None): vmin, vmax = vrange # Set up masking of border region - mask_reshaped = obs_mask.reshape(*constants.GRID_SHAPE) + mask_reshaped = obs_mask.reshape(*data_config.grid_shape_state) pixel_alpha = ( mask_reshaped.clamp(0.7, 1).cpu().numpy() ) # Faded border region fig, ax = plt.subplots( - figsize=(5, 4.8), subplot_kw={"projection": constants.LAMBERT_PROJ} + figsize=(5, 4.8), + subplot_kw={"projection": data_config.coords_projection()}, ) ax.coastlines() # Add coastline outlines - error_grid = error.reshape(*constants.GRID_SHAPE).cpu().numpy() + error_grid = error.reshape(*data_config.grid_shape_state).cpu().numpy() im = ax.imshow( error_grid, origin="lower", - extent=constants.GRID_LIMITS, alpha=pixel_alpha, vmin=vmin, vmax=vmax, diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index eeefc313..a782806b 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -8,7 +8,7 @@ import torch # First-party -from neural_lam import constants, utils +from neural_lam import utils class WeatherDataset(torch.utils.data.Dataset): @@ -218,9 +218,11 @@ def __getitem__(self, idx): # can roll over to next year, ok because periodicity # Encode as sin/cos + # ! 
Make this more flexible in a separate create_forcings.py script + seconds_in_year = 365 * 24 * 3600 hour_angle = (hour_of_day / 12) * torch.pi # (sample_len,) year_angle = ( - (second_into_year / constants.SECONDS_IN_YEAR) * 2 * torch.pi + (second_into_year / seconds_in_year) * 2 * torch.pi ) # (sample_len,) datetime_forcing = torch.stack( ( diff --git a/plot_graph.py b/plot_graph.py index 48427d5c..40b2b41d 100644 --- a/plot_graph.py +++ b/plot_graph.py @@ -7,7 +7,7 @@ import torch_geometric as pyg # First-party -from neural_lam import utils +from neural_lam import config, utils MESH_HEIGHT = 0.1 MESH_LEVEL_DIST = 0.2 @@ -20,10 +20,10 @@ def main(): """ parser = ArgumentParser(description="Plot graph") parser.add_argument( - "--dataset", + "--data_config", type=str, - default="meps_example", - help="Datast to load grid coordinates from (default: meps_example)", + default="neural_lam/data_config.yaml", + help="Path to data config file (default: neural_lam/data_config.yaml)", ) parser.add_argument( "--graph", @@ -44,6 +44,7 @@ def main(): ) args = parser.parse_args() + config_loader = config.Config.from_file(args.data_config) # Load graph data hierarchical, graph_ldict = utils.load_graph(args.graph) @@ -62,7 +63,7 @@ def main(): ) mesh_static_features = graph_ldict["mesh_static_features"] - grid_static_features = utils.load_static_data(args.dataset)[ + grid_static_features = utils.load_static_data(config_loader.dataset.name)[ "grid_static_features" ] diff --git a/train_model.py b/train_model.py index 96d21a3f..390da6d4 100644 --- a/train_model.py +++ b/train_model.py @@ -9,7 +9,7 @@ from lightning_fabric.utilities import seed # First-party -from neural_lam import constants, utils +from neural_lam import config, utils from neural_lam.models.graph_lam import GraphLAM from neural_lam.models.hi_lam import HiLAM from neural_lam.models.hi_lam_parallel import HiLAMParallel @@ -29,14 +29,11 @@ def main(): parser = ArgumentParser( description="Train or evaluate NeurWP models for LAM" ) - - # General options parser.add_argument( - "--dataset", + "--data_config", type=str, - default="meps_example", - help="Dataset, corresponding to name in data directory " - "(default: meps_example)", + default="neural_lam/data_config.yaml", + help="Path to data config file (default: neural_lam/data_config.yaml)", ) parser.add_argument( "--model", @@ -183,8 +180,36 @@ def main(): help="Number of example predictions to plot during evaluation " "(default: 1)", ) + + # Logger Settings + parser.add_argument( + "--wandb_project", + type=str, + default="neural_lam", + help="Wandb project name (default: neural_lam)", + ) + parser.add_argument( + "--val_steps_to_log", + type=list, + default=[1, 2, 3, 5, 10, 15, 19], + help="Steps to log val loss for (default: [1, 2, 3, 5, 10, 15, 19])", + ) + parser.add_argument( + "--metrics_watch", + type=list, + default=[], + help="List of metrics to watch, including any prefix (e.g. 
val_rmse)", + ) + parser.add_argument( + "--var_leads_metrics_watch", + type=dict, + default={}, + help="Dict with variables and lead times to log watched metrics for", + ) args = parser.parse_args() + config_loader = config.Config.from_file(args.data_config) + # Asserts for arguments assert args.model in MODELS, f"Unknown model: {args.model}" assert args.step_length <= 3, "Too high step length" @@ -203,7 +228,7 @@ def main(): # Load data train_loader = torch.utils.data.DataLoader( WeatherDataset( - args.dataset, + config_loader.dataset.name, pred_length=args.ar_steps, split="train", subsample_step=args.step_length, @@ -217,7 +242,7 @@ def main(): max_pred_length = (65 // args.step_length) - 2 # 19 val_loader = torch.utils.data.DataLoader( WeatherDataset( - args.dataset, + config_loader.dataset.name, pred_length=max_pred_length, split="val", subsample_step=args.step_length, @@ -264,7 +289,7 @@ def main(): save_last=True, ) logger = pl.loggers.WandbLogger( - project=constants.WANDB_PROJECT, name=run_name, config=args + project=args.wandb_project, name=run_name, config=args ) trainer = pl.Trainer( max_epochs=args.epochs, @@ -280,7 +305,9 @@ def main(): # Only init once, on rank 0 only if trainer.global_rank == 0: - utils.init_wandb_metrics(logger) # Do after wandb.init + utils.init_wandb_metrics( + logger, args.val_steps_to_log + ) # Do after wandb.init if args.eval: if args.eval == "val": @@ -288,7 +315,7 @@ def main(): else: # Test eval_loader = torch.utils.data.DataLoader( WeatherDataset( - args.dataset, + config_loader.dataset.name, pred_length=max_pred_length, split="test", subsample_step=args.step_length, From 5b71be3c68d815e0e376ee651c14f09d801f86de Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Wed, 22 May 2024 13:36:22 +0200 Subject: [PATCH 003/190] Simplify pre-commit setup (#29) This PR simplifies the pre-commit setup by: - removing code checks for imports against external packages, these checks should be done when running ci/cd integration tests (which will be added in a separate PR) where external dependencies are installed. After this PR is merged, pre-commit will therefore only check if code is clean and self-consistent, without the need to install external dependencies (this is the same approach taken in for example https://github.com/pydata/xarray/blob/main/.pre-commit-config.yaml). Testing imports is _also_ important, but splitting the code-cleaning out allows for much faster iteration (i.e. these linting tests will fail, avoiding the need to install all dependencies) - pinning versions of used linting tools. The current setup is errorprone because as linting tools evolve the linting rules will update. Pinning versions ensures that we all use the same versions when running `pre-commit` - moves linting tool versions to pre-commit config rather than `requirements.txt`. This allows pre-commit handle the linting in its own virtual env, without polluting the dev environment - using github action to install and run pre-commit rather than our own run instructions. This ensure that pre-commit is run identically both during local development and during ci/cd in github actions. 
--------- Co-authored-by: sadamov <45732287+sadamov@users.noreply.github.com> Co-authored-by: khintz --- .flake8 | 3 ++ .github/workflows/pre-commit.yml | 36 +++++-------- .pre-commit-config.yaml | 66 ++++++++++-------------- CHANGELOG.md | 11 ++++ neural_lam/models/base_hi_graph_model.py | 14 ++--- plot_graph.py | 6 +-- requirements.txt | 6 +-- 7 files changed, 63 insertions(+), 79 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..b02dd545 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 88 +ignore = E203, F811, I002, W503 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index a6ad84f1..dc519e5b 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,33 +1,25 @@ -name: Run pre-commit job +name: lint on: - push: + # trigger on pushes to any branch, but not main + push: + branches-ignore: + - main + # and also on PRs to main + pull_request: branches: - - main - pull_request: - branches: - - main + - main jobs: - pre-commit-job: + pre-commit-job: runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.9 - - name: Install pre-commit hooks - run: | - pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 \ - --index-url https://download.pytorch.org/whl/cpu - pip install -r requirements.txt - pip install pyg-lib==0.2.0 torch-scatter==2.1.1 torch-sparse==0.6.17 \ - torch-cluster==1.6.1 torch-geometric==2.3.1 \ - -f https://pytorch-geometric.com/whl/torch-2.0.1+cpu.html - - name: Run pre-commit hooks - run: | - pre-commit run --all-files + python-version: ${{ matrix.python-version }} + - uses: pre-commit/action@v2.0.3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f48eca67..815a92e1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,51 +1,37 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: - - id: check-ast - - id: check-case-conflict - - id: check-docstring-first - - id: check-symlinks - - id: check-toml - - id: check-yaml - - id: debug-statements - - id: end-of-file-fixer - - id: trailing-whitespace -- repo: local + - id: check-ast + - id: check-case-conflict + - id: check-docstring-first + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 hooks: - - id: codespell - name: codespell + - id: codespell description: Check for spelling errors - language: system - entry: codespell -- repo: local + + - repo: https://github.com/psf/black + rev: 22.3.0 hooks: - - id: black - name: black + - id: black description: Format Python code - language: system - entry: black - types_or: [python, pyi] -- repo: local + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 hooks: - - id: isort - name: isort + - id: isort description: Group and sort Python imports - language: system - entry: isort - types_or: [python, pyi, cython] -- repo: local + + - repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 hooks: - - id: flake8 - name: flake8 + - id: flake8 description: Check Python code for correctness, consistency and adherence to best practices - language: system - entry: flake8 
--max-line-length=80 --ignore=E203,F811,I002,W503 - types: [python] -- repo: local - hooks: - - id: pylint - name: pylint - entry: pylint -rn -sn - language: system - types: [python] diff --git a/CHANGELOG.md b/CHANGELOG.md index 823ac8b1..63feff96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#13](https://github.com/joeloskarsson/neural-lam/pull/13) @joeloskarsson +### Maintenance + +- simplify pre-commit setup by 1) reducing linting to only cover static + analysis excluding imports from external dependencies (this will be handled + in build/test cicd action introduced later), 2) pinning versions of linting + tools in pre-commit config (and remove from `requirements.txt`) and 3) using + github action to run pre-commit. + [\#29](https://github.com/mllam/neural-lam/pull/29) + @leifdenby + + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/neural_lam/models/base_hi_graph_model.py b/neural_lam/models/base_hi_graph_model.py index 8ce87030..3fd30579 100644 --- a/neural_lam/models/base_hi_graph_model.py +++ b/neural_lam/models/base_hi_graph_model.py @@ -36,7 +36,7 @@ def __init__(self, args): if level_index < (self.num_levels - 1): up_edges = self.mesh_up_features[level_index].shape[0] down_edges = self.mesh_down_features[level_index].shape[0] - print(f" {level_index}<->{level_index+1}") + print(f" {level_index}<->{level_index + 1}") print(f" - {up_edges} up edges, {down_edges} down edges") # Embedders # Assume all levels have same static feature dimensionality @@ -179,9 +179,9 @@ def process_step(self, mesh_rep): ) # Update node and edge vectors in lists - mesh_rep_levels[level_l] = ( - new_node_rep # (B, num_mesh_nodes[l], d_h) - ) + mesh_rep_levels[ + level_l + ] = new_node_rep # (B, num_mesh_nodes[l], d_h) mesh_up_rep[level_l - 1] = new_edge_rep # (B, M_up[l-1], d_h) # - PROCESSOR - @@ -207,9 +207,9 @@ def process_step(self, mesh_rep): new_node_rep = gnn(send_node_rep, rec_node_rep, edge_rep) # Update node and edge vectors in lists - mesh_rep_levels[level_l] = ( - new_node_rep # (B, num_mesh_nodes[l], d_h) - ) + mesh_rep_levels[ + level_l + ] = new_node_rep # (B, num_mesh_nodes[l], d_h) # Return only bottom level representation return mesh_rep_levels[0] # (B, num_mesh_nodes[0], d_h) diff --git a/plot_graph.py b/plot_graph.py index 40b2b41d..90462194 100644 --- a/plot_graph.py +++ b/plot_graph.py @@ -48,11 +48,7 @@ def main(): # Load graph data hierarchical, graph_ldict = utils.load_graph(args.graph) - ( - g2m_edge_index, - m2g_edge_index, - m2m_edge_index, - ) = ( + (g2m_edge_index, m2g_edge_index, m2m_edge_index,) = ( graph_ldict["g2m_edge_index"], graph_ldict["m2g_edge_index"], graph_ldict["m2m_edge_index"], diff --git a/requirements.txt b/requirements.txt index 5a2111b2..f381d54f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,10 +10,6 @@ Cartopy>=0.22.0 pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 + # for dev -codespell>=2.0.0 -black>=21.9b0 -isort>=5.9.3 -flake8>=4.0.1 -pylint>=3.0.3 pre-commit>=2.15.0 From 879cfec1b49d0255ed963f44b3a9f55d42c9920a Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Wed, 29 May 2024 16:07:36 +0200 Subject: [PATCH 004/190] Make restoration of optimizer and scheduler more robust (#17) ## Summary This pull request introduces specific enhancements to the model loading and optimizer/scheduler 
state restoration functionalities, improving flexibility and compatibility with multi-GPU setups. ## Detailed Changes - **Enhanced Model Loading for Multi-GPU**: Modified the model loading logic to better support multi-GPU environments by ensuring that optimizer states are only loaded when necessary and appropriate. - **Checkpoint Adjustments**: Adjusted how learning rate schedulers are restored from checkpoints to ensure they align correctly with the current training state ## Impact These changes provide users with greater control over how training states are restored and improve the script's functionality in distributed training environments. ## Testing [x] Changes have been tested in both single and multi-GPU setups ## Notes Further integration testing with different types of training configurations is recommended to fully validate the new functionalities. --------- Co-authored-by: Simon Adamov --- CHANGELOG.md | 4 ++++ neural_lam/models/ar_model.py | 10 +++++----- neural_lam/vis.py | 4 ++-- train_model.py | 12 +++--------- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63feff96..061aa6bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Robust restoration of optimizer and scheduler using `ckpt_path` + [\#17](https://github.com/mllam/neural-lam/pull/17) + @sadamov + - Updated scripts and modules to use `data_config.yaml` instead of `constants.py` [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) @sadamov diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 9cda9fc2..29b169d4 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -83,8 +83,8 @@ def __init__(self, args): if self.output_std: self.test_metrics["output_std"] = [] # Treat as metric - # For making restoring of optimizer state optional (slight hack) - self.opt_state = None + # For making restoring of optimizer state optional + self.restore_opt = args.restore_opt # For example plotting self.n_example_pred = args.n_example_pred @@ -97,9 +97,6 @@ def configure_optimizers(self): opt = torch.optim.AdamW( self.parameters(), lr=self.args.lr, betas=(0.9, 0.95) ) - if self.opt_state: - opt.load_state_dict(self.opt_state) - return opt @property @@ -597,3 +594,6 @@ def on_load_checkpoint(self, checkpoint): ) loaded_state_dict[new_key] = loaded_state_dict[old_key] del loaded_state_dict[old_key] + if not self.restore_opt: + opt = self.configure_optimizers() + checkpoint["optimizer_states"] = [opt.state_dict()] diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 2b6abf15..8c9ca77c 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -87,7 +87,7 @@ def plot_prediction( 1, 2, figsize=(13, 7), - subplot_kw={"projection": data_config.coords_projection()}, + subplot_kw={"projection": data_config.coords_projection}, ) # Plot pred and target @@ -136,7 +136,7 @@ def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None): fig, ax = plt.subplots( figsize=(5, 4.8), - subplot_kw={"projection": data_config.coords_projection()}, + subplot_kw={"projection": data_config.coords_projection}, ) ax.coastlines() # Add coastline outlines diff --git a/train_model.py b/train_model.py index 390da6d4..fe064384 100644 --- a/train_model.py +++ b/train_model.py @@ -265,14 +265,7 @@ def main(): # Load model parameters Use new args for model model_class = MODELS[args.model] - if args.load: - model = 
model_class.load_from_checkpoint(args.load, args=args) - if args.restore_opt: - # Save for later - # Unclear if this works for multi-GPU - model.opt_state = torch.load(args.load)["optimizer_states"][0] - else: - model = model_class(args) + model = model_class(args) prefix = "subset-" if args.subset_ds else "" if args.eval: @@ -327,13 +320,14 @@ def main(): ) print(f"Running evaluation on {args.eval}") - trainer.test(model=model, dataloaders=eval_loader) + trainer.test(model=model, dataloaders=eval_loader, ckpt_path=args.load) else: # Train model trainer.fit( model=model, train_dataloaders=train_loader, val_dataloaders=val_loader, + ckpt_path=args.load, ) From 9d558d1f0d343cfe6e0babaa8d9e6c45b852fe21 Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Fri, 31 May 2024 12:12:58 +0200 Subject: [PATCH 005/190] Fix minor bugs in data_config.yaml workflow (#40) ### Summary https://github.com/mllam/neural-lam/pull/31 introduced three minor bugs that are fixed with this PR: - r"" strings are not required in units of `data_config.yaml` - dictionaries cannot be passed as argsparse, rather JSON strings. This bug is related to the flag `var_leads_metrics_watch` --------- Co-authored-by: joeloskarsson --- neural_lam/data_config.yaml | 10 +++++----- neural_lam/models/ar_model.py | 2 +- train_model.py | 13 +++++++++---- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/neural_lam/data_config.yaml b/neural_lam/data_config.yaml index f16a4a30..f1527849 100644 --- a/neural_lam/data_config.yaml +++ b/neural_lam/data_config.yaml @@ -21,8 +21,8 @@ dataset: var_units: - Pa - Pa - - r"$\mathrm{W}/\mathrm{m}^2$" - - r"$\mathrm{W}/\mathrm{m}^2$" + - $\mathrm{W}/\mathrm{m}^2$ + - $\mathrm{W}/\mathrm{m}^2$ - "" - "" - K @@ -33,9 +33,9 @@ dataset: - m/s - m/s - m/s - - r"$\mathrm{kg}/\mathrm{m}^2$" - - r"$\mathrm{m}^2/\mathrm{s}^2$" - - r"$\mathrm{m}^2/\mathrm{s}^2$" + - $\mathrm{kg}/\mathrm{m}^2$ + - $\mathrm{m}^2/\mathrm{s}^2$ + - $\mathrm{m}^2/\mathrm{s}^2$ var_longnames: - pres_heightAboveGround_0_instant - pres_heightAboveSea_0_instant diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 29b169d4..6ced211f 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -473,7 +473,7 @@ def create_metric_log_dict(self, metric_tensor, prefix, metric_name): # Check if metrics are watched, log exact values for specific vars if full_log_name in self.args.metrics_watch: for var_i, timesteps in self.args.var_leads_metrics_watch.items(): - var = self.config_loader.dataset.var_nums[var_i] + var = self.config_loader.dataset.var_names[var_i] log_dict.update( { f"{full_log_name}_{var}_step_{step}": metric_tensor[ diff --git a/train_model.py b/train_model.py index fe064384..cbd787f0 100644 --- a/train_model.py +++ b/train_model.py @@ -1,4 +1,5 @@ # Standard library +import json import random import time from argparse import ArgumentParser @@ -196,17 +197,21 @@ def main(): ) parser.add_argument( "--metrics_watch", - type=list, + nargs="+", default=[], help="List of metrics to watch, including any prefix (e.g. val_rmse)", ) parser.add_argument( "--var_leads_metrics_watch", - type=dict, - default={}, - help="Dict with variables and lead times to log watched metrics for", + type=str, + default="{}", + help="""JSON string with variable-IDs and lead times to log watched + metrics (e.g. 
'{"1": [1, 2], "3": [3, 4]}')""", ) args = parser.parse_args() + args.var_leads_metrics_watch = { + int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items() + } config_loader = config.Config.from_file(args.data_config) From e5400bbfa92d959d0f4856b90786abb18d282754 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Mon, 3 Jun 2024 14:39:41 +0200 Subject: [PATCH 006/190] Change copyright notice to specify all contributors (#47) ## Motivation As more people are now contributing to the code the copyright does not just belong to me and Tomas. To avoid having to update this with the name of every person contributing, I suggest to take some inspiration from e.g. https://github.com/pyg-team/pytorch_geometric/blob/master/LICENSE and https://github.com/numpy/numpy/blob/main/LICENSE.txt and use a general formulation "Neural-LAM contributors". ## Description of change Change the copyright notice in the MIT license to "Neural-LAM Contributors". The year can stay 2023, as that is the first year the work (the code) was published. As me and Tomas are included under "Neural-LAM Contributors" this is strictly expanding the number of copyright holders. --- CHANGELOG.md | 3 +++ LICENSE.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 061aa6bb..fd836c7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#29](https://github.com/mllam/neural-lam/pull/29) @leifdenby +- change copyright formulation in license to encompass all contributors + [\#47](https://github.com/mllam/neural-lam/pull/47) + @joeloskarsson ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) diff --git a/LICENSE.txt b/LICENSE.txt index 1bb69de2..ed176ba1 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Joel Oskarsson, Tomas Landelius +Copyright (c) 2023 Neural-LAM Contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 743c07ac9f20ff16f05fcac5528196b8c4639c17 Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:45:03 +0200 Subject: [PATCH 007/190] Parallelize parameter weight computation using PyTorch Distributed (#22) ## Description This PR introduces parallelization to the `create_parameter_weights.py` script using PyTorch Distributed. The main changes include: 1. Added functions `get_rank()`, `get_world_size()`, `setup()`, and `cleanup()` to initialize and manage the distributed process group. - `get_rank()` retrieves the rank of the current process in the distributed group. - `get_world_size()` retrieves the total number of processes in the distributed group. - `setup()` initializes the distributed process group using NCCL (for GPU) or gloo (for CPU) backend. - `cleanup()` destroys the distributed process group. 2. Modified the `main()` function to take `rank` and `world_size` as arguments and set up the distributed environment. - The device is set based on the rank and available GPUs. - The dataset is adjusted to ensure its size is divisible by `(world_size * batch_size)` using the `adjust_dataset_size()` function. - A `DistributedSampler` is used to partition the dataset among the processes. 3. Parallelized the computation of means and squared values across the dataset. 
- Each process computes the means and squared values for its assigned portion of the dataset. - The results are gathered from all processes using `dist.all_gather_object()`. - The root process (rank 0) computes the final mean, standard deviation, and flux statistics using the gathered results. 4. Parallelized the computation of one-step difference means and squared values. - Similar to step 3, each process computes the difference means and squared values for its assigned portion of the dataset. - The results are gathered from all processes using `dist.all_gather_object()`. - The final difference mean and standard deviation are computed using the gathered results. These changes enable the script to leverage multiple processes/GPUs to speed up the computation of parameter weights, means, and standard deviations. The dataset is partitioned among the processes, and the results are gathered and aggregated by the root process. To run the script in a distributed manner, it can be launched using Slurm. Please review the changes and provide any feedback or suggestions. --------- Co-authored-by: Simon Adamov --- .gitignore | 2 + create_parameter_weights.py | 374 +++++++++++++++++++++++++++++------- 2 files changed, 302 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index c9d914c2..65e9f6f8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,12 +2,14 @@ wandb slurm_log* saved_models +lightning_logs data graphs *.sif sweeps test_*.sh .vscode +*slurm* ### Python ### # Byte-compiled / optimized / DLL files diff --git a/create_parameter_weights.py b/create_parameter_weights.py index cae1ae3e..c85cd5a3 100644 --- a/create_parameter_weights.py +++ b/create_parameter_weights.py @@ -1,10 +1,13 @@ # Standard library import os +import subprocess from argparse import ArgumentParser # Third-party import numpy as np import torch +import torch.distributed as dist +from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm # First-party @@ -12,6 +15,117 @@ from neural_lam.weather_dataset import WeatherDataset +class PaddedWeatherDataset(torch.utils.data.Dataset): + def __init__(self, base_dataset, world_size, batch_size): + super().__init__() + self.base_dataset = base_dataset + self.world_size = world_size + self.batch_size = batch_size + self.total_samples = len(base_dataset) + self.padded_samples = ( + (self.world_size * self.batch_size) - self.total_samples + ) % self.world_size + self.original_indices = list(range(len(base_dataset))) + self.padded_indices = list( + range(self.total_samples, self.total_samples + self.padded_samples) + ) + + def __getitem__(self, idx): + return self.base_dataset[ + self.original_indices[-1] + if idx >= self.total_samples + else idx % len(self.base_dataset) + ] + + def __len__(self): + return self.total_samples + self.padded_samples + + def get_original_indices(self): + return self.original_indices + + def get_original_window_indices(self, step_length): + return [ + i // step_length + for i in range(len(self.original_indices) * step_length) + ] + + +def get_rank(): + return int(os.environ.get("SLURM_PROCID", 0)) + + +def get_world_size(): + return int(os.environ.get("SLURM_NTASKS", 1)) + + +def setup(rank, world_size): # pylint: disable=redefined-outer-name + """Initialize the distributed group.""" + if "SLURM_JOB_NODELIST" in os.environ: + master_node = ( + subprocess.check_output( + "scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1", + shell=True, + ) + .strip() + .decode("utf-8") + ) + else: + print( + "\033[91mCareful, you are 
running this script with --distributed " + "without any scheduler. In most cases this will result in slower " + "execution and the --distributed flag should be removed.\033[0m" + ) + master_node = "localhost" + os.environ["MASTER_ADDR"] = master_node + os.environ["MASTER_PORT"] = "12355" + dist.init_process_group( + "nccl" if torch.cuda.is_available() else "gloo", + rank=rank, + world_size=world_size, + ) + if rank == 0: + print( + f"Initialized {dist.get_backend()} " + f"process group with world size {world_size}." + ) + + +def save_stats( + static_dir_path, means, squares, flux_means, flux_squares, filename_prefix +): + means = ( + torch.stack(means) if len(means) > 1 else means[0] + ) # (N_batch, d_features,) + squares = ( + torch.stack(squares) if len(squares) > 1 else squares[0] + ) # (N_batch, d_features,) + mean = torch.mean(means, dim=0) # (d_features,) + second_moment = torch.mean(squares, dim=0) # (d_features,) + std = torch.sqrt(second_moment - mean**2) # (d_features,) + torch.save( + mean.cpu(), os.path.join(static_dir_path, f"{filename_prefix}_mean.pt") + ) + torch.save( + std.cpu(), os.path.join(static_dir_path, f"{filename_prefix}_std.pt") + ) + + if len(flux_means) == 0: + return + flux_means = ( + torch.stack(flux_means) if len(flux_means) > 1 else flux_means[0] + ) # (N_batch,) + flux_squares = ( + torch.stack(flux_squares) if len(flux_squares) > 1 else flux_squares[0] + ) # (N_batch,) + flux_mean = torch.mean(flux_means) # (,) + flux_second_moment = torch.mean(flux_squares) # (,) + flux_std = torch.sqrt(flux_second_moment - flux_mean**2) # (,) + torch.save( + torch.stack((flux_mean, flux_std)).cpu(), + os.path.join(static_dir_path, "flux_stats.pt"), + ) + + def main(): """ Pre-compute parameter weights to be used in loss function @@ -41,32 +155,52 @@ def main(): default=4, help="Number of workers in data loader (default: 4)", ) + parser.add_argument( + "--distributed", + type=int, + default=0, + help="Run the script in distributed mode (1) or not (0) (default: 0)", + ) args = parser.parse_args() + distributed = bool(args.distributed) + rank = get_rank() + world_size = get_world_size() config_loader = config.Config.from_file(args.data_config) - static_dir_path = os.path.join("data", config_loader.dataset.name, "static") - - # Create parameter weights based on height - # based on fig A.1 in graph cast paper - w_dict = { - "2": 1.0, - "0": 0.1, - "65": 0.065, - "1000": 0.1, - "850": 0.05, - "500": 0.03, - } - w_list = np.array( - [ - w_dict[par.split("_")[-2]] - for par in config_loader.dataset.var_longnames - ] - ) - print("Saving parameter weights...") - np.save( - os.path.join(static_dir_path, "parameter_weights.npy"), - w_list.astype("float32"), - ) + + if distributed: + + setup(rank, world_size) + device = torch.device( + f"cuda:{rank}" if torch.cuda.is_available() else "cpu" + ) + torch.cuda.set_device(device) if torch.cuda.is_available() else None + + if rank == 0: + static_dir_path = os.path.join( + "data", config_loader.dataset.name, "static" + ) + # Create parameter weights based on height + # based on fig A.1 in graph cast paper + w_dict = { + "2": 1.0, + "0": 0.1, + "65": 0.065, + "1000": 0.1, + "850": 0.05, + "500": 0.03, + } + w_list = np.array( + [ + w_dict[par.split("_")[-2]] + for par in config_loader.dataset.var_longnames + ] + ) + print("Saving parameter weights...") + np.save( + os.path.join(static_dir_path, "parameter_weights.npy"), + w_list.astype("float32"), + ) # Load dataset without any subsampling ds = WeatherDataset( @@ -75,47 +209,97 @@ def 
main(): subsample_step=1, pred_length=63, standardize=False, - ) # Without standardization + ) + if distributed: + ds = PaddedWeatherDataset( + ds, + world_size, + args.batch_size, + ) + sampler = DistributedSampler( + ds, num_replicas=world_size, rank=rank, shuffle=False + ) + else: + sampler = None loader = torch.utils.data.DataLoader( - ds, args.batch_size, shuffle=False, num_workers=args.n_workers + ds, + args.batch_size, + shuffle=False, + num_workers=args.n_workers, + sampler=sampler, ) - # Compute mean and std.-dev. of each parameter (+ flux forcing) - # across full dataset - print("Computing mean and std.-dev. for parameters...") - means = [] - squares = [] - flux_means = [] - flux_squares = [] + + if rank == 0: + print("Computing mean and std.-dev. for parameters...") + means, squares, flux_means, flux_squares = [], [], [], [] + for init_batch, target_batch, forcing_batch in tqdm(loader): - batch = torch.cat( - (init_batch, target_batch), dim=1 - ) # (N_batch, N_t, N_grid, d_features) - means.append(torch.mean(batch, dim=(1, 2))) # (N_batch, d_features,) + if distributed: + init_batch, target_batch, forcing_batch = ( + init_batch.to(device), + target_batch.to(device), + forcing_batch.to(device), + ) + # (N_batch, N_t, N_grid, d_features) + batch = torch.cat((init_batch, target_batch), dim=1) + # Flux at 1st windowed position is index 1 in forcing + flux_batch = forcing_batch[:, :, :, 1] + # (N_batch, d_features,) + means.append(torch.mean(batch, dim=(1, 2)).cpu()) squares.append( - torch.mean(batch**2, dim=(1, 2)) + torch.mean(batch**2, dim=(1, 2)).cpu() ) # (N_batch, d_features,) + flux_means.append(torch.mean(flux_batch).cpu()) # (,) + flux_squares.append(torch.mean(flux_batch**2).cpu()) # (,) - # Flux at 1st windowed position is index 1 in forcing - flux_batch = forcing_batch[:, :, :, 1] - flux_means.append(torch.mean(flux_batch)) # (,) - flux_squares.append(torch.mean(flux_batch**2)) # (,) + if distributed and world_size > 1: + means_gathered, squares_gathered = [None] * world_size, [ + None + ] * world_size + flux_means_gathered, flux_squares_gathered = [None] * world_size, [ + None + ] * world_size + dist.all_gather_object(means_gathered, torch.cat(means, dim=0)) + dist.all_gather_object(squares_gathered, torch.cat(squares, dim=0)) + dist.all_gather_object(flux_means_gathered, flux_means) + dist.all_gather_object(flux_squares_gathered, flux_squares) - mean = torch.mean(torch.cat(means, dim=0), dim=0) # (d_features) - second_moment = torch.mean(torch.cat(squares, dim=0), dim=0) - std = torch.sqrt(second_moment - mean**2) # (d_features) + if rank == 0: + means_gathered, squares_gathered = torch.cat( + means_gathered, dim=0 + ), torch.cat(squares_gathered, dim=0) + flux_means_gathered, flux_squares_gathered = torch.tensor( + flux_means_gathered + ), torch.tensor(flux_squares_gathered) - flux_mean = torch.mean(torch.stack(flux_means)) # (,) - flux_second_moment = torch.mean(torch.stack(flux_squares)) # (,) - flux_std = torch.sqrt(flux_second_moment - flux_mean**2) # (,) - flux_stats = torch.stack((flux_mean, flux_std)) + original_indices = ds.get_original_indices() + means, squares = [means_gathered[i] for i in original_indices], [ + squares_gathered[i] for i in original_indices + ] + flux_means, flux_squares = [ + flux_means_gathered[i] for i in original_indices + ], [flux_squares_gathered[i] for i in original_indices] + else: + means = [torch.cat(means, dim=0)] # (N_batch, d_features,) + squares = [torch.cat(squares, dim=0)] # (N_batch, d_features,) + flux_means = 
[torch.tensor(flux_means)] # (N_batch,) + flux_squares = [torch.tensor(flux_squares)] # (N_batch,) + + if rank == 0: + save_stats( + static_dir_path, + means, + squares, + flux_means, + flux_squares, + "parameter", + ) - print("Saving mean, std.-dev, flux_stats...") - torch.save(mean, os.path.join(static_dir_path, "parameter_mean.pt")) - torch.save(std, os.path.join(static_dir_path, "parameter_std.pt")) - torch.save(flux_stats, os.path.join(static_dir_path, "flux_stats.pt")) + if distributed: + dist.barrier() - # Compute mean and std.-dev. of one-step differences across the dataset - print("Computing mean and std.-dev. for one-step differences...") + if rank == 0: + print("Computing mean and std.-dev. for one-step differences...") ds_standard = WeatherDataset( config_loader.dataset.name, split="train", @@ -123,17 +307,35 @@ def main(): pred_length=63, standardize=True, ) # Re-load with standardization + if distributed: + ds_standard = PaddedWeatherDataset( + ds_standard, + world_size, + args.batch_size, + ) + sampler_standard = DistributedSampler( + ds_standard, num_replicas=world_size, rank=rank, shuffle=False + ) + else: + sampler_standard = None loader_standard = torch.utils.data.DataLoader( - ds_standard, args.batch_size, shuffle=False, num_workers=args.n_workers + ds_standard, + args.batch_size, + shuffle=False, + num_workers=args.n_workers, + sampler=sampler_standard, ) used_subsample_len = (65 // args.step_length) * args.step_length - diff_means = [] - diff_squares = [] - for init_batch, target_batch, _ in tqdm(loader_standard): - batch = torch.cat( - (init_batch, target_batch), dim=1 - ) # (N_batch, N_t', N_grid, d_features) + diff_means, diff_squares = [], [] + + for init_batch, target_batch, _ in tqdm(loader_standard, disable=rank != 0): + if distributed: + init_batch, target_batch = init_batch.to(device), target_batch.to( + device + ) + # (N_batch, N_t', N_grid, d_features) + batch = torch.cat((init_batch, target_batch), dim=1) # Note: batch contains only 1h-steps stepped_batch = torch.cat( [ @@ -144,24 +346,48 @@ def main(): ) # (N_batch', N_t, N_grid, d_features), # N_batch' = args.step_length*N_batch - batch_diffs = stepped_batch[:, 1:] - stepped_batch[:, :-1] # (N_batch', N_t-1, N_grid, d_features) + diff_means.append(torch.mean(batch_diffs, dim=(1, 2)).cpu()) + # (N_batch', d_features,) + diff_squares.append(torch.mean(batch_diffs**2, dim=(1, 2)).cpu()) + # (N_batch', d_features,) + + if distributed and world_size > 1: + dist.barrier() + diff_means_gathered, diff_squares_gathered = [None] * world_size, [ + None + ] * world_size + dist.all_gather_object( + diff_means_gathered, torch.cat(diff_means, dim=0) + ) + dist.all_gather_object( + diff_squares_gathered, torch.cat(diff_squares, dim=0) + ) + + if rank == 0: + diff_means_gathered, diff_squares_gathered = torch.cat( + diff_means_gathered, dim=0 + ).view(-1, *diff_means[0].shape), torch.cat( + diff_squares_gathered, dim=0 + ).view( + -1, *diff_squares[0].shape + ) + original_indices = ds_standard.get_original_window_indices( + args.step_length + ) + diff_means, diff_squares = [ + diff_means_gathered[i] for i in original_indices + ], [diff_squares_gathered[i] for i in original_indices] - diff_means.append( - torch.mean(batch_diffs, dim=(1, 2)) - ) # (N_batch', d_features,) - diff_squares.append( - torch.mean(batch_diffs**2, dim=(1, 2)) - ) # (N_batch', d_features,) + diff_means = [torch.cat(diff_means, dim=0)] # (N_batch', d_features,) + diff_squares = [torch.cat(diff_squares, dim=0)] # (N_batch', d_features,) - diff_mean = 
torch.mean(torch.cat(diff_means, dim=0), dim=0) # (d_features) - diff_second_moment = torch.mean(torch.cat(diff_squares, dim=0), dim=0) - diff_std = torch.sqrt(diff_second_moment - diff_mean**2) # (d_features) + if rank == 0: + save_stats(static_dir_path, diff_means, diff_squares, [], [], "diff") - print("Saving one-step difference mean and std.-dev...") - torch.save(diff_mean, os.path.join(static_dir_path, "diff_mean.pt")) - torch.save(diff_std, os.path.join(static_dir_path, "diff_std.pt")) + if distributed: + dist.destroy_process_group() if __name__ == "__main__": From 81d08400d5f11f40007c1c3686744fa01ee057b1 Mon Sep 17 00:00:00 2001 From: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:16:11 +0200 Subject: [PATCH 008/190] Feature: add tests for meps dataset (#38) Implemeted tests for loading a reduced size meps example dataset, creating graphs, and training model. - reduce number of variables, size of domain etc in Joel's MEPS data example so that the zip file is less than 500MB. Calling it `meps_example_reduced` - create test-data zip file and upload to EWC (credentials from @leifdenby) - implement test using pytorch to download and unpack testdata using [pooch](https://pypi.org/project/pooch/) - Implement testing of: - initiation of `neural_lam.weather_dataset.WeatherDataset` from downloaded data - check shapes of returned parts of training item - create new graph in tests for reduced dataset - feed single batch through model and check shape of output - add github action to run tests during ci/cd closes #30 --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/run_tests.yml | 45 ++++ CHANGELOG.md | 3 + README.md | 5 + create_mesh.py | 4 +- .../create_reduced_meps_dataset.ipynb | 239 ++++++++++++++++++ neural_lam/utils.py | 7 +- requirements.txt | 2 + tests/__init__.py | 0 tests/test_mllam_dataset.py | 138 ++++++++++ train_model.py | 5 +- 11 files changed, 443 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/run_tests.yml create mode 100644 docs/notebooks/create_reduced_meps_dataset.ipynb create mode 100644 tests/__init__.py create mode 100644 tests/test_mllam_dataset.py diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dc519e5b..dadac50d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: lint +name: Linting on: # trigger on pushes to any branch, but not main diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 00000000..71bff3d3 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,45 @@ +name: Unit Tests + +on: + # trigger on pushes to any branch, but not main + push: + branches-ignore: + - main + # and also on PRs to main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install torch-geometric>=2.5.2 + - name: Load cache data + uses: actions/cache/restore@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + - name: Test with pytest + run: | 
+ pytest -v -s + - name: Save cache data + uses: actions/cache/save@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index fd836c7a..3544b299 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added +- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data tp speed up running tests. + [/#38](https://github.com/mllam/neural-lam/pull/38) + @SimonKamuk - Replaced `constants.py` with `data_config.yaml` for data configuration management [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) diff --git a/README.md b/README.md index ba0bb3fe..1bdc6602 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg) +![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg) +

@@ -279,6 +282,8 @@ pre-commit run --all-files ``` from the root directory of the repository. +Furthermore, all tests in the ```tests``` directory will be run upon pushing changes by a github action. Failure in any of the tests will also reject the push/PR. + # Contact If you are interested in machine learning models for LAM, have questions about our implementation or ideas for extending it, feel free to get in touch. You can open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). diff --git a/create_mesh.py b/create_mesh.py index f04b4d4b..41557a97 100644 --- a/create_mesh.py +++ b/create_mesh.py @@ -153,7 +153,7 @@ def prepend_node_index(graph, new_index): return networkx.relabel_nodes(graph, to_mapping, copy=True) -def main(): +def main(input_args=None): parser = ArgumentParser(description="Graph generation arguments") parser.add_argument( "--data_config", @@ -186,7 +186,7 @@ def main(): default=0, help="Generate hierarchical mesh graph (default: 0, no)", ) - args = parser.parse_args() + args = parser.parse_args(input_args) # Load grid positions config_loader = config.Config.from_file(args.data_config) diff --git a/docs/notebooks/create_reduced_meps_dataset.ipynb b/docs/notebooks/create_reduced_meps_dataset.ipynb new file mode 100644 index 00000000..daba23c4 --- /dev/null +++ b/docs/notebooks/create_reduced_meps_dataset.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating meps_example_reduced\n", + "This notebook outlines how the small-size test dataset ```meps_example_reduced``` was created based on the slightly larger dataset ```meps_example```. The zipped up datasets are 263 MB and 2.6 GB, respectively. See [README.md](../../README.md) for info on how to download ```meps_example```.\n", + "\n", + "The dataset was reduced in size by reducing the number of grid points and variables.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Standard library\n", + "import os\n", + "\n", + "# Third-party\n", + "import numpy as np\n", + "import torch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The number of grid points was reduced to 1/4 by halving the number of coordinates in both the x and y direction. 
This was done by removing a quarter of the grid points along each outer edge, so the center grid points would stay centered in the new set.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load existing grid\n", + "grid_xy = np.load('data/meps_example/static/nwp_xy.npy')\n", + "# Get slices in each dimension by cutting off a quarter along each edge\n", + "num_x, num_y = grid_xy.shape[1:]\n", + "x_slice = slice(num_x//4, 3*num_x//4)\n", + "y_slice = slice(num_y//4, 3*num_y//4)\n", + "# Index and save reduced grid\n", + "grid_xy_reduced = grid_xy[:, x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/nwp_xy.npy', grid_xy_reduced)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "This cut out the border, so a new perimeter of 10 grid points was established as border (10 was also the border size in the original \"meps_example\").\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Outer 10 grid points are border\n", + "old_border_mask = np.load('data/meps_example/static/border_mask.npy')\n", + "assert np.all(old_border_mask[10:-10, 10:-10] == False)\n", + "assert np.all(old_border_mask[:10, :] == True)\n", + "assert np.all(old_border_mask[:, :10] == True)\n", + "assert np.all(old_border_mask[-10:,:] == True)\n", + "assert np.all(old_border_mask[:,-10:] == True)\n", + "\n", + "# Create new array with False everywhere but the outer 10 grid points\n", + "border_mask = np.zeros_like(grid_xy_reduced[0,:,:], dtype=bool)\n", + "border_mask[:10] = True\n", + "border_mask[:,:10] = True\n", + "border_mask[-10:] = True\n", + "border_mask[:,-10:] = True\n", + "np.save('data/meps_example_reduced/static/border_mask.npy', border_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A few other files also needed to be copied using only the new reduced grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load surface_geopotential.npy, index only values from the reduced grid, and save to new file\n", + "surface_geopotential = np.load('data/meps_example/static/surface_geopotential.npy')\n", + "surface_geopotential_reduced = surface_geopotential[x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/surface_geopotential.npy', surface_geopotential_reduced)\n", + "\n", + "# Load pytorch file grid_features.pt\n", + "grid_features = torch.load('data/meps_example/static/grid_features.pt')\n", + "# Index only values from the reduced grid. \n", + "# First reshape from (num_grid_points_total, 4) to (num_grid_points_x, num_grid_points_y, 4), \n", + "# then index, then reshape back to new total number of grid points\n", + "print(grid_features.shape)\n", + "grid_features_new = grid_features.reshape(num_x, num_y, 4)[x_slice,y_slice,:].reshape((-1, 4))\n", + "# Save to new file\n", + "torch.save(grid_features_new, 'data/meps_example_reduced/static/grid_features.pt')\n", + "\n", + "# flux_stats.pt is just a vector of length 2, so the grid shape and variable changes does not change this file\n", + "torch.save(torch.load('data/meps_example/static/flux_stats.pt'), 'data/meps_example_reduced/static/flux_stats.pt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The number of variables was reduced by truncating the variable list to the first 8." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_vars = 8\n", + "\n", + "# Load parameter_weights.npy, truncate to first 8 variables, and save to new file\n", + "parameter_weights = np.load('data/meps_example/static/parameter_weights.npy')\n", + "parameter_weights_reduced = parameter_weights[:num_vars]\n", + "np.save('data/meps_example_reduced/static/parameter_weights.npy', parameter_weights_reduced)\n", + "\n", + "# Do the same for following 4 pytorch files\n", + "for file in ['diff_mean', 'diff_std', 'parameter_mean', 'parameter_std']:\n", + " old_file = torch.load(f'data/meps_example/static/{file}.pt')\n", + " new_file = old_file[:num_vars]\n", + " torch.save(new_file, f'data/meps_example_reduced/static/{file}.pt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lastly the files in each of the directories train, test, and val have to be reduced. The folders all have the same structure with files of the following types:\n", + "```\n", + "nwp_YYYYMMDDHH_mbrXXX.npy\n", + "wtr_YYYYMMDDHH.npy\n", + "nwp_toa_downwelling_shortwave_flux_YYYYMMDDHH.npy\n", + "```\n", + "with ```YYYYMMDDHH``` being some date with hours, and ```XXX``` being some 3-digit integer.\n", + "\n", + "The first type of file has x and y in dimensions 1 and 2, and variable index in dimension 3. Dimension 0 is unchanged.\n", + "The second type has has x and y in dimensions 1 and 2. Dimension 0 is unchanged.\n", + "The last type has just x and y as the only 2 dimensions.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(65, 268, 238, 18)\n", + "(65, 268, 238)\n" + ] + } + ], + "source": [ + "print(np.load('data/meps_example/samples/train/nwp_2022040100_mbr000.npy').shape)\n", + "print(np.load('data/meps_example/samples/train/nwp_toa_downwelling_shortwave_flux_2022040112.npy').shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following loop goes through each file in each sample folder and indexes them according to the dimensions given by the file name." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sample in ['train', 'test', 'val']:\n", + " files = os.listdir(f'data/meps_example/samples/{sample}')\n", + "\n", + " for f in files:\n", + " data = np.load(f'data/meps_example/samples/{sample}/{f}')\n", + " if 'mbr' in f:\n", + " data = data[:,x_slice,y_slice,:num_vars]\n", + " elif 'wtr' in f:\n", + " data = data[x_slice, y_slice]\n", + " else:\n", + " data = data[:,x_slice,y_slice]\n", + " np.save(f'data/meps_example_reduced/samples/{sample}/{f}', data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape. Also the unit descriptions containing ```^``` was automatically parsed using latex, and to avoid having to install latex in the GitHub CI/CD pipeline, this was changed to ```**```. \n", + "\n", + "This new config file was placed in ```data/meps_example_reduced```, and that directory was then zipped and placed in a European Weather Cloud S3 bucket." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 836b04ed..59a529eb 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -1,5 +1,6 @@ # Standard library import os +import shutil # Third-party import numpy as np @@ -250,7 +251,11 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but with figure width as a fraction of the page width. """ - bundle = bundles.neurips2023(usetex=True, family="serif") + # If latex is not available, some visualizations might not render correctly, + # but will at least not raise an error. + # Alternatively, use unicode raised numbers. + usetex = True if shutil.which("latex") else False + bundle = bundles.neurips2023(usetex=usetex, family="serif") bundle.update(figsizes.neurips2023()) original_figsize = bundle["figure.figsize"] bundle["figure.figsize"] = ( diff --git a/requirements.txt b/requirements.txt index f381d54f..9309eea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,5 @@ plotly>=5.15.0 # for dev pre-commit>=2.15.0 +pytest>=8.1.1 +pooch>=1.8.1 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py new file mode 100644 index 00000000..f91170c9 --- /dev/null +++ b/tests/test_mllam_dataset.py @@ -0,0 +1,138 @@ +# Standard library +import os + +# Third-party +import pooch + +# First-party +from create_mesh import main as create_mesh +from neural_lam.config import Config +from neural_lam.utils import load_static_data +from neural_lam.weather_dataset import WeatherDataset +from train_model import main as train_model + +# Disable weights and biases to avoid unnecessary logging +# and to avoid having to deal with authentication +os.environ["WANDB_DISABLED"] = "true" + +# Initializing variables for the s3 client +S3_BUCKET_NAME = "mllam-testdata" +S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" +S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" +S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) +TEST_DATA_KNOWN_HASH = ( + "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" +) + + +def test_retrieve_data_ewc(): + # Download and unzip test data into data/meps_example_reduced + pooch.retrieve( + url=S3_FULL_PATH, + known_hash=TEST_DATA_KNOWN_HASH, + processor=pooch.Unzip(extract_dir=""), + path="data", + fname="meps_example_reduced.zip", + ) + + +def test_load_reduced_meps_dataset(): + # The data_config.yaml file is downloaded and extracted in + # test_retrieve_data_ewc together with the dataset itself + data_config_file = "data/meps_example_reduced/data_config.yaml" + dataset_name = "meps_example_reduced" + + dataset = WeatherDataset(dataset_name="meps_example_reduced") + config = Config.from_file(data_config_file) + + var_names = config.values["dataset"]["var_names"] + var_units = config.values["dataset"]["var_units"] + var_longnames = config.values["dataset"]["var_longnames"] + + assert len(var_names) == len(var_longnames) + assert len(var_names) == len(var_units) + + # in future the number of grid static features 
+ # will be provided by the Dataset class itself + n_grid_static_features = 4 + # Hardcoded in model + n_input_steps = 2 + + n_forcing_features = config.values["dataset"]["num_forcing_features"] + n_state_features = len(var_names) + n_prediction_timesteps = dataset.sample_length - n_input_steps + + nx, ny = config.values["grid_shape_state"] + n_grid = nx * ny + + # check that the dataset is not empty + assert len(dataset) > 0 + + # get the first item + init_states, target_states, forcing = dataset[0] + + # check that the shapes of the tensors are correct + assert init_states.shape == (n_input_steps, n_grid, n_state_features) + assert target_states.shape == ( + n_prediction_timesteps, + n_grid, + n_state_features, + ) + assert forcing.shape == ( + n_prediction_timesteps, + n_grid, + n_forcing_features, + ) + + static_data = load_static_data(dataset_name=dataset_name) + + required_props = { + "border_mask", + "grid_static_features", + "step_diff_mean", + "step_diff_std", + "data_mean", + "data_std", + "param_weights", + } + + # check the sizes of the props + assert static_data["border_mask"].shape == (n_grid, 1) + assert static_data["grid_static_features"].shape == ( + n_grid, + n_grid_static_features, + ) + assert static_data["step_diff_mean"].shape == (n_state_features,) + assert static_data["step_diff_std"].shape == (n_state_features,) + assert static_data["data_mean"].shape == (n_state_features,) + assert static_data["data_std"].shape == (n_state_features,) + assert static_data["param_weights"].shape == (n_state_features,) + + assert set(static_data.keys()) == required_props + + +def test_create_graph_reduced_meps_dataset(): + args = [ + "--graph=hierarchical", + "--hierarchical=1", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--levels=2", + ] + create_mesh(args) + + +def test_train_model_reduced_meps_dataset(): + args = [ + "--model=hi_lam", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--n_workers=4", + "--epochs=1", + "--graph=hierarchical", + "--hidden_dim=16", + "--hidden_layers=1", + "--processor_layers=1", + "--ar_steps=1", + "--eval=val", + "--n_example_pred=0", + ] + train_model(args) diff --git a/train_model.py b/train_model.py index cbd787f0..03863275 100644 --- a/train_model.py +++ b/train_model.py @@ -23,7 +23,7 @@ } -def main(): +def main(input_args=None): """ Main function for training and evaluating models """ @@ -208,11 +208,10 @@ def main(): help="""JSON string with variable-IDs and lead times to log watched metrics (e.g. '{"1": [1, 2], "3": [3, 4]}')""", ) - args = parser.parse_args() + args = parser.parse_args(input_args) args.var_leads_metrics_watch = { int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items() } - config_loader = config.Config.from_file(args.data_config) # Asserts for arguments From c8d3553003923b9cd64bfb9a3f7f5ceb1b2c4133 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Tue, 4 Jun 2024 21:33:52 +0200 Subject: [PATCH 009/190] Fix swapped x and y dimensions in comments and variable names for MEPS data (#52) The x- and y-dimensions for the MEPS data are swapped in comments describing tensor shapes, and also in some variable names. This change swaps from (x, y) ordering to the correct (y, x) ordering. This fixes #46. See the issue for a more clear description. 
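
As an illustration only (not part of this patch): with the corrected convention the static MEPS fields are indexed as (N_y, N_x). A minimal sketch, assuming the `meps_example` dataset layout described in the README:

```python
# Illustrative sketch only: check the (y, x) ordering of the MEPS static fields.
# Assumes data/meps_example from the README has been downloaded and unzipped.
import numpy as np

grid_xy = np.load("data/meps_example/static/nwp_xy.npy")  # (2, N_y, N_x)
geopotential = np.load(
    "data/meps_example/static/surface_geopotential.npy"
)  # (N_y, N_x)

# Both static fields share the same (N_y, N_x) grid, (268, 238) for meps_example
assert grid_xy.shape[1:] == geopotential.shape
n_grid = geopotential.shape[0] * geopotential.shape[1]
print(f"N_grid = {n_grid}")  # 268 * 238 = 63784
```
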
--- CHANGELOG.md | 5 +++++ create_grid_features.py | 6 +++--- neural_lam/weather_dataset.py | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3544b299..f4680c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#47](https://github.com/mllam/neural-lam/pull/47) @joeloskarsson +- Fix incorrect ordering of x- and y-dimensions in comments describing tensor + shapes for MEPS data + [\#52](https://github.com/mllam/neural-lam/pull/52) + @joeloskarsson + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/create_grid_features.py b/create_grid_features.py index c3714368..4f058e17 100644 --- a/create_grid_features.py +++ b/create_grid_features.py @@ -29,14 +29,14 @@ def main(): # -- Static grid node features -- grid_xy = torch.tensor( np.load(os.path.join(static_dir_path, "nwp_xy.npy")) - ) # (2, N_x, N_y) + ) # (2, N_y, N_x) grid_xy = grid_xy.flatten(1, 2).T # (N_grid, 2) pos_max = torch.max(torch.abs(grid_xy)) grid_xy = grid_xy / pos_max # Divide by maximum coordinate geopotential = torch.tensor( np.load(os.path.join(static_dir_path, "surface_geopotential.npy")) - ) # (N_x, N_y) + ) # (N_y, N_x) geopotential = geopotential.flatten(0, 1).unsqueeze(1) # (N_grid,1) gp_min = torch.min(geopotential) gp_max = torch.max(geopotential) @@ -46,7 +46,7 @@ def main(): grid_border_mask = torch.tensor( np.load(os.path.join(static_dir_path, "border_mask.npy")), dtype=torch.int64, - ) # (N_x, N_y) + ) # (N_y, N_x) grid_border_mask = ( grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1) ) # (N_grid, 1) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index a782806b..3288ed67 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -16,8 +16,8 @@ class WeatherDataset(torch.utils.data.Dataset): For our dataset: N_t' = 65 N_t = 65//subsample_step (= 21 for 3h steps) - dim_x = 268 - dim_y = 238 + dim_y = 268 + dim_x = 238 N_grid = 268x238 = 63784 d_features = 17 (d_features' = 18) d_forcing = 5 @@ -87,7 +87,7 @@ def __getitem__(self, idx): try: full_sample = torch.tensor( np.load(sample_path), dtype=torch.float32 - ) # (N_t', dim_x, dim_y, d_features') + ) # (N_t', dim_y, dim_x, d_features') except ValueError: print(f"Failed to load {sample_path}") @@ -101,40 +101,40 @@ def __getitem__(self, idx): sample = full_sample[ subsample_index : subsample_end_index : self.subsample_step ] - # (N_t, dim_x, dim_y, d_features') + # (N_t, dim_y, dim_x, d_features') # Remove feature 15, "z_height_above_ground" sample = torch.cat( (sample[:, :, :, :15], sample[:, :, :, 16:]), dim=3 - ) # (N_t, dim_x, dim_y, d_features) + ) # (N_t, dim_y, dim_x, d_features) # Accumulate solar radiation instead of just subsampling - rad_features = full_sample[:, :, :, 2:4] # (N_t', dim_x, dim_y, 2) + rad_features = full_sample[:, :, :, 2:4] # (N_t', dim_y, dim_x, 2) # Accumulate for first time step init_accum_rad = torch.sum( rad_features[: (subsample_index + 1)], dim=0, keepdim=True - ) # (1, dim_x, dim_y, 2) + ) # (1, dim_y, dim_x, 2) # Accumulate for rest of subsampled sequence in_subsample_len = ( subsample_end_index - self.subsample_step + subsample_index + 1 ) rad_features_in_subsample = rad_features[ (subsample_index + 1) : in_subsample_len - ] # (N_t*, dim_x, dim_y, 2), N_t* = (N_t-1)*ss_step - 
_, dim_x, dim_y, _ = sample.shape + ] # (N_t*, dim_y, dim_x, 2), N_t* = (N_t-1)*ss_step + _, dim_y, dim_x, _ = sample.shape rest_accum_rad = torch.sum( rad_features_in_subsample.view( self.original_sample_length - 1, self.subsample_step, - dim_x, dim_y, + dim_x, 2, ), dim=1, - ) # (N_t-1, dim_x, dim_y, 2) + ) # (N_t-1, dim_y, dim_x, 2) accum_rad = torch.cat( (init_accum_rad, rest_accum_rad), dim=0 - ) # (N_t, dim_x, dim_y, 2) + ) # (N_t, dim_y, dim_x, 2) # Replace in sample sample[:, :, :, 2:4] = accum_rad @@ -168,7 +168,7 @@ def __getitem__(self, idx): np.load(water_path), dtype=torch.float32 ).unsqueeze( -1 - ) # (dim_x, dim_y, 1) + ) # (dim_y, dim_x, 1) # Flatten water_cover_features = water_cover_features.flatten(0, 1) # (N_grid, 1) # Expand over temporal dimension @@ -183,7 +183,7 @@ def __getitem__(self, idx): ) flux = torch.tensor(np.load(flux_path), dtype=torch.float32).unsqueeze( -1 - ) # (N_t', dim_x, dim_y, 1) + ) # (N_t', dim_y, dim_x, 1) if self.standardize: flux = (flux - self.flux_mean) / self.flux_std From 96f193e0ba61438cdf8033f16d8d4d03a2c6aad7 Mon Sep 17 00:00:00 2001 From: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:24:58 +0200 Subject: [PATCH 010/190] Run ci/cd tests on push to main (#55) This is a minor change for ci/cd to also run on pushes to main (which then includes the push created when a branch is merged). Also changed ci/cd badges to only look at the main branch. --- .github/workflows/pre-commit.yml | 4 +--- .github/workflows/run_tests.yml | 4 +--- CHANGELOG.md | 4 ++-- README.md | 4 ++-- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dadac50d..ad2b1a9c 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,10 +1,8 @@ name: Linting on: - # trigger on pushes to any branch, but not main + # trigger on pushes to any branch push: - branches-ignore: - - main # and also on PRs to main pull_request: branches: diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 71bff3d3..4c677908 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,10 +1,8 @@ name: Unit Tests on: - # trigger on pushes to any branch, but not main + # trigger on pushes to any branch push: - branches-ignore: - - main # and also on PRs to main pull_request: branches: diff --git a/CHANGELOG.md b/CHANGELOG.md index f4680c37..d109dcb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added -- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data tp speed up running tests. - [/#38](https://github.com/mllam/neural-lam/pull/38) +- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub, including push to main branch. Added caching of test data to speed up running tests. 
+ [\#38](https://github.com/mllam/neural-lam/pull/38) [\#55](https://github.com/mllam/neural-lam/pull/55) @SimonKamuk - Replaced `constants.py` with `data_config.yaml` for data configuration management diff --git a/README.md b/README.md index 1bdc6602..26d844f7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg) -![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg) +![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main) +![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)

From 066efe0a77f1ed741d9eb2f303ee920927d79ad2 Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Thu, 13 Jun 2024 08:47:56 +0200 Subject: [PATCH 011/190] Add entry for PR #22 (previously forgotten) (#56) Changelog updated with missing entry for #22 --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d109dcb9..34a8e0e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed + Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` + [\#22](https://github.com/mllam/neural-lam/pull/22) + @sadamov + - Robust restoration of optimizer and scheduler using `ckpt_path` [\#17](https://github.com/mllam/neural-lam/pull/17) @sadamov From a4e9a3b6fb3909bfa015511a05562c3d8140f670 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Thu, 11 Jul 2024 21:29:14 +0200 Subject: [PATCH 012/190] Add draft github pull-request template (#53) Add a pull-request template for use on github when adding pull-requests to the common repo --------- Co-authored-by: Joel Oskarsson Co-authored-by: sadamov <45732287+sadamov@users.noreply.github.com> --- .github/pull_request_template.md | 52 ++++++++++++++++++++++++++++++++ CHANGELOG.md | 3 ++ 2 files changed, 55 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..b4bf15ea --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,52 @@ +## Describe your changes + +< Summary of the changes.> + +< Please also include relevant motivation and context. > + +< List any dependencies that are required for this change. > + +## Issue Link + +< Link to the relevant issue or task. > (e.g. `closes #00` or `solves #00`) + +## Type of change + +- [ ] 🐛 Bug fix (non-breaking change that fixes an issue) +- [ ] ✨ New feature (non-breaking change that adds functionality) +- [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] 📖 Documentation (Addition or improvements to documentation) + +## Checklist before requesting a review + +- [ ] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). +- [ ] I have performed a self-review of my code +- [ ] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values +- [ ] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code +- [ ] I have updated the [README](README.MD) to cover introduced code changes +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). +- [ ] I have requested a reviewer and an assignee (assignee is responsible for merging) + +## Checklist for reviewers + +Each PR comes with its own improvements and flaws. 
The reviewer should check the following: +- [ ] the code is readable +- [ ] the code is well tested +- [ ] the code is documented (including return types and parameters) +- [ ] the code is easy to maintain + +## Author checklist after completed review + +- [ ] I have added a line to the CHANGELOG describing this change, in a section + reflecting type of change (add section where missing): + - *added*: when you have added new functionality + - *changed*: when default behaviour of the code has been changed + - *fixes*: when your contribution fixes a bug + +## Checklist for assignee + +- [ ] PR is up to date with the base branch +- [ ] the tests pass +- [ ] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) +- Once the PR is ready to be merged, squash commits and merge the PR. diff --git a/CHANGELOG.md b/CHANGELOG.md index 34a8e0e4..69140a11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#6](https://github.com/joeloskarsson/neural-lam/pull/6), [\#8](https://github.com/joeloskarsson/neural-lam/pull/8) @sadamov, @joeloskarsson +- added github pull-request template to ease contribution and review process + [\#53](https://github.com/mllam/neural-lam/pull/53), @leifdenby + ### Changed Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` From 72965a9a654b79bf40c3828c3aef51b61e2c1a25 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Thu, 8 Aug 2024 10:46:15 +0200 Subject: [PATCH 013/190] Cap numpy version to less than 2.0.0 (#68) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes Nerual-lam is at the moment not completely compatible with the new release of numpy 2. This PR changes the numpy version in requirements.txt to < 2.0.0. ## Issue Link https://github.com/mllam/neural-lam/issues/67 ## Type of change - [x] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). - [x] I have performed a self-review of my code - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging) ## Checklist for reviewers Each PR comes with its own improvements and flaws. 
The reviewer should check the following: - [x] the code is readable - [x] the code is well tested - [x] the code is documented (including return types and parameters) - [x] the code is easy to maintain ## Author checklist after completed review - [x] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [x] PR is up to date with the base branch - [x] the tests pass - [x] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. --- CHANGELOG.md | 4 ++++ requirements.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69140a11..dfb186f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#52](https://github.com/mllam/neural-lam/pull/52) @joeloskarsson +- Cap numpy version to < 2.0.0 + [\#68](https://github.com/mllam/neural-lam/pull/68) + @joeloskarsson + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/requirements.txt b/requirements.txt index 9309eea4..6bcf304d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # for all -numpy>=1.24.2 +numpy>=1.24.2, <2.0.0 wandb>=0.13.10 matplotlib>=3.7.0 scipy>=1.10.0 From a54c45f82812207e3eb9da7b4e5baafb2e7b4441 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Mon, 19 Aug 2024 15:44:39 +0200 Subject: [PATCH 014/190] Refactor codebase into a python package (#32) Make it possible to `neural-lam` as a package, thereby enabling the possibility to run from anywhere once the package has been installed. This means it is now possible (in theory) to train neural-lam on a `.npy`-file based dataset with the neural-lam package installed into a user's `site-packages` (i.e. in their virtualenv). The primary changes are: - move all `*.py` that are currently outside of neural_lam/ into that folder, but keep the files the same - change all examples of running the neural-lam "scripts", e.g. `python create_mesh.py` by `python -m neural_lam.create_mesh` in the README - change all absolute imports to package-relative imports, i.e. 
`from .import utils` rather than `from neural_lam import utils` - add tests that all the CLI entrypoints to neural_lam can be imported and add ci/cd action to run these tests --------- Co-authored-by: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Co-authored-by: joeloskarsson Co-authored-by: Leif Denby --- .github/workflows/run_tests.yml | 2 +- .pre-commit-config.yaml | 1 + CHANGELOG.md | 7 ++ README.md | 94 +++++++++---------- neural_lam/__init__.py | 10 ++ .../create_grid_features.py | 4 +- create_mesh.py => neural_lam/create_mesh.py | 4 +- .../create_parameter_weights.py | 5 +- neural_lam/interaction_net.py | 4 +- neural_lam/models/__init__.py | 6 ++ neural_lam/models/ar_model.py | 4 +- neural_lam/models/base_graph_model.py | 8 +- neural_lam/models/base_hi_graph_model.py | 8 +- neural_lam/models/graph_lam.py | 8 +- neural_lam/models/hi_lam.py | 6 +- neural_lam/models/hi_lam_parallel.py | 6 +- train_model.py => neural_lam/train_model.py | 9 +- neural_lam/vis.py | 4 +- neural_lam/weather_dataset.py | 4 +- pyproject.toml | 7 ++ tests/__init__.py | 0 tests/test_cli.py | 18 ++++ tests/test_mllam_dataset.py | 20 ++-- 23 files changed, 141 insertions(+), 98 deletions(-) create mode 100644 neural_lam/__init__.py rename create_grid_features.py => neural_lam/create_grid_features.py (97%) rename create_mesh.py => neural_lam/create_mesh.py (99%) rename create_parameter_weights.py => neural_lam/create_parameter_weights.py (99%) create mode 100644 neural_lam/models/__init__.py rename train_model.py => neural_lam/train_model.py (97%) delete mode 100644 tests/__init__.py create mode 100644 tests/test_cli.py diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4c677908..810f2b2c 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -35,7 +35,7 @@ jobs: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 - name: Test with pytest run: | - pytest -v -s + python -m pytest -v -s tests/ - name: Save cache data uses: actions/cache/save@v4 with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 815a92e1..dfbf8b60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,3 +35,4 @@ repos: hooks: - id: flake8 description: Check Python code for correctness, consistency and adherence to best practices + additional_dependencies: [Flake8-pyproject] diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb186f7..c183888e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#68](https://github.com/mllam/neural-lam/pull/68) @joeloskarsson +- turn `neural-lam` into a python package by moving all `*.py`-files into the + `neural_lam/` source directory and updating imports accordingly. This means + all cli functions are now invoke through the package name, e.g. `python -m + neural_lam.train_model` instead of `python train_model.py` (and can be done + anywhere once the package has been installed). 
+ [\#32](https://github.com/mllam/neural-lam/pull/32), @leifdenby + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/README.md b/README.md index 26d844f7..ce8daf69 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Still, some restrictions are inevitable: ## A note on the limited area setting Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](https://arxiv.org/abs/2309.17370)). There are still some parts of the code that is quite specific for the MEPS area use case. -This is in particular true for the mesh graph creation (`create_mesh.py`) and some of the constants set in a `data_config.yaml` file (path specified in `train_model.py --data_config` ). +This is in particular true for the mesh graph creation (`python -m neural_lam.create_mesh`) and some of the constants set in a `data_config.yaml` file (path specified in `python -m neural_lam.train_model --data_config ` ). If there is interest to use Neural-LAM for other areas it is not a substantial undertaking to refactor the code to be fully area-agnostic. We would be happy to support such enhancements. See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://github.com/joeloskarsson/neural-lam/issues/3 and https://github.com/joeloskarsson/neural-lam/issues/4 for some initial ideas on how this could be done. @@ -62,16 +62,10 @@ Follow the steps below to create the necessary python environment. 1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement. 2. Use python 3.9. 3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system. -4. Install required packages specified in `requirements.txt`. -5. Install PyTorch Geometric version 2.2.0. This can be done by running +4. Install `neural-lam` with pip: ``` -TORCH="2.0.1" -CUDA="cu117" - -pip install pyg-lib==0.2.0 torch-scatter==2.1.1 torch-sparse==0.6.17 torch-cluster==1.6.1\ - torch-geometric==2.3.1 -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html +pip install -e . ``` -You will have to adjust the `CUDA` variable to match the CUDA version on your system or to run on CPU. See the [installation webpage](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) for more information. ## Data Datasets should be stored in a directory called `data`. @@ -80,39 +74,39 @@ See the [repository format section](#format-of-data-directory) for details on th The full MEPS dataset can be shared with other researchers on request, contact us for this. A tiny subset of the data (named `meps_example`) is available in `example_data.zip`, which can be downloaded from [here](https://liuonline-my.sharepoint.com/:f:/g/personal/joeos82_liu_se/EuiUuiGzFIFHruPWpfxfUmYBSjhqMUjNExlJi9W6ULMZ1w?e=97pnGX). Download the file and unzip in the neural-lam directory. -All graphs used in the paper are also available for download at the same link (but can as easily be re-generated using `create_mesh.py`). -Note that this is far too little data to train any useful models, but all scripts can be ran with it. +All graphs used in the paper are also available for download at the same link (but can as easily be re-generated using `python -m neural_lam.create_mesh`). 
+Note that this is far too little data to train any useful models, but all pre-processing and training steps can be run with it. It should thus be useful to make sure that your python environment is set up correctly and that all the code can be ran without any issues. ## Pre-processing -An overview of how the different scripts and files depend on each other is given in this figure: +An overview of how the different pre-processing steps, training and files depend on each other is given in this figure:

-In order to start training models at least three pre-processing scripts have to be ran: +In order to start training models at least three pre-processing steps have to be run: -* `create_mesh.py` -* `create_grid_features.py` -* `create_parameter_weights.py` +* `python -m neural_lam.create_mesh` +* `python -m neural_lam.create_grid_features` +* `python -m neural_lam.create_parameter_weights` ### Create graph -Run `create_mesh.py` with suitable options to generate the graph you want to use (see `python create_mesh.py --help` for a list of options). +Run `python -m neural_lam.create_mesh` with suitable options to generate the graph you want to use (see `python neural_lam.create_mesh --help` for a list of options). The graphs used for the different models in the [paper](https://arxiv.org/abs/2309.17370) can be created as: -* **GC-LAM**: `python create_mesh.py --graph multiscale` -* **Hi-LAM**: `python create_mesh.py --graph hierarchical --hierarchical 1` (also works for Hi-LAM-Parallel) -* **L1-LAM**: `python create_mesh.py --graph 1level --levels 1` +* **GC-LAM**: `python -m neural_lam.create_mesh --graph multiscale` +* **Hi-LAM**: `python -m neural_lam.create_mesh --graph hierarchical --hierarchical 1` (also works for Hi-LAM-Parallel) +* **L1-LAM**: `python -m neural_lam.create_mesh --graph 1level --levels 1` The graph-related files are stored in a directory called `graphs`. ### Create remaining static features -To create the remaining static files run the scripts `create_grid_features.py` and `create_parameter_weights.py`. +To create the remaining static files run `python -m neural_lam.create_grid_features` and `python -m neural_lam.create_parameter_weights`. ## Weights & Biases Integration The project is fully integrated with [Weights & Biases](https://www.wandb.ai/) (W&B) for logging and visualization, but can just as easily be used without it. When W&B is used, training configuration, training/test statistics and plots are sent to the W&B servers and made available in an interactive web interface. If W&B is turned off, logging instead saves everything locally to a directory like `wandb/dryrun...`. -The W&B project name is set to `neural-lam`, but this can be changed in the flags of `train_model.py` (using argsparse). +The W&B project name is set to `neural-lam`, but this can be changed in the flags of `python -m neural_lam.train_model` (using argsparse). See the [W&B documentation](https://docs.wandb.ai/) for details. If you would like to login and use W&B, run: @@ -125,8 +119,8 @@ wandb off ``` ## Train Models -Models can be trained using `train_model.py`. -Run `python train_model.py --help` for a full list of training options. +Models can be trained using `python -m neural_lam.train_model`. +Run `python neural_lam.train_model --help` for a full list of training options. A few of the key ones are outlined below: * `--dataset`: Which data to train on @@ -145,12 +139,12 @@ This model class is used both for the L1-LAM and GC-LAM models from the [paper]( To train 1L-LAM use ``` -python train_model.py --model graph_lam --graph 1level ... +python -m neural_lam.train_model --model graph_lam --graph 1level ... ``` To train GC-LAM use ``` -python train_model.py --model graph_lam --graph multiscale ... +python -m neural_lam.train_model --model graph_lam --graph multiscale ... ``` ### Hi-LAM @@ -158,7 +152,7 @@ A version of Graph-LAM that uses a hierarchical mesh graph and performs sequenti To train Hi-LAM use ``` -python train_model.py --model hi_lam --graph hierarchical ... 
+python -m neural_lam.train_model --model hi_lam --graph hierarchical ... ``` ### Hi-LAM-Parallel @@ -167,13 +161,13 @@ Not included in the paper as initial experiments showed worse results than Hi-LA To train Hi-LAM-Parallel use ``` -python train_model.py --model hi_lam_parallel --graph hierarchical ... +python -m neural_lam.train_model --model hi_lam_parallel --graph hierarchical ... ``` Checkpoint files for our models trained on the MEPS data are available upon request. ## Evaluate Models -Evaluation is also done using `train_model.py`, but using the `--eval` option. +Evaluation is also done using `python -m neural_lam.train_model`, but using the `--eval` option. Use `--eval val` to evaluate the model on the validation set and `--eval test` to evaluate on test data. Most of the training options are also relevant for evaluation (not `ar_steps`, evaluation always unrolls full forecasts). Some options specifically important for evaluation are: @@ -216,13 +210,13 @@ data │ ├── nwp_xy.npy - Coordinates of grid nodes (part of dataset) │ ├── surface_geopotential.npy - Geopotential at surface of grid nodes (part of dataset) │ ├── border_mask.npy - Mask with True for grid nodes that are part of border (part of dataset) -│ ├── grid_features.pt - Static features of grid nodes (create_grid_features.py) -│ ├── parameter_mean.pt - Means of state parameters (create_parameter_weights.py) -│ ├── parameter_std.pt - Std.-dev. of state parameters (create_parameter_weights.py) -│ ├── diff_mean.pt - Means of one-step differences (create_parameter_weights.py) -│ ├── diff_std.pt - Std.-dev. of one-step differences (create_parameter_weights.py) -│ ├── flux_stats.pt - Mean and std.-dev. of solar flux forcing (create_parameter_weights.py) -│ └── parameter_weights.npy - Loss weights for different state parameters (create_parameter_weights.py) +│ ├── grid_features.pt - Static features of grid nodes (neural_lam.create_grid_features) +│ ├── parameter_mean.pt - Means of state parameters (neural_lam.create_parameter_weights) +│ ├── parameter_std.pt - Std.-dev. of state parameters (neural_lam.create_parameter_weights) +│ ├── diff_mean.pt - Means of one-step differences (neural_lam.create_parameter_weights) +│ ├── diff_std.pt - Std.-dev. of one-step differences (neural_lam.create_parameter_weights) +│ ├── flux_stats.pt - Mean and std.-dev. of solar flux forcing (neural_lam.create_parameter_weights) +│ └── parameter_weights.npy - Loss weights for different state parameters (neural_lam.create_parameter_weights) ├── dataset2 ├── ... 
└── datasetN @@ -234,13 +228,13 @@ The structure is shown with examples below: ``` graphs ├── graph1 - Directory with a graph definition -│ ├── m2m_edge_index.pt - Edges in mesh graph (create_mesh.py) -│ ├── g2m_edge_index.pt - Edges from grid to mesh (create_mesh.py) -│ ├── m2g_edge_index.pt - Edges from mesh to grid (create_mesh.py) -│ ├── m2m_features.pt - Static features of mesh edges (create_mesh.py) -│ ├── g2m_features.pt - Static features of grid to mesh edges (create_mesh.py) -│ ├── m2g_features.pt - Static features of mesh to grid edges (create_mesh.py) -│ └── mesh_features.pt - Static features of mesh nodes (create_mesh.py) +│ ├── m2m_edge_index.pt - Edges in mesh graph (neural_lam.create_mesh) +│ ├── g2m_edge_index.pt - Edges from grid to mesh (neural_lam.create_mesh) +│ ├── m2g_edge_index.pt - Edges from mesh to grid (neural_lam.create_mesh) +│ ├── m2m_features.pt - Static features of mesh edges (neural_lam.create_mesh) +│ ├── g2m_features.pt - Static features of grid to mesh edges (neural_lam.create_mesh) +│ ├── m2g_features.pt - Static features of mesh to grid edges (neural_lam.create_mesh) +│ └── mesh_features.pt - Static features of mesh nodes (neural_lam.create_mesh) ├── graph2 ├── ... └── graphN @@ -250,9 +244,9 @@ graphs To keep track of levels in the mesh graph, a list format is used for the files with mesh graph information. In particular, the files ``` -│ ├── m2m_edge_index.pt - Edges in mesh graph (create_mesh.py) -│ ├── m2m_features.pt - Static features of mesh edges (create_mesh.py) -│ ├── mesh_features.pt - Static features of mesh nodes (create_mesh.py) +│ ├── m2m_edge_index.pt - Edges in mesh graph (neural_lam.create_mesh) +│ ├── m2m_features.pt - Static features of mesh edges (neural_lam.create_mesh) +│ ├── mesh_features.pt - Static features of mesh nodes (neural_lam.create_mesh) ``` all contain lists of length `L`, for a hierarchical mesh graph with `L` layers. For non-hierarchical graphs `L == 1` and these are all just singly-entry lists. @@ -263,10 +257,10 @@ In addition, hierarchical mesh graphs (`L > 1`) feature a few additional files w ``` ├── graph1 │ ├── ... -│ ├── mesh_down_edge_index.pt - Downward edges in mesh graph (create_mesh.py) -│ ├── mesh_up_edge_index.pt - Upward edges in mesh graph (create_mesh.py) -│ ├── mesh_down_features.pt - Static features of downward mesh edges (create_mesh.py) -│ ├── mesh_up_features.pt - Static features of upward mesh edges (create_mesh.py) +│ ├── mesh_down_edge_index.pt - Downward edges in mesh graph (neural_lam.create_mesh) +│ ├── mesh_up_edge_index.pt - Upward edges in mesh graph (neural_lam.create_mesh) +│ ├── mesh_down_features.pt - Static features of downward mesh edges (neural_lam.create_mesh) +│ ├── mesh_up_features.pt - Static features of upward mesh edges (neural_lam.create_mesh) │ ├── ... ``` These files have the same list format as the ones above, but each list has length `L-1` (as these edges describe connections between levels). 
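To make the list format concrete, here is a minimal sketch of how such a graph directory could be inspected, assuming the `.pt` files hold lists of `torch.Tensor` written with `torch.save` (the graph name `graph1` is just an example):
```
# Sketch only: inspect the per-level lists stored in a generated graph directory
import torch

graph_dir = "graphs/graph1"  # illustrative name, any generated graph should work

# Each of these files is expected to hold a list with one entry per mesh level (length L)
m2m_edge_index = torch.load(f"{graph_dir}/m2m_edge_index.pt")
mesh_features = torch.load(f"{graph_dir}/mesh_features.pt")

num_levels = len(m2m_edge_index)  # L == 1 for non-hierarchical graphs
print(f"Graph has {num_levels} mesh level(s)")
for level, (edge_index, features) in enumerate(zip(m2m_edge_index, mesh_features)):
    # edge_index expected as (2, num_edges), features as (num_mesh_nodes, num_features)
    print(level, tuple(edge_index.shape), tuple(features.shape))
```
The same pattern applies to the `mesh_up_*` and `mesh_down_*` files of hierarchical graphs, whose lists instead have length `L-1`.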
diff --git a/neural_lam/__init__.py b/neural_lam/__init__.py new file mode 100644 index 00000000..dd565a26 --- /dev/null +++ b/neural_lam/__init__.py @@ -0,0 +1,10 @@ +# First-party +import neural_lam.config +import neural_lam.interaction_net +import neural_lam.metrics +import neural_lam.models +import neural_lam.utils +import neural_lam.vis + +# Local +from .weather_dataset import WeatherDataset diff --git a/create_grid_features.py b/neural_lam/create_grid_features.py similarity index 97% rename from create_grid_features.py rename to neural_lam/create_grid_features.py index 4f058e17..adabd9dc 100644 --- a/create_grid_features.py +++ b/neural_lam/create_grid_features.py @@ -6,8 +6,8 @@ import numpy as np import torch -# First-party -from neural_lam import config +# Local +from . import config def main(): diff --git a/create_mesh.py b/neural_lam/create_mesh.py similarity index 99% rename from create_mesh.py rename to neural_lam/create_mesh.py index 41557a97..40f7ba0e 100644 --- a/create_mesh.py +++ b/neural_lam/create_mesh.py @@ -12,8 +12,8 @@ import torch_geometric as pyg from torch_geometric.utils.convert import from_networkx -# First-party -from neural_lam import config +# Local +from . import config def plot_graph(graph, title=None): diff --git a/create_parameter_weights.py b/neural_lam/create_parameter_weights.py similarity index 99% rename from create_parameter_weights.py rename to neural_lam/create_parameter_weights.py index c85cd5a3..74058d38 100644 --- a/create_parameter_weights.py +++ b/neural_lam/create_parameter_weights.py @@ -10,9 +10,8 @@ from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm -# First-party -from neural_lam import config -from neural_lam.weather_dataset import WeatherDataset +# Local +from . import WeatherDataset, config class PaddedWeatherDataset(torch.utils.data.Dataset): diff --git a/neural_lam/interaction_net.py b/neural_lam/interaction_net.py index 663f27e4..2f45b03f 100644 --- a/neural_lam/interaction_net.py +++ b/neural_lam/interaction_net.py @@ -3,8 +3,8 @@ import torch_geometric as pyg from torch import nn -# First-party -from neural_lam import utils +# Local +from . import utils class InteractionNet(pyg.nn.MessagePassing): diff --git a/neural_lam/models/__init__.py b/neural_lam/models/__init__.py new file mode 100644 index 00000000..f65387ab --- /dev/null +++ b/neural_lam/models/__init__.py @@ -0,0 +1,6 @@ +# Local +from .base_graph_model import BaseGraphModel +from .base_hi_graph_model import BaseHiGraphModel +from .graph_lam import GraphLAM +from .hi_lam import HiLAM +from .hi_lam_parallel import HiLAMParallel diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 6ced211f..e94de8c6 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -8,8 +8,8 @@ import torch import wandb -# First-party -from neural_lam import config, metrics, utils, vis +# Local +from .. import config, metrics, utils, vis class ARModel(pl.LightningModule): diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 256d4adc..99629073 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -1,10 +1,10 @@ # Third-party import torch -# First-party -from neural_lam import utils -from neural_lam.interaction_net import InteractionNet -from neural_lam.models.ar_model import ARModel +# Local +from .. 
import utils +from ..interaction_net import InteractionNet +from .ar_model import ARModel class BaseGraphModel(ARModel): diff --git a/neural_lam/models/base_hi_graph_model.py b/neural_lam/models/base_hi_graph_model.py index 3fd30579..a2ebcc1b 100644 --- a/neural_lam/models/base_hi_graph_model.py +++ b/neural_lam/models/base_hi_graph_model.py @@ -1,10 +1,10 @@ # Third-party from torch import nn -# First-party -from neural_lam import utils -from neural_lam.interaction_net import InteractionNet -from neural_lam.models.base_graph_model import BaseGraphModel +# Local +from .. import utils +from ..interaction_net import InteractionNet +from .base_graph_model import BaseGraphModel class BaseHiGraphModel(BaseGraphModel): diff --git a/neural_lam/models/graph_lam.py b/neural_lam/models/graph_lam.py index f767fba0..d73f7ad8 100644 --- a/neural_lam/models/graph_lam.py +++ b/neural_lam/models/graph_lam.py @@ -1,10 +1,10 @@ # Third-party import torch_geometric as pyg -# First-party -from neural_lam import utils -from neural_lam.interaction_net import InteractionNet -from neural_lam.models.base_graph_model import BaseGraphModel +# Local +from .. import utils +from ..interaction_net import InteractionNet +from .base_graph_model import BaseGraphModel class GraphLAM(BaseGraphModel): diff --git a/neural_lam/models/hi_lam.py b/neural_lam/models/hi_lam.py index 4d7eb94c..4f3aec05 100644 --- a/neural_lam/models/hi_lam.py +++ b/neural_lam/models/hi_lam.py @@ -1,9 +1,9 @@ # Third-party from torch import nn -# First-party -from neural_lam.interaction_net import InteractionNet -from neural_lam.models.base_hi_graph_model import BaseHiGraphModel +# Local +from ..interaction_net import InteractionNet +from .base_hi_graph_model import BaseHiGraphModel class HiLAM(BaseHiGraphModel): diff --git a/neural_lam/models/hi_lam_parallel.py b/neural_lam/models/hi_lam_parallel.py index 740824e1..b40a9424 100644 --- a/neural_lam/models/hi_lam_parallel.py +++ b/neural_lam/models/hi_lam_parallel.py @@ -2,9 +2,9 @@ import torch import torch_geometric as pyg -# First-party -from neural_lam.interaction_net import InteractionNet -from neural_lam.models.base_hi_graph_model import BaseHiGraphModel +# Local +from ..interaction_net import InteractionNet +from .base_hi_graph_model import BaseHiGraphModel class HiLAMParallel(BaseHiGraphModel): diff --git a/train_model.py b/neural_lam/train_model.py similarity index 97% rename from train_model.py rename to neural_lam/train_model.py index 03863275..39f7aecd 100644 --- a/train_model.py +++ b/neural_lam/train_model.py @@ -9,12 +9,9 @@ import torch from lightning_fabric.utilities import seed -# First-party -from neural_lam import config, utils -from neural_lam.models.graph_lam import GraphLAM -from neural_lam.models.hi_lam import HiLAM -from neural_lam.models.hi_lam_parallel import HiLAMParallel -from neural_lam.weather_dataset import WeatherDataset +# Local +from . import WeatherDataset, config, utils +from .models import GraphLAM, HiLAM, HiLAMParallel MODELS = { "graph_lam": GraphLAM, diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 8c9ca77c..2f22bef1 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -3,8 +3,8 @@ import matplotlib.pyplot as plt import numpy as np -# First-party -from neural_lam import utils +# Local +from . 
import utils @matplotlib.rc_context(utils.fractional_plot_bundle(1)) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 3288ed67..29977789 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -7,8 +7,8 @@ import numpy as np import torch -# First-party -from neural_lam import utils +# Local +from . import utils class WeatherDataset(torch.utils.data.Dataset): diff --git a/pyproject.toml b/pyproject.toml index b513a258..c482abc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,10 @@ +[project] +name = "neural-lam" +version = "0.1.0" + +[tool.setuptools] +py-modules = ["neural_lam"] + [tool.black] line-length = 80 diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..e90daa04 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,18 @@ +# First-party +import neural_lam +import neural_lam.create_grid_features +import neural_lam.create_mesh +import neural_lam.create_parameter_weights +import neural_lam.train_model + + +def test_import(): + """ + This test just ensures that each cli entry-point can be imported for now, + eventually we should test their execution too + """ + assert neural_lam is not None + assert neural_lam.create_mesh is not None + assert neural_lam.create_grid_features is not None + assert neural_lam.create_parameter_weights is not None + assert neural_lam.train_model is not None diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index f91170c9..e12a57ae 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -1,15 +1,17 @@ # Standard library import os +from pathlib import Path # Third-party import pooch +import pytest # First-party -from create_mesh import main as create_mesh from neural_lam.config import Config +from neural_lam.create_mesh import main as create_mesh +from neural_lam.train_model import main as train_model from neural_lam.utils import load_static_data from neural_lam.weather_dataset import WeatherDataset -from train_model import main as train_model # Disable weights and biases to avoid unnecessary logging # and to avoid having to deal with authentication @@ -25,7 +27,8 @@ ) -def test_retrieve_data_ewc(): +@pytest.fixture(scope="module") +def meps_example_reduced_filepath(): # Download and unzip test data into data/meps_example_reduced pooch.retrieve( url=S3_FULL_PATH, @@ -34,16 +37,17 @@ def test_retrieve_data_ewc(): path="data", fname="meps_example_reduced.zip", ) + return Path("data/meps_example_reduced") -def test_load_reduced_meps_dataset(): +def test_load_reduced_meps_dataset(meps_example_reduced_filepath): # The data_config.yaml file is downloaded and extracted in # test_retrieve_data_ewc together with the dataset itself - data_config_file = "data/meps_example_reduced/data_config.yaml" - dataset_name = "meps_example_reduced" + data_config_file = meps_example_reduced_filepath / "data_config.yaml" + dataset_name = meps_example_reduced_filepath.name - dataset = WeatherDataset(dataset_name="meps_example_reduced") - config = Config.from_file(data_config_file) + dataset = WeatherDataset(dataset_name=dataset_name) + config = Config.from_file(str(data_config_file)) var_names = config.values["dataset"]["var_names"] var_units = config.values["dataset"]["var_units"] From 4969f92ad974f136089d15e7e2e2e9d73a43590d Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Tue, 20 Aug 2024 14:32:09 +0200 Subject: [PATCH 015/190] Move deps to 
pyproject toml and setup ci/cd cpu+gpu install testing (#37) Move dependencies from `requirements.txt` to`pyproject.toml`, introduce pdm/pip tests for both CPU and GPU based testing to ensure this new way of defining dependencies works for both CPU and GPU installs (GPU tests running on AWS EC2) and remove cap on numpy version (to make it clear we support `numpy >= 2.0.0`. --- .cirun.yml | 16 +++++ .../workflows/ci-pdm-install-and-test-cpu.yml | 55 +++++++++++++++++ .../workflows/ci-pdm-install-and-test-gpu.yml | 60 +++++++++++++++++++ .../workflows/ci-pip-install-and-test-cpu.yml | 45 ++++++++++++++ .../workflows/ci-pip-install-and-test-gpu.yml | 50 ++++++++++++++++ .github/workflows/run_tests.yml | 43 ------------- .gitignore | 5 ++ CHANGELOG.md | 11 +++- README.md | 42 ++++++++++--- pyproject.toml | 37 ++++++++++++ requirements.txt | 17 ------ tests/test_imports.py | 8 +++ 12 files changed, 319 insertions(+), 70 deletions(-) create mode 100644 .cirun.yml create mode 100644 .github/workflows/ci-pdm-install-and-test-cpu.yml create mode 100644 .github/workflows/ci-pdm-install-and-test-gpu.yml create mode 100644 .github/workflows/ci-pip-install-and-test-cpu.yml create mode 100644 .github/workflows/ci-pip-install-and-test-gpu.yml delete mode 100644 .github/workflows/run_tests.yml delete mode 100644 requirements.txt create mode 100644 tests/test_imports.py diff --git a/.cirun.yml b/.cirun.yml new file mode 100644 index 00000000..21b03ab4 --- /dev/null +++ b/.cirun.yml @@ -0,0 +1,16 @@ +# setup for using github runners via https://cirun.io/ +runners: + - name: "aws-runner" + # Cloud Provider: AWS + cloud: "aws" + # https://aws.amazon.com/ec2/instance-types/g4/ + instance_type: "g4ad.xlarge" + # Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04), Frankfurt region + machine_image: "ami-0ba41b554b28d24a4" + # use Frankfurt region + region: "eu-central-1" + preemptible: false + # Add this label in the "runs-on" param in .github/workflows/.yml + # So that this runner is created for running the workflow + labels: + - "cirun-aws-runner" diff --git a/.github/workflows/ci-pdm-install-and-test-cpu.yml b/.github/workflows/ci-pdm-install-and-test-cpu.yml new file mode 100644 index 00000000..c5da88cc --- /dev/null +++ b/.github/workflows/ci-pdm-install-and-test-cpu.yml @@ -0,0 +1,55 @@ +# cicd workflow for running tests with pytest +# needs to first install pdm, then install torch cpu manually and then install the package +# then run the tests + +name: test (pdm install, cpu) + +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Install pdm + run: | + python -m pip install pdm + + - name: Create venv + run: | + pdm venv create --with-pip + pdm use --venv in-project + + - name: Install torch (CPU) + run: | + pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + # check that the CPU version is installed + + - name: Install package (including dev dependencies) + run: | + pdm install --group :all + + - name: Print and check torch version + run: | + pdm run python -c "import torch; print(torch.__version__)" + pdm run python -c "import torch; assert torch.__version__.endswith('+cpu')" + + - name: Load cache data + uses: actions/cache/restore@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + + - name: Run tests + run: | + pdm run pytest + + - name: Save cache data + uses: 
actions/cache/save@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/.github/workflows/ci-pdm-install-and-test-gpu.yml b/.github/workflows/ci-pdm-install-and-test-gpu.yml new file mode 100644 index 00000000..9ab4f379 --- /dev/null +++ b/.github/workflows/ci-pdm-install-and-test-gpu.yml @@ -0,0 +1,60 @@ +# cicd workflow for running tests with pytest +# needs to first install pdm, then install torch cpu manually and then install the package +# then run the tests + +name: test (pdm install, gpu) + +on: [push, pull_request] + +jobs: + tests: + runs-on: "cirun-aws-runner--${{ github.run_id }}" + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install pdm + run: | + python -m pip install pdm + + - name: Create venv + run: | + pdm config venv.in_project False + pdm config venv.location /opt/dlami/nvme/venv + pdm venv create --with-pip + + - name: Install torch (GPU CUDA 12.1) + run: | + pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu121 + + - name: Print and check torch version + run: | + pdm run python -c "import torch; print(torch.__version__)" + pdm run python -c "import torch; assert not torch.__version__.endswith('+cpu')" + + - name: Install package (including dev dependencies) + run: | + pdm install --group :all + + - name: Load cache data + uses: actions/cache/restore@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + + - name: Run tests + run: | + pdm run pytest + + - name: Save cache data + uses: actions/cache/save@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/.github/workflows/ci-pip-install-and-test-cpu.yml b/.github/workflows/ci-pip-install-and-test-cpu.yml new file mode 100644 index 00000000..81e402c5 --- /dev/null +++ b/.github/workflows/ci-pip-install-and-test-cpu.yml @@ -0,0 +1,45 @@ +# cicd workflow for running tests with pytest +# needs to first install pdm, then install torch cpu manually and then install the package +# then run the tests + +name: test (pip install, cpu) + +on: [push, pull_request] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Install torch (CPU) + run: | + python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + + - name: Install package (including dev dependencies) + run: | + python -m pip install ".[dev]" + + - name: Print and check torch version + run: | + python -c "import torch; print(torch.__version__)" + python -c "import torch; assert torch.__version__.endswith('+cpu')" + + - name: Load cache data + uses: actions/cache/restore@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + + - name: Run tests + run: | + python -m pytest + + - name: Save cache data + uses: actions/cache/save@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/.github/workflows/ci-pip-install-and-test-gpu.yml b/.github/workflows/ci-pip-install-and-test-gpu.yml new file mode 100644 index 00000000..ce68946a --- /dev/null +++ b/.github/workflows/ci-pip-install-and-test-gpu.yml @@ -0,0 +1,50 @@ +# cicd workflow for running tests with pytest +# needs to first install pdm, then install torch cpu manually and then install the 
package +# then run the tests + +name: test (pip install, gpu) + +on: [push, pull_request] + +jobs: + tests: + runs-on: "cirun-aws-runner--${{ github.run_id }}" + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install torch (GPU CUDA 12.1) + run: | + python -m pip install torch --index-url https://download.pytorch.org/whl/cu121 + + - name: Install package (including dev dependencies) + run: | + python -m pip install ".[dev]" + + - name: Print and check torch version + run: | + python -c "import torch; print(torch.__version__)" + python -c "import torch; assert not torch.__version__.endswith('+cpu')" + + - name: Load cache data + uses: actions/cache/restore@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + + - name: Run tests + run: | + python -m pytest + + - name: Save cache data + uses: actions/cache/save@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml deleted file mode 100644 index 810f2b2c..00000000 --- a/.github/workflows/run_tests.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Unit Tests - -on: - # trigger on pushes to any branch - push: - # and also on PRs to main - pull_request: - branches: - - main - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] - - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - pip install torch-geometric>=2.5.2 - - name: Load cache data - uses: actions/cache/restore@v4 - with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 - restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 - - name: Test with pytest - run: | - python -m pytest -v -s tests/ - - name: Save cache data - uses: actions/cache/save@v4 - with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 diff --git a/.gitignore b/.gitignore index 65e9f6f8..022206f5 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,8 @@ tags # Coc configuration directory .vim + +# pdm (https://pdm-project.org/en/stable/) +.pdm-python +# exclude pdm.lock file so that both cpu and gpu versions of torch will be accepted by pdm +pdm.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index c183888e..f7c5cd63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - added github pull-request template to ease contribution and review process [\#53](https://github.com/mllam/neural-lam/pull/53), @leifdenby +- ci/cd setup for running both CPU and GPU-based testing both with pdm and pip based installs [\#37](https://github.com/mllam/neural-lam/pull/37), @khintz, @leifdenby + ### Changed Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` @@ -88,10 +90,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#52](https://github.com/mllam/neural-lam/pull/52) @joeloskarsson -- Cap numpy version to < 2.0.0 +- Cap numpy version to < 2.0.0 (this cap was removed in #37, see 
below) [\#68](https://github.com/mllam/neural-lam/pull/68) @joeloskarsson +- Remove numpy < 2.0.0 version cap + [\#37](https://github.com/mllam/neural-lam/pull/37) + @leifdenby + - turn `neural-lam` into a python package by moving all `*.py`-files into the `neural_lam/` source directory and updating imports accordingly. This means all cli functions are now invoke through the package name, e.g. `python -m @@ -99,6 +105,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 anywhere once the package has been installed). [\#32](https://github.com/mllam/neural-lam/pull/32), @leifdenby +- move from `requirements.txt` to `pyproject.toml` for defining package dependencies. + [\#37](https://github.com/mllam/neural-lam/pull/37), @leifdenby + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/README.md b/README.md index ce8daf69..7dc6c7ab 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main) -![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main) +[![test (pdm install, gpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml) +[![test (pdm install, cpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml)

@@ -57,15 +58,38 @@ See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://git Below follows instructions on how to use Neural-LAM to train and evaluate models. ## Installation -Follow the steps below to create the necessary python environment. -1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement. -2. Use python 3.9. -3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system. -4. Install `neural-lam` with pip: -``` -pip install -e . -``` +When installing `neural-lam` you have a choice of either installing +directly with `pip` or using the `pdm` package manager. +We recommend using `pdm` as it makes it easy to add/remove packages while +keeping versions consistent (it automatically updates the `pyproject.toml` +file), makes it easy to handle virtual environments and includes the +development toolchain packages installation too. + +**regarding `torch` installation**: because `torch` creates different package +variants for different CUDA versions and cpu-only support you will need to install +`torch` separately if you don't want the most recent GPU variant that also +expects the most recent version of CUDA on your system. + +We cover all the installation options in our [github actions ci/cd +setup](.github/workflows/) which you can use as a reference. + +### Using `pdm` + +1. Clone this repository and navigate to the root directory. +2. Install `pdm` if you don't have it installed on your system (either with `pip install pdm` or [following the install instructions](https://pdm-project.org/latest/#installation)). +> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 5. +3. Create a virtual environment for pdm to use with `pdm venv create --with-pip`. +4. Install a specific version of `torch` with `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on [PyTorch webpage](https://pytorch.org/get-started/locally/)). +5. Install the dependencies with `pdm install` (by default this will install the required dependencies). If you will be developing `neural-lam` we recommend installing the development dependencies with `pdm install --group dev`. By default `pdm` installs the `neural-lam` package in editable mode, so you can make changes to the code and see the effects immediately. + +### Using `pip` + +1. Clone this repository and navigate to the root directory. +> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 3. +2. Install a specific version of `torch` with `python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on [PyTorch webpage](https://pytorch.org/get-started/locally/)). +3. Install the dependencies with `python -m pip install .`. 
If you will be developing `neural-lam` we recommend to install in editable mode and install the development dependencies with `python -m pip install -e ".[dev]"` so you can make changes to the code and see the effects immediately. + ## Data Datasets should be stored in a directory called `data`. diff --git a/pyproject.toml b/pyproject.toml index c482abc9..d66c0087 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,38 @@ [project] name = "neural-lam" version = "0.1.0" +description = "LAM-based data-driven forecasting" +authors = [ + {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"}, + {name = "Simon Adamov", email = "Simon.Adamov@meteoswiss.ch"}, + {name = "Leif Denby", email = "lcd@dmi.dk"}, +] + +# PEP 621 project metadata +# See https://www.python.org/dev/peps/pep-0621/ +dependencies = [ + "numpy>=1.24.2", + "wandb>=0.13.10", + "scipy>=1.10.0", + "pytorch-lightning>=2.0.3", + "shapely>=2.0.1", + "networkx>=3.0", + "Cartopy>=0.22.0", + "pyproj>=3.4.1", + "tueplots>=0.0.8", + "matplotlib>=3.7.0", + "plotly>=5.15.0", + "torch>=2.3.0", + "torch-geometric==2.3.1", +] +requires-python = ">=3.9" +[project.optional-dependencies] +dev = [ + "pre-commit>=3.8.0", + "pytest>=8.3.2", + "pooch>=1.8.2", +] [tool.setuptools] py-modules = ["neural_lam"] @@ -70,3 +101,9 @@ max-statements=100 # Allow for some more involved functions allow-any-import-level="neural_lam" [tool.pylint.SIMILARITIES] min-similarity-lines=10 + + +[tool.pdm] +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6bcf304d..00000000 --- a/requirements.txt +++ /dev/null @@ -1,17 +0,0 @@ -# for all -numpy>=1.24.2, <2.0.0 -wandb>=0.13.10 -matplotlib>=3.7.0 -scipy>=1.10.0 -pytorch-lightning>=2.0.3 -shapely>=2.0.1 -networkx>=3.0 -Cartopy>=0.22.0 -pyproj>=3.4.1 -tueplots>=0.0.8 -plotly>=5.15.0 - -# for dev -pre-commit>=2.15.0 -pytest>=8.1.1 -pooch>=1.8.1 diff --git a/tests/test_imports.py b/tests/test_imports.py new file mode 100644 index 00000000..e7bbd356 --- /dev/null +++ b/tests/test_imports.py @@ -0,0 +1,8 @@ +# First-party +import neural_lam +import neural_lam.vis + + +def test_import(): + assert neural_lam is not None + assert neural_lam.vis is not None From 68399f7e0a3964bc73ec798b8b3c08cd761a3eb1 Mon Sep 17 00:00:00 2001 From: Erik Larsson <86654747+ErikLarssonDev@users.noreply.github.com> Date: Thu, 5 Sep 2024 13:59:26 +0200 Subject: [PATCH 016/190] Update Argument Parser to use action="store_true" instead of 0/1 for boolean arguments. (#72) I have replaced the parser.add_argument where the standard has been to have an integer 0=False, 1=True with an action="store_true" instead. Example: OLD: parser.add_argument( "--restore_opt", type=int, default=0, help="If optimizer state should be restored with model " "(default: 0 (false))", ) NEW: parser.add_argument( "--restore_opt", action="store_true", help="If optimizer state should be restored with model " "(default: false)", ) This will save some time and characters when running the scripts from the command line as well as being easier to understand as the parsed variables are supposed to be booleans. 
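A minimal, self-contained sketch of the flag style described above (the parser below is illustrative and only reuses the `--restore_opt` example; it is not the actual `train_model` argument list):
```
# Illustrative sketch of the boolean-flag pattern, not the real CLI definition
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--restore_opt",
    action="store_true",
    help="If optimizer state should be restored with model (default: false)",
)

# The flag is now simply present or absent, no 0/1 value needed:
print(parser.parse_args(["--restore_opt"]).restore_opt)  # True
print(parser.parse_args([]).restore_opt)                 # False
```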
--- CHANGELOG.md | 6 +++++- README.md | 2 +- neural_lam/create_mesh.py | 10 ++++------ neural_lam/create_parameter_weights.py | 5 ++--- neural_lam/train_model.py | 26 +++++++++++--------------- plot_graph.py | 5 ++--- tests/test_mllam_dataset.py | 2 +- 7 files changed, 26 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7c5cd63..7348f6a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` +- Argument Parser updated to use action="store_true" instead of 0/1 for boolean arguments. + (https://github.com/mllam/neural-lam/pull/72) + @ErikLarssonDev + +- Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` [\#22](https://github.com/mllam/neural-lam/pull/22) @sadamov diff --git a/README.md b/README.md index 7dc6c7ab..41b03219 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ Run `python -m neural_lam.create_mesh` with suitable options to generate the gra The graphs used for the different models in the [paper](https://arxiv.org/abs/2309.17370) can be created as: * **GC-LAM**: `python -m neural_lam.create_mesh --graph multiscale` -* **Hi-LAM**: `python -m neural_lam.create_mesh --graph hierarchical --hierarchical 1` (also works for Hi-LAM-Parallel) +* **Hi-LAM**: `python -m neural_lam.create_mesh --graph hierarchical --hierarchical` (also works for Hi-LAM-Parallel) * **L1-LAM**: `python -m neural_lam.create_mesh --graph 1level --levels 1` The graph-related files are stored in a directory called `graphs`. diff --git a/neural_lam/create_mesh.py b/neural_lam/create_mesh.py index 40f7ba0e..21b8bf6e 100644 --- a/neural_lam/create_mesh.py +++ b/neural_lam/create_mesh.py @@ -169,10 +169,9 @@ def main(input_args=None): ) parser.add_argument( "--plot", - type=int, - default=0, + action="store_true", help="If graphs should be plotted during generation " - "(default: 0 (false))", + "(default: False)", ) parser.add_argument( "--levels", @@ -182,9 +181,8 @@ def main(input_args=None): ) parser.add_argument( "--hierarchical", - type=int, - default=0, - help="Generate hierarchical mesh graph (default: 0, no)", + action="store_true", + help="Generate hierarchical mesh graph (default: False)", ) args = parser.parse_args(input_args) diff --git a/neural_lam/create_parameter_weights.py b/neural_lam/create_parameter_weights.py index 74058d38..4867e609 100644 --- a/neural_lam/create_parameter_weights.py +++ b/neural_lam/create_parameter_weights.py @@ -156,9 +156,8 @@ def main(): ) parser.add_argument( "--distributed", - type=int, - default=0, - help="Run the script in distributed mode (1) or not (0) (default: 0)", + action="store_true", + help="Run the script in distributed mode (default: False)", ) args = parser.parse_args() distributed = bool(args.distributed) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 39f7aecd..c1a6cb89 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -41,10 +41,9 @@ def main(input_args=None): ) parser.add_argument( "--subset_ds", - type=int, - default=0, + action="store_true", help="Use only a small subset of the dataset, for debugging" - "(default: 0=false)", + "(default: false)", ) parser.add_argument( "--seed", type=int, default=42, help="random seed (default: 42)" @@ -71,10 +70,9 @@ def main(input_args=None): ) parser.add_argument( "--restore_opt", - type=int, - 
default=0, + action="store_true", help="If optimizer state should be restored with model " - "(default: 0 (false))", + "(default: false)", ) parser.add_argument( "--precision", @@ -118,11 +116,10 @@ def main(input_args=None): ) parser.add_argument( "--output_std", - type=int, - default=0, + action="store_true", help="If models should additionally output std.-dev. per " "output dimensions " - "(default: 0 (no))", + "(default: False (no))", ) # Training options @@ -135,10 +132,9 @@ def main(input_args=None): ) parser.add_argument( "--control_only", - type=int, - default=0, + action="store_true", help="Train only on control member of ensemble data " - "(default: 0 (False))", + "(default: False)", ) parser.add_argument( "--loss", @@ -233,7 +229,7 @@ def main(input_args=None): pred_length=args.ar_steps, split="train", subsample_step=args.step_length, - subset=bool(args.subset_ds), + subset=args.subset_ds, control_only=args.control_only, ), args.batch_size, @@ -247,7 +243,7 @@ def main(input_args=None): pred_length=max_pred_length, split="val", subsample_step=args.step_length, - subset=bool(args.subset_ds), + subset=args.subset_ds, control_only=args.control_only, ), args.batch_size, @@ -313,7 +309,7 @@ def main(input_args=None): pred_length=max_pred_length, split="test", subsample_step=args.step_length, - subset=bool(args.subset_ds), + subset=args.subset_ds, ), args.batch_size, shuffle=False, diff --git a/plot_graph.py b/plot_graph.py index 90462194..e47e62c0 100644 --- a/plot_graph.py +++ b/plot_graph.py @@ -38,9 +38,8 @@ def main(): ) parser.add_argument( "--show_axis", - type=int, - default=0, - help="If the axis should be displayed (default: 0 (No))", + action="store_true", + help="If the axis should be displayed (default: False)", ) args = parser.parse_args() diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index e12a57ae..5c8b7aa1 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -118,7 +118,7 @@ def test_load_reduced_meps_dataset(meps_example_reduced_filepath): def test_create_graph_reduced_meps_dataset(): args = [ "--graph=hierarchical", - "--hierarchical=1", + "--hierarchical", "--data_config=data/meps_example_reduced/data_config.yaml", "--levels=2", ] From a2ddcd4298d7865225c45866e7495664b3e382ab Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Tue, 10 Sep 2024 16:54:44 +0200 Subject: [PATCH 017/190] Clarify PR template (#74) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes As a new contributor you can not actually assign reviewers and asignees to PRs. But we request this in the PR template: https://github.com/mllam/neural-lam/blob/4969f92ad974f136089d15e7e2e2e9d73a43590d/.github/pull_request_template.md?plain=1#L29 This change clarifies the PR template to state that you only have to do this if you are able to. Otherwise we instruct contributors to tag a maintainer to add reviewer and asignee. ## Issue Link Solves #73 ## Type of change - [ ] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [x] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). 
- [x] I have performed a self-review of my code - [x] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values - [x] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code - [x] I have updated the [README](README.MD) to cover introduced code changes - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging) ## Checklist for reviewers Each PR comes with its own improvements and flaws. The reviewer should check the following: - [ ] the code is readable - [ ] the code is well tested - [ ] the code is documented (including return types and parameters) - [ ] the code is easy to maintain ## Author checklist after completed review - [x] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [x] PR is up to date with the base branch - [x] the tests pass - [x] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. --- .github/pull_request_template.md | 2 +- CHANGELOG.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index b4bf15ea..9d4aeb54 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,7 +26,7 @@ - [ ] I have updated the [README](README.MD) to cover introduced code changes - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). -- [ ] I have requested a reviewer and an assignee (assignee is responsible for merging) +- [ ] I have requested a reviewer and an assignee (assignee is responsible for merging). This applies only if you have write access to the repo, otherwise feel free to tag a maintainer to add a reviewer and assignee. ## Checklist for reviewers diff --git a/CHANGELOG.md b/CHANGELOG.md index 7348f6a0..b4f7eb07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Clarify routine around requesting reviewer and assignee in PR template + [\#74](https://github.com/mllam/neural-lam/pull/74) + @joeloskarsson + - Argument Parser updated to use action="store_true" instead of 0/1 for boolean arguments. 
(https://github.com/mllam/neural-lam/pull/72) @ErikLarssonDev From a7d6bf7b38b3704f0e8401ac6b0dc902b5578335 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Wed, 23 Oct 2024 16:57:27 +0200 Subject: [PATCH 018/190] Add slack and new publication info to readme (#78) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes This PR just contains some updates and clarifications to the readme. In particular: 1. Restructure references to publications as its own section in readme 2. Add reference to new, extended publication with links to branches where corresponding code can be found 3. Add links to join slack channel 4. Clarify state of ongoing work with making repo area-agnostic 5. Update text about Lightning issue with multi-gpu eval ## Type of change - [ ] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [x] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). - [x] I have performed a self-review of my code - [ ] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values - [ ] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code - [x] I have updated the [README](README.MD) to cover introduced code changes - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging). This applies only if you have write access to the repo, otherwise feel free to tag a maintainer to add a reviewer and assignee. ## Checklist for reviewers Each PR comes with its own improvements and flaws. The reviewer should check the following: - [x] the code is readable - [x] the code is well tested - [x] the code is documented (including return types and parameters) - [x] the code is easy to maintain ## Author checklist after completed review - [x] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [x] PR is up to date with the base branch - [x] the tests pass - [x] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. 
--- CHANGELOG.md | 4 ++++ README.md | 54 +++++++++++++++++++++++++++++++++++----------------- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4f7eb07..0badfa56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Maintenance +- Add slack and new publication info to readme + [\#78](https://github.com/mllam/neural-lam/pull/78) + @joeloskarsson + - simplify pre-commit setup by 1) reducing linting to only cover static analysis excluding imports from external dependencies (this will be handled in build/test cicd action introduced later), 2) pinning versions of linting diff --git a/README.md b/README.md index 41b03219..416f7e8c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +[![slack](https://img.shields.io/badge/slack-join-brightgreen.svg?logo=slack)](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) ![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main) [![test (pdm install, gpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml) [![test (pdm install, cpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml) @@ -7,6 +8,7 @@

Neural-LAM is a repository of graph-based neural weather prediction models for Limited Area Modeling (LAM). +Also global forecasting is possible, but currently on a [different branch](https://github.com/mllam/neural-lam/tree/prob_model_global) ([planned to be merged with main](https://github.com/mllam/neural-lam/issues/63)). The code uses [PyTorch](https://pytorch.org/) and [PyTorch Lightning](https://lightning.ai/pytorch-lightning). Graph Neural Networks are implemented using [PyG](https://pyg.org/) and logging is set up through [Weights & Biases](https://wandb.ai/). @@ -16,8 +18,15 @@ The repository contains LAM versions of: * GraphCast, by [Lam et al. (2023)](https://arxiv.org/abs/2212.12794). * The hierarchical model from [Oskarsson et al. (2023)](https://arxiv.org/abs/2309.17370). -For more information see our paper: [*Graph-based Neural Weather Prediction for Limited Area Modeling*](https://arxiv.org/abs/2309.17370). -If you use Neural-LAM in your work, please cite: +# Publications +For a more in-depth scientific introduction to machine learning for LAM weather forecasting see the publications listed here. +As the code in the repository is continuously evolving, the latest version might feature some small differences to what was used for these publications. +We retain some paper-specific branches for reproducibility purposes. + + +*If you use Neural-LAM in your work, please cite the relevant paper(s)*. + +#### [Graph-based Neural Weather Prediction for Limited Area Modeling](https://arxiv.org/abs/2309.17370) ``` @inproceedings{oskarsson2023graphbased, title={Graph-based Neural Weather Prediction for Limited Area Modeling}, @@ -26,12 +35,20 @@ If you use Neural-LAM in your work, please cite: year={2023} } ``` -As the code in the repository is continuously evolving, the latest version might feature some small differences to what was used in the paper. -See the branch [`ccai_paper_2023`](https://github.com/joeloskarsson/neural-lam/tree/ccai_paper_2023) for a revision of the code that reproduces the workshop paper. +See the branch [`ccai_paper_2023`](https://github.com/joeloskarsson/neural-lam/tree/ccai_paper_2023) for a revision of the code that reproduces this workshop paper. -We plan to continue updating this repository as we improve existing models and develop new ones. -Collaborations around this implementation are very welcome. -If you are working with Neural-LAM feel free to get in touch and/or submit pull requests to the repository. +#### [Probabilistic Weather Forecasting with Hierarchical Graph Neural Networks](https://arxiv.org/abs/2406.04759) +``` +@inproceedings{oskarsson2024probabilistic, + title = {Probabilistic Weather Forecasting with Hierarchical Graph Neural Networks}, + author = {Oskarsson, Joel and Landelius, Tomas and Deisenroth, Marc Peter and Lindsten, Fredrik}, + booktitle = {Advances in Neural Information Processing Systems}, + volume = {37}, + year = {2024}, +} +``` +See the branches [`prob_model_lam`](https://github.com/mllam/neural-lam/tree/prob_model_lam) and [`prob_model_global`](https://github.com/mllam/neural-lam/tree/prob_model_global) for revisions of the code that reproduces this paper. +The global and probabilistic models from this paper are not yet fully merged with `main` (see issues [62](https://github.com/mllam/neural-lam/issues/62) and [63](https://github.com/mllam/neural-lam/issues/63)). # Modularity The Neural-LAM code is designed to modularize the different components involved in training and evaluating neural weather prediction models. 
@@ -47,12 +64,12 @@ Still, some restrictions are inevitable: ## A note on the limited area setting -Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](https://arxiv.org/abs/2309.17370)). +Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](#graph-based-neural-weather-prediction-for-limited-area-modeling)). There are still some parts of the code that are quite specific for the MEPS area use case. This is in particular true for the mesh graph creation (`python -m neural_lam.create_mesh`) and some of the constants set in a `data_config.yaml` file (path specified in `python -m neural_lam.train_model --data_config ` ). -If there is interest to use Neural-LAM for other areas it is not a substantial undertaking to refactor the code to be fully area-agnostic. -We would be happy to support such enhancements. -See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://github.com/joeloskarsson/neural-lam/issues/3 and https://github.com/joeloskarsson/neural-lam/issues/4 for some initial ideas on how this could be done. +There are ongoing efforts to refactor the code to be fully area-agnostic. +See issues [4](https://github.com/mllam/neural-lam/issues/4) and [24](https://github.com/mllam/neural-lam/issues/24) for more about this. +See also the [weather-model-graphs](https://github.com/mllam/weather-model-graphs) package for constructing graphs for arbitrary areas. # Using Neural-LAM Below follows instructions on how to use Neural-LAM to train and evaluate models. ## Installation @@ -98,7 +115,7 @@ See the [repository format section](#format-of-data-directory) for details on th The full MEPS dataset can be shared with other researchers on request, contact us for this. A tiny subset of the data (named `meps_example`) is available in `example_data.zip`, which can be downloaded from [here](https://liuonline-my.sharepoint.com/:f:/g/personal/joeos82_liu_se/EuiUuiGzFIFHruPWpfxfUmYBSjhqMUjNExlJi9W6ULMZ1w?e=97pnGX). Download the file and unzip in the neural-lam directory. -All graphs used in the paper are also available for download at the same link (but can as easily be re-generated using `python -m neural_lam.create_mesh`). +Graphs used in the initial paper are also available for download at the same link (but can as easily be re-generated using `python -m neural_lam.create_mesh`). Note that this is far too little data to train any useful models, but all pre-processing and training steps can be run with it. It should thus be useful to make sure that your python environment is set up correctly and that all the code can be run without any issues. @@ -115,7 +132,7 @@ In order to start training models at least three pre-processing steps have to be ### Create graph Run `python -m neural_lam.create_mesh` with suitable options to generate the graph you want to use (see `python -m neural_lam.create_mesh --help` for a list of options). -The graphs used for the different models in the [paper](https://arxiv.org/abs/2309.17370) can be created as: +The graphs used for the different models in the [paper](#graph-based-neural-weather-prediction-for-limited-area-modeling) can be created as: * **GC-LAM**: `python -m neural_lam.create_mesh --graph multiscale` @@ -159,7 +176,7 @@ The implemented models are: ### Graph-LAM This is the basic graph-based LAM model. 
The encode-process-decode framework is used with a mesh graph in order to make one-step pedictions. -This model class is used both for the L1-LAM and GC-LAM models from the [paper](https://arxiv.org/abs/2309.17370), only with different graphs. +This model class is used both for the L1-LAM and GC-LAM models from the [paper](#graph-based-neural-weather-prediction-for-limited-area-modeling), only with different graphs. To train 1L-LAM use ``` @@ -199,7 +216,9 @@ Some options specifically important for evaluation are: * `--load`: Path to model checkpoint file (`.ckpt`) to load parameters from * `--n_example_pred`: Number of example predictions to plot during evaluation. -**Note:** While it is technically possible to use multiple GPUs for running evaluation, this is strongly discouraged. If using multiple devices the `DistributedSampler` will replicate some samples to make sure all devices have the same batch size, meaning that evaluation metrics will be unreliable. This issue stems from PyTorch Lightning. See for example [this draft PR](https://github.com/Lightning-AI/torchmetrics/pull/1886) for more discussion and ongoing work to remedy this. +**Note:** While it is technically possible to use multiple GPUs for running evaluation, this is strongly discouraged. If using multiple devices the `DistributedSampler` will replicate some samples to make sure all devices have the same batch size, meaning that evaluation metrics will be unreliable. +A possible workaround is to just use batch size 1 during evaluation. +This issue stems from PyTorch Lightning. See for example [this PR](https://github.com/Lightning-AI/torchmetrics/pull/1886) for more discussion. # Repository Structure Except for training and pre-processing scripts all the source code can be found in the `neural_lam` directory. @@ -303,5 +322,6 @@ from the root directory of the repository. Furthermore, all tests in the ```tests``` directory will be run upon pushing changes by a github action. Failure in any of the tests will also reject the push/PR. # Contact -If you are interested in machine learning models for LAM, have questions about our implementation or ideas for extending it, feel free to get in touch. -You can open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). +If you are interested in machine learning models for LAM, have questions about the implementation or ideas for extending it, feel free to get in touch. +There is an open [mllam slack channel](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) that anyone can join (after following the link you have to request to join, this is to avoid spam bots). +You can also open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). From 2d36857e67dc1cbc445dd1d7d0604cd047909068 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Thu, 24 Oct 2024 13:31:04 +0200 Subject: [PATCH 019/190] Release version 0.2.0 (#79) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes Going by the [roadmap](https://github.com/mllam/neural-lam/wiki/Roadmap) we are ready to release version 0.2.0 :partying_face: This PR just updates the changelog for this release (and also re-orders one entry that was put in the wrong place). Now this does the changelog update first, and then we create the release for the commit with the update, but I guess the order of this does not matter much. 
Something I am unsure about is if we want to now remove this version with the completed items from the roadmap? Could be good to have archived somewhere. Perhaps we could add a section for finished releases at the bottom. ## Github TODOs after meged: - [ ] Create release (there is a draft release ready) - [ ] Update roadmap ## Type of change - [ ] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [x] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). - [ ] I have performed a self-review of my code - [ ] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values - [ ] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code - [ ] I have updated the [README](README.MD) to cover introduced code changes - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging). This applies only if you have write access to the repo, otherwise feel free to tag a maintainer to add a reviewer and assignee. ## Checklist for reviewers Each PR comes with its own improvements and flaws. The reviewer should check the following: - [ ] the code is readable - [ ] the code is well tested - [ ] the code is documented (including return types and parameters) - [ ] the code is easy to maintain ## Author checklist after completed review - [ ] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [x] PR is up to date with the base branch - [x] the tests pass - [ ] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. --- CHANGELOG.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0badfa56..18cf5d4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) +## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.2.0...HEAD) + +## [v0.2.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.2.0) ### Added - Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub, including push to main branch. 
Added caching of test data to speed up running tests. @@ -81,10 +83,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Maintenance -- Add slack and new publication info to readme - [\#78](https://github.com/mllam/neural-lam/pull/78) - @joeloskarsson - - simplify pre-commit setup by 1) reducing linting to only cover static analysis excluding imports from external dependencies (this will be handled in build/test cicd action introduced later), 2) pinning versions of linting @@ -120,6 +118,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - move from `requirements.txt` to `pyproject.toml` for defining package dependencies. [\#37](https://github.com/mllam/neural-lam/pull/37), @leifdenby +- Add slack and new publication info to readme + [\#78](https://github.com/mllam/neural-lam/pull/78) + @joeloskarsson + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication From 7112013f24ad36d8d9d19b4b5f853b11a2bbebf4 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Thu, 24 Oct 2024 13:43:32 +0200 Subject: [PATCH 020/190] Update version number to v0.2.0 (#81) ## Describe your changes Forgot about the version number in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d66c0087..14b7e69a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "neural-lam" -version = "0.1.0" +version = "0.2.0" description = "LAM-based data-driven forecasting" authors = [ {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"}, From 2cc617e5ed49340b9a9b5fd7292387b34fc52462 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Mon, 18 Nov 2024 08:35:03 +0100 Subject: [PATCH 021/190] Add weights_only=True to all torch.load calls (#86) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes Currently running neural-lam with the latest version of pytorch gives a warning: ``` FutureWarning: You are using torch.load with weights_only=False (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for weights_only will be flipped to True. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via torch.serialization.add_safe_globals. We recommend you start setting weights_only=True for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. ``` As we only use `torch.load` to load tensors and lists, we can just set `weights_only=True` and get rid of this warning (and increase security I suppose). 
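For illustration, a minimal sketch of the call before and after the change (the file name is just a placeholder for one of the tensors we load, e.g. `parameter_mean.pt`):

```python
import torch

# Before: relies on the default weights_only=False and triggers the FutureWarning
data_mean = torch.load("parameter_mean.pt", map_location="cpu")

# After: unpickling is restricted to plain tensors/containers and the warning goes away
data_mean = torch.load(
    "parameter_mean.pt",
    map_location="cpu",
    weights_only=True,
)
```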
## Issue Link None ## Type of change - [x] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). - [x] I have performed a self-review of my code - [x] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values - [x] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code - [x] I have updated the [README](README.MD) to cover introduced code changes - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging). This applies only if you have write access to the repo, otherwise feel free to tag a maintainer to add a reviewer and assignee. ## Checklist for reviewers Each PR comes with its own improvements and flaws. The reviewer should check the following: - [x] the code is readable - [ ] the code is well tested - [x] the code is documented (including return types and parameters) - [x] the code is easy to maintain ## Author checklist after completed review - [ ] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [ ] PR is up to date with the base branch - [ ] the tests pass - [ ] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. --- neural_lam/utils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 59a529eb..c47c44ff 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -17,7 +17,9 @@ def load_dataset_stats(dataset_name, device="cpu"): def loads_file(fn): return torch.load( - os.path.join(static_dir_path, fn), map_location=device + os.path.join(static_dir_path, fn), + map_location=device, + weights_only=True, ) data_mean = loads_file("parameter_mean.pt") # (d_features,) @@ -42,7 +44,9 @@ def load_static_data(dataset_name, device="cpu"): def loads_file(fn): return torch.load( - os.path.join(static_dir_path, fn), map_location=device + os.path.join(static_dir_path, fn), + map_location=device, + weights_only=True, ) # Load border mask, 1. if node is part of border, else 0. 
@@ -116,7 +120,11 @@ def load_graph(graph_name, device="cpu"): graph_dir_path = os.path.join("graphs", graph_name) def loads_file(fn): - return torch.load(os.path.join(graph_dir_path, fn), map_location=device) + return torch.load( + os.path.join(graph_dir_path, fn), + map_location=device, + weights_only=True, + ) # Load edges (edge_index) m2m_edge_index = BufferList( From c3c1722f5130bfe374acdbad01e57fc76893e5cd Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Thu, 21 Nov 2024 08:01:08 +0100 Subject: [PATCH 022/190] Add "datastores" to represent input data from zarr, npy, etc (#66) Introduce new "datastores" concept where loading of input data from disk into `pytorch.Dataset` (`neural_lam.WeatherDataset`) is split into two layers: a) datastores that work with and return `xr.DataArray` objects for the whole time-series and b) `neural_lam.WeatherDataset` which consumes output from a datastore, takes care of time-sampling and produces `pytorch.Tensor`-based training samples. Currently, two kinds of datastores are implemented: 1) reading of zarr-based training datasets produced with `mllam-data-prep` and 2) reading of the npyfiles-based MEPS example dataset included with neural-lam `v0.1.0`. --------- Co-authored-by: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Co-authored-by: joeloskarsson Co-authored-by: Leif Denby Co-authored-by: Joel Oskarsson Co-authored-by: Simon Adamov Co-authored-by: Simon Adamov Co-authored-by: Kasper Hintz --- .../workflows/ci-pdm-install-and-test-cpu.yml | 12 +- .../workflows/ci-pdm-install-and-test-gpu.yml | 12 +- .../workflows/ci-pip-install-and-test-cpu.yml | 12 +- .../workflows/ci-pip-install-and-test-gpu.yml | 12 +- .github/workflows/pre-commit.yml | 2 +- .gitignore | 11 +- CHANGELOG.md | 6 + README.md | 364 ++++++-- figures/component_dependencies.png | Bin 203632 -> 0 bytes neural_lam/__init__.py | 1 - neural_lam/config.py | 225 +++-- .../{create_mesh.py => create_graph.py} | 232 +++-- neural_lam/create_grid_features.py | 63 -- neural_lam/data_config.yaml | 64 -- neural_lam/datastore/__init__.py | 26 + neural_lam/datastore/base.py | 553 +++++++++++ neural_lam/datastore/mdp.py | 464 +++++++++ neural_lam/datastore/npyfilesmeps/__init__.py | 2 + .../compute_standardization_stats.py} | 251 ++--- neural_lam/datastore/npyfilesmeps/config.py | 66 ++ neural_lam/datastore/npyfilesmeps/store.py | 788 ++++++++++++++++ neural_lam/datastore/plot_example.py | 189 ++++ neural_lam/loss_weighting.py | 106 +++ neural_lam/models/ar_model.py | 249 +++-- neural_lam/models/base_graph_model.py | 17 +- neural_lam/models/base_hi_graph_model.py | 6 +- neural_lam/models/graph_lam.py | 6 +- neural_lam/models/hi_lam.py | 9 +- neural_lam/models/hi_lam_parallel.py | 9 +- plot_graph.py => neural_lam/plot_graph.py | 33 +- neural_lam/train_model.py | 152 ++- neural_lam/utils.py | 136 +-- neural_lam/vis.py | 51 +- neural_lam/weather_dataset.py | 877 +++++++++++++----- pyproject.toml | 30 +- tests/__init__.py | 0 tests/conftest.py | 106 +++ tests/datastore_examples/.gitignore | 2 + .../mdp/danra_100m_winds/.gitignore | 2 + .../mdp/danra_100m_winds/config.yaml | 9 + .../mdp/danra_100m_winds/danra.datastore.yaml | 99 ++ tests/dummy_datastore.py | 449 +++++++++ tests/test_cli.py | 14 +- tests/test_config.py | 72 ++ tests/test_datasets.py | 261 ++++++ tests/test_datastores.py | 384 ++++++++ tests/test_graph_creation.py | 119 +++ tests/test_mllam_dataset.py | 142 --- tests/test_time_slicing.py | 146 +++ tests/test_training.py | 103 ++ 50 files changed, 5765 insertions(+), 1179 
deletions(-) delete mode 100644 figures/component_dependencies.png rename neural_lam/{create_mesh.py => create_graph.py} (74%) delete mode 100644 neural_lam/create_grid_features.py delete mode 100644 neural_lam/data_config.yaml create mode 100644 neural_lam/datastore/__init__.py create mode 100644 neural_lam/datastore/base.py create mode 100644 neural_lam/datastore/mdp.py create mode 100644 neural_lam/datastore/npyfilesmeps/__init__.py rename neural_lam/{create_parameter_weights.py => datastore/npyfilesmeps/compute_standardization_stats.py} (71%) create mode 100644 neural_lam/datastore/npyfilesmeps/config.py create mode 100644 neural_lam/datastore/npyfilesmeps/store.py create mode 100644 neural_lam/datastore/plot_example.py create mode 100644 neural_lam/loss_weighting.py rename plot_graph.py => neural_lam/plot_graph.py (88%) create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/datastore_examples/.gitignore create mode 100644 tests/datastore_examples/mdp/danra_100m_winds/.gitignore create mode 100644 tests/datastore_examples/mdp/danra_100m_winds/config.yaml create mode 100644 tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml create mode 100644 tests/dummy_datastore.py create mode 100644 tests/test_config.py create mode 100644 tests/test_datasets.py create mode 100644 tests/test_datastores.py create mode 100644 tests/test_graph_creation.py delete mode 100644 tests/test_mllam_dataset.py create mode 100644 tests/test_time_slicing.py create mode 100644 tests/test_training.py diff --git a/.github/workflows/ci-pdm-install-and-test-cpu.yml b/.github/workflows/ci-pdm-install-and-test-cpu.yml index c5da88cc..8fb4df79 100644 --- a/.github/workflows/ci-pdm-install-and-test-cpu.yml +++ b/.github/workflows/ci-pdm-install-and-test-cpu.yml @@ -39,17 +39,17 @@ jobs: - name: Load cache data uses: actions/cache/restore@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + ${{ runner.os }}-meps-reduced-example-data-v0.2.0 - name: Run tests run: | - pdm run pytest + pdm run pytest -vv -s - name: Save cache data uses: actions/cache/save@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 diff --git a/.github/workflows/ci-pdm-install-and-test-gpu.yml b/.github/workflows/ci-pdm-install-and-test-gpu.yml index 9ab4f379..43a701c2 100644 --- a/.github/workflows/ci-pdm-install-and-test-gpu.yml +++ b/.github/workflows/ci-pdm-install-and-test-gpu.yml @@ -44,17 +44,17 @@ jobs: - name: Load cache data uses: actions/cache/restore@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + ${{ runner.os }}-meps-reduced-example-data-v0.2.0 - name: Run tests run: | - pdm run pytest + pdm run pytest -vv -s - name: Save cache data uses: actions/cache/save@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 diff --git 
a/.github/workflows/ci-pip-install-and-test-cpu.yml b/.github/workflows/ci-pip-install-and-test-cpu.yml index 81e402c5..b131596d 100644 --- a/.github/workflows/ci-pip-install-and-test-cpu.yml +++ b/.github/workflows/ci-pip-install-and-test-cpu.yml @@ -29,17 +29,17 @@ jobs: - name: Load cache data uses: actions/cache/restore@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + ${{ runner.os }}-meps-reduced-example-data-v0.2.0 - name: Run tests run: | - python -m pytest + python -m pytest -vv -s - name: Save cache data uses: actions/cache/save@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 diff --git a/.github/workflows/ci-pip-install-and-test-gpu.yml b/.github/workflows/ci-pip-install-and-test-gpu.yml index ce68946a..3afcca5a 100644 --- a/.github/workflows/ci-pip-install-and-test-gpu.yml +++ b/.github/workflows/ci-pip-install-and-test-gpu.yml @@ -34,17 +34,17 @@ jobs: - name: Load cache data uses: actions/cache/restore@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + ${{ runner.os }}-meps-reduced-example-data-v0.2.0 - name: Run tests run: | - python -m pytest + python -m pytest -vv -s - name: Save cache data uses: actions/cache/save@v4 with: - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip + key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index ad2b1a9c..71e28ad7 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 - name: Set up Python diff --git a/.gitignore b/.gitignore index 022206f5..fdb51d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,14 @@ ### Project Specific ### wandb -slurm_log* saved_models lightning_logs data graphs *.sif sweeps -test_*.sh .vscode +*.html +*.zarr *slurm* ### Python ### @@ -75,8 +75,15 @@ tags # Coc configuration directory .vim +.vscode + +# macos +.DS_Store +__MACOSX # pdm (https://pdm-project.org/en/stable/) .pdm-python +.venv + # exclude pdm.lock file so that both cpu and gpu versions of torch will be accepted by pdm pdm.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 18cf5d4d..12cf54f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.2.0...HEAD) +### Added + +- Introduce Datastores to represent input data from different sources, including zarr and numpy. 
+ [\#66](https://github.com/mllam/neural-lam/pull/66) + @leifdenby @sadamov + ## [v0.2.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.2.0) ### Added diff --git a/README.md b/README.md index 416f7e8c..e21b7c24 100644 --- a/README.md +++ b/README.md @@ -63,18 +63,7 @@ Still, some restrictions are inevitable:

-## A note on the limited area setting -Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](#graph-based-neural-weather-prediction-for-limited-area-modeling)). -There are still some parts of the code that is quite specific for the MEPS area use case. -This is in particular true for the mesh graph creation (`python -m neural_lam.create_mesh`) and some of the constants set in a `data_config.yaml` file (path specified in `python -m neural_lam.train_model --data_config ` ). -There is ongoing efforts to refactor the code to be fully area-agnostic. -See issues [4](https://github.com/mllam/neural-lam/issues/4) and [24](https://github.com/mllam/neural-lam/issues/24) for more about this. -See also the [weather-model-graphs](https://github.com/mllam/weather-model-graphs) package for constructing graphs for arbitrary areas. - -# Using Neural-LAM -Below follows instructions on how to use Neural-LAM to train and evaluate models. - -## Installation +# Installing Neural-LAM When installing `neural-lam` you have a choice of either installing with directly `pip` or using the `pdm` package manager. @@ -91,7 +80,7 @@ expects the most recent version of CUDA on your system. We cover all the installation options in our [github actions ci/cd setup](.github/workflows/) which you can use as a reference. -### Using `pdm` +## Using `pdm` 1. Clone this repository and navigate to the root directory. 2. Install `pdm` if you don't have it installed on your system (either with `pip install pdm` or [following the install instructions](https://pdm-project.org/latest/#installation)). @@ -100,7 +89,7 @@ setup](.github/workflows/) which you can use as a reference. 4. Install a specific version of `torch` with `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on [PyTorch webpage](https://pytorch.org/get-started/locally/)). 5. Install the dependencies with `pdm install` (by default this in include the). If you will be developing `neural-lam` we recommend to install the development dependencies with `pdm install --group dev`. By default `pdm` installs the `neural-lam` package in editable mode, so you can make changes to the code and see the effects immediately. -### Using `pip` +## Using `pip` 1. Clone this repository and navigate to the root directory. > If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 3. @@ -108,41 +97,291 @@ setup](.github/workflows/) which you can use as a reference. 3. Install the dependencies with `python -m pip install .`. If you will be developing `neural-lam` we recommend to install in editable mode and install the development dependencies with `python -m pip install -e ".[dev]"` so you can make changes to the code and see the effects immediately. -## Data -Datasets should be stored in a directory called `data`. -See the [repository format section](#format-of-data-directory) for details on the directory structure. +# Using Neural-LAM + +Once `neural-lam` is installed you will be able to train/evaluate models. For this you will in general need two things: + +1. **Data to train/evaluate the model**. 
To represent this data we use a concept of + *datastores* in Neural-LAM (see the [Data](#data-the-datastore-and-weatherdataset-classes) section for more details). + In brief, a datastore implements the process of loading data from disk in a + specific format (for example zarr or numpy files) by implementing an + interface that provides the data in a data-structure that can be used within + neural-lam. A datastore is used to create a `pytorch.Dataset`-derived + class that samples the data in time to create individual samples for + training, validation and testing. + +2. **The graph structure** is used to define message-passing GNN layers, + that are trained to emulate fluid flow in the atmosphere over time. The + graph structure is created for a specific datastore. + +Any command you run in neural-lam will include the path to a configuration file +to be used (usually called `config.yaml`). This configuration file defines the +path to the datastore configuration you wish to use and allows you to configure +different aspects about the training and evaluation of the model. + +The path you provide to the neural-lam config (`config.yaml`) also sets the +root directory relative to which all other paths are resolved, as in the parent +directory of the config becomes the root directory. Both the datastore and +graphs you generate are then stored in subdirectories of this root directory. +Exactly how and where a specific datastore expects its source data to be stored +and where it stores its derived data is up to the implementation of the +datastore. + +In general the folder structure assumed in Neural-LAM is as follows (we will +assume you placed `config.yaml` in a folder called `data`): + +``` +data/ +├── config.yaml - Configuration file for neural-lam +├── danra.datastore.yaml - Configuration file for the datastore, referred to from config.yaml +└── graphs/ - Directory containing graphs for training +``` + +And the content of `config.yaml` could in this case look like: +```yaml +datastore: + kind: mdp + config_path: danra.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + values: + u100m: 1.0 + v100m: 1.0 +``` + +For now the neural-lam config only defines two things: 1) the kind of data +store and the path to its config, and 2) the weighting of different features in +the loss function. If you don't define the state feature weighting it will default +to weighting all features equally. + +(This example is taken from the `tests/datastore_examples/mdp` directory.) + + +Below follows instructions on how to use Neural-LAM to train and evaluate +models, with details first given for each kind of datastore implemented +and later the graph generation. Once `neural-lam` has been installed the +general process is: + +1. Run any pre-processing scripts to generate the necessary derived data that your chosen datastore requires +2. Run graph-creation step +3. Train the model + +## Data (the `DataStore` and `WeatherDataset` classes) + +To enable flexibility in what input-data sources can be used with neural-lam, +the input-data representation is split into two parts: + +1. A "datastore" (represented by instances of + [neural_lam.datastore.BaseDataStore](neural_lam/datastore/base.py)) which + takes care of loading a given category (state, forcing or static) and split + (train/val/test) of data from disk and returning it as a `xarray.DataArray`. 
+ The returned data-array is expected to have the spatial coordinates + flattened into a single `grid_index` dimension and all variables and vertical + levels stacked into a feature dimension (named as `{category}_feature`). The + datastore also provides information about the number, names and units of + variables in the data, the boundary mask, normalisation values and grid + information. + +2. A `pytorch.Dataset`-derived class (called + `neural_lam.weather_dataset.WeatherDataset`) which takes care of sampling in + time to create individual samples for training, validation and testing. The + `WeatherDataset` class is also responsible for normalising the values and + returning `torch.Tensor`-objects. + +There are currently two different datastores implemented in the codebase: + +1. `neural_lam.datastore.MDPDatastore` which represents loading of + *training-ready* datasets in zarr format created with the + [mllam-data-prep](https://github.com/mllam/mllam-data-prep) package. + Training-ready refers to the fact that this data has been transformed + (variables have been stacked, spatial coordinates have been flattened, + statistics for normalisation have been calculated, etc) to be ready for + training. `mllam-data-prep` can combine any number of datasets that can be + read with [xarray](https://github.com/pydata/xarray) and the processing can + either be done at run-time or as a pre-processing step before calling + neural-lam. + +2. `neural_lam.datastore.NpyFilesDatastoreMEPS` which reads MEPS data from + `.npy`-files in the format introduced in neural-lam `v0.1.0`. Note that this + datastore is specific to the format of the MEPS dataset, but can act as an + example for how to create similar numpy-based datastores. + +If neither of these options fit your need you can create your own datastore by +subclassing the `neural_lam.datastore.BaseDataStore` class or +`neural_lam.datastore.BaseRegularGridDatastore` class (if your data is stored on +a regular grid) and implementing the abstract methods. + + +### MDP (mllam-data-prep) Datastore - `MDPDatastore` + +With `MDPDatastore` (the mllam-data-prep datastore) all the selection, +transformation and pre-calculation steps that are needed to go from +for example gridded weather data to a format that is optimised for training +in neural-lam, are done in a separate package called +[mllam-data-prep](https://github.com/mllam/mllam-data-prep) rather than in +neural-lam itself. +Specifically, the `mllam-data-prep` datastore configuration (for example +[danra.datastore.yaml](tests/datastore_examples/mdp/danra.datastore.yaml)) +specifies a) what source datasets to read from, b) what variables to select, c) +what transformations of dimensions and variables to make, d) what statistics to +calculate (for normalisation) and e) how to split the data into training, +validation and test sets (see full details about the configuration specification +in the [mllam-data-prep README](https://github.com/mllam/mllam-data-prep)). + +From a datastore configuration `mllam-data-prep` returns the transformed +dataset as an `xr.Dataset` which is then written in zarr-format to disk by +`neural-lam` when the datastore is first initiated (the path of the dataset is +derived from the datastore config, so that from a config named `danra.datastore.yaml` the resulting dataset is stored in `danra.datastore.zarr`). 
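To make the datastore interface described above a bit more concrete, here is a minimal usage sketch in Python. The module and class names are taken from the text above, but the constructor argument and method names are assumptions and may differ from the actual API:

```python
# Hypothetical sketch; treat the exact signatures as assumptions.
from neural_lam.datastore import MDPDatastore

# First instantiation processes the mllam-data-prep config and writes the
# resulting zarr dataset (danra.datastore.zarr) next to the config file.
datastore = MDPDatastore(config_path="data/danra.datastore.yaml")

# A datastore returns whole time series as xarray.DataArray objects, here the
# training split of the state variables, with spatial coordinates flattened
# into `grid_index` and variables stacked into `state_feature`.
da_state = datastore.get_dataarray(category="state", split="train")
print(da_state.dims)
```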
+You can also run `mllam-data-prep` directly to create the processed dataset by providing the path to the datastore configuration file: + +```bash +python -m mllam_data_prep --config data/danra.datastore.yaml +``` + +If you will be working on a large dataset (on the order of 10GB or more) it +could be beneficial to produce the processed `.zarr` dataset before using it +in neural-lam so that you can do the processing across multiple CPU cores in parallel. This is done by including the `--dask-distributed-local-core-fraction` argument when calling mllam-data-prep to set the fraction of your system's CPU cores that should be used for processing (see the +[mllam-data-prep +README for details](https://github.com/mllam/mllam-data-prep?tab=readme-ov-file#creating-large-datasets-with-daskdistributed)). + +For example: + +```bash +python -m mllam_data_prep --config data/danra.datastore.yaml --dask-distributed-local-core-fraction 0.5 +``` + +### NpyFiles MEPS Datastore - `NpyFilesDatastoreMEPS` + +Version `v0.1.0` of Neural-LAM was built to train from numpy-files from the +MEPS weather forecasts dataset. +To enable this functionality to live on in later versions of neural-lam we have +built a datastore called `NpyFilesDatastoreMEPS` which implements functionality +to read from these exact same numpy-files. At this stage this datastore class +is very much tied to the MEPS dataset, but the code is written in a way where +it quite easily could be adapted to work with numpy-based weather +forecast/analysis files in future. The full MEPS dataset can be shared with other researchers on request, contact us for this. -A tiny subset of the data (named `meps_example`) is available in `example_data.zip`, which can be downloaded from [here](https://liuonline-my.sharepoint.com/:f:/g/personal/joeos82_liu_se/EuiUuiGzFIFHruPWpfxfUmYBSjhqMUjNExlJi9W6ULMZ1w?e=97pnGX). +A tiny subset of the data (named `meps_example`) is available in +`example_data.zip`, which can be downloaded from +[here](https://liuonline-my.sharepoint.com/:f:/g/personal/joeos82_liu_se/EuiUuiGzFIFHruPWpfxfUmYBSjhqMUjNExlJi9W6ULMZ1w?e=97pnGX). + Download the file and unzip in the neural-lam directory. -Graphs used in the initial paper are also available for download at the same link (but can as easily be re-generated using `python -m neural_lam.create_mesh`). +Graphs used in the initial paper are also available for download at the same link (but can as easily be re-generated using `python -m neural_lam.create_graph`). Note that this is far too little data to train any useful models, but all pre-processing and training steps can be run with it. It should thus be useful to make sure that your python environment is set up correctly and that all the code can be ran without any issues. -## Pre-processing -An overview of how the different pre-processing steps, training and files depend on each other is given in this figure: -
-In order to start training models at least three pre-processing steps have to be run: +The following datastore configuration works with the MEPS dataset: + +```yaml +# meps.datastore.yaml +dataset: + name: meps_example + num_forcing_features: 16 + var_longnames: + - pres_heightAboveGround_0_instant + - pres_heightAboveSea_0_instant + - nlwrs_heightAboveGround_0_accum + - nswrs_heightAboveGround_0_accum + - r_heightAboveGround_2_instant + - r_hybrid_65_instant + - t_heightAboveGround_2_instant + - t_hybrid_65_instant + - t_isobaricInhPa_500_instant + - t_isobaricInhPa_850_instant + - u_hybrid_65_instant + - u_isobaricInhPa_850_instant + - v_hybrid_65_instant + - v_isobaricInhPa_850_instant + - wvint_entireAtmosphere_0_instant + - z_isobaricInhPa_1000_instant + - z_isobaricInhPa_500_instant + var_names: + - pres_0g + - pres_0s + - nlwrs_0 + - nswrs_0 + - r_2 + - r_65 + - t_2 + - t_65 + - t_500 + - t_850 + - u_65 + - u_850 + - v_65 + - v_850 + - wvint_0 + - z_1000 + - z_500 + var_units: + - Pa + - Pa + - W/m\textsuperscript{2} + - W/m\textsuperscript{2} + - "-" + - "-" + - K + - K + - K + - K + - m/s + - m/s + - m/s + - m/s + - kg/m\textsuperscript{2} + - m\textsuperscript{2}/s\textsuperscript{2} + - m\textsuperscript{2}/s\textsuperscript{2} + num_timesteps: 65 + num_ensemble_members: 2 + step_length: 3 + remove_state_features_with_index: [15] +grid_shape_state: +- 268 +- 238 +projection: + class_name: LambertConformal + kwargs: + central_latitude: 63.3 + central_longitude: 15.0 + standard_parallels: + - 63.3 + - 63.3 +``` + +Which you can then use in a neural-lam configuration file like this: + +```yaml +# config.yaml +datastore: + kind: npyfilesmeps + config_path: meps.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + values: + u100m: 1.0 + v100m: 1.0 +``` -* `python -m neural_lam.create_mesh` -* `python -m neural_lam.create_grid_features` -* `python -m neural_lam.create_parameter_weights` +For npy-file based datastores you must separately run the command that creates the variables used for standardization: + +```bash +python -m neural_lam.datastore.npyfilesmeps.compute_standardization_stats +``` + +### Graph creation -### Create graph Run `python -m neural_lam.create_mesh` with suitable options to generate the graph you want to use (see `python neural_lam.create_mesh --help` for a list of options). The graphs used for the different models in the [paper](#graph-based-neural-weather-prediction-for-limited-area-modeling) can be created as: -* **GC-LAM**: `python -m neural_lam.create_mesh --graph multiscale` -* **Hi-LAM**: `python -m neural_lam.create_mesh --graph hierarchical --hierarchical` (also works for Hi-LAM-Parallel) -* **L1-LAM**: `python -m neural_lam.create_mesh --graph 1level --levels 1` +* **GC-LAM**: `python -m neural_lam.create_graph --config_path --name multiscale` +* **Hi-LAM**: `python -m neural_lam.create_graph --config_path --name hierarchical --hierarchical` (also works for Hi-LAM-Parallel) +* **L1-LAM**: `python -m neural_lam.create_graph --config_path --name 1level --levels 1` The graph-related files are stored in a directory called `graphs`. -### Create remaining static features -To create the remaining static files run `python -m neural_lam.create_grid_features` and `python -m neural_lam.create_parameter_weights`. - ## Weights & Biases Integration The project is fully integrated with [Weights & Biases](https://www.wandb.ai/) (W&B) for logging and visualization, but can just as easily be used without it. 
When W&B is used, training configuration, training/test statistics and plots are sent to the W&B servers and made available in an interactive web interface. @@ -160,15 +399,17 @@ wandb off ``` ## Train Models -Models can be trained using `python -m neural_lam.train_model`. +Models can be trained using `python -m neural_lam.train_model --config_path `. Run `python neural_lam.train_model --help` for a full list of training options. A few of the key ones are outlined below: -* `--dataset`: Which data to train on +* `--config_path`: Path to the configuration for neural-lam (for example in `data/myexperiment/config.yaml`). * `--model`: Which model to train * `--graph`: Which graph to use with the model +* `--epochs`: Number of epochs to train for * `--processor_layers`: Number of GNN layers to use in the processing part of the model -* `--ar_steps`: Number of time steps to unroll for when making predictions and computing the loss +* `--ar_steps_train`: Number of time steps to unroll for when making predictions and computing the loss +* `--ar_steps_eval`: Number of time steps to unroll for during validation steps Checkpoints of trained models are stored in the `saved_models` directory. The implemented models are: @@ -208,13 +449,14 @@ python -m neural_lam.train_model --model hi_lam_parallel --graph hierarchical .. Checkpoint files for our models trained on the MEPS data are available upon request. ## Evaluate Models -Evaluation is also done using `python -m neural_lam.train_model`, but using the `--eval` option. +Evaluation is also done using `python -m neural_lam.train_model --config_path `, but using the `--eval` option. Use `--eval val` to evaluate the model on the validation set and `--eval test` to evaluate on test data. -Most of the training options are also relevant for evaluation (not `ar_steps`, evaluation always unrolls full forecasts). +Most of the training options are also relevant for evaluation. Some options specifically important for evaluation are: * `--load`: Path to model checkpoint file (`.ckpt`) to load parameters from * `--n_example_pred`: Number of example predictions to plot during evaluation. +* `--ar_steps_eval`: Number of time steps to unroll for during evaluation **Note:** While it is technically possible to use multiple GPUs for running evaluation, this is strongly discouraged. If using multiple devices the `DistributedSampler` will replicate some samples to make sure all devices have the same batch size, meaning that evaluation metrics will be unreliable. A possible workaround is to just use batch size 1 during evaluation. @@ -223,47 +465,7 @@ This issue stems from PyTorch Lightning. See for example [this PR](https://githu # Repository Structure Except for training and pre-processing scripts all the source code can be found in the `neural_lam` directory. Model classes, including abstract base classes, are located in `neural_lam/models`. - -## Format of data directory -It is possible to store multiple datasets in the `data` directory. -Each dataset contains a set of files with static features and a set of samples. -The samples are split into different sub-directories for training, validation and testing. -The directory structure is shown with examples below. -Script names within parenthesis denote the script used to generate the file. -``` -data -├── dataset1 -│ ├── samples - Directory with data samples -│ │ ├── train - Training data -│ │ │ ├── nwp_2022040100_mbr000.npy - A time series sample -│ │ │ ├── nwp_2022040100_mbr001.npy -│ │ │ ├── ... 
-│ │ │ ├── nwp_2022043012_mbr001.npy -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040100.npy - Solar flux forcing -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040112.npy -│ │ │ ├── ... -│ │ │ ├── nwp_toa_downwelling_shortwave_flux_2022043012.npy -│ │ │ ├── wtr_2022040100.npy - Open water features for one sample -│ │ │ ├── wtr_2022040112.npy -│ │ │ ├── ... -│ │ │ └── wtr_202204012.npy -│ │ ├── val - Validation data -│ │ └── test - Test data -│ └── static - Directory with graph information and static features -│ ├── nwp_xy.npy - Coordinates of grid nodes (part of dataset) -│ ├── surface_geopotential.npy - Geopotential at surface of grid nodes (part of dataset) -│ ├── border_mask.npy - Mask with True for grid nodes that are part of border (part of dataset) -│ ├── grid_features.pt - Static features of grid nodes (neural_lam.create_grid_features) -│ ├── parameter_mean.pt - Means of state parameters (neural_lam.create_parameter_weights) -│ ├── parameter_std.pt - Std.-dev. of state parameters (neural_lam.create_parameter_weights) -│ ├── diff_mean.pt - Means of one-step differences (neural_lam.create_parameter_weights) -│ ├── diff_std.pt - Std.-dev. of one-step differences (neural_lam.create_parameter_weights) -│ ├── flux_stats.pt - Mean and std.-dev. of solar flux forcing (neural_lam.create_parameter_weights) -│ └── parameter_weights.npy - Loss weights for different state parameters (neural_lam.create_parameter_weights) -├── dataset2 -├── ... -└── datasetN -``` +Notebooks for visualization and analysis are located in `docs`. ## Format of graph directory The `graphs` directory contains generated graph structures that can be used by different graph-based models. diff --git a/figures/component_dependencies.png b/figures/component_dependencies.png deleted file mode 100644 index fae77cab4655f8f1259565117a17ce86b1054959..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 203632 zcmZ_0bzD?k7dDKdAfX5%9U{`*9fE*LNl3#0Bb_5XFo1zb42X1hNOv{+AHIX1vFu-KJD{OGYQFt*C!FOGIk-sbD5K@5W@qZ?V)W4j z&Beuq-NM$&0cvD#!fyA`EOuLz3=QoW+M8D|)m`H@r(D$4M~E8cW_sOsl2TH>KXd*a zG&PFkY9V^Um$5?E)BJ)%Q4OLnpsq{6*GjCrS!Z}hM&{K^eEfI#7X9wACvCH<53t*3 z%_nDc@|Wrywo&fboFyxh2*S7Ns(+Co;xO49 zE{4E$<&opOGO~kUry4;*m`)B?Tkxh_#Um8B%cIFwswowG_G=EY*7sLAhfC9XvgtQe zMF)AdSSWtDISHO{Q(?h@8I;g(*~#z?qc10*UIThvCKPogr)0^uydVu95J}3@faXsY z2P2F^bdW;FA=CHnTMG2N*FC}a(5i70=dUD2RF@A}b`DJ(mz?xs<+XY^zEeJTvMLyg zvZ|mqbTR*`0S%%W{c=4eV$QcbBl4)2Ww8=mqUeqPBqj_-Igh8=6eP(XilRMtfxhce z0#i5AWSqdG01utJs^+fLG|Ow9th{(UaZEjA*=dA8cT0022A4`#ICW1b`z$+?5qC)l z2$P1QVo$ym(FrQ#aQWVIGBQt;1UC_QeC=|_v*KHxosK7+uE^1V+D^EHA>AKY@GU19 z#FO?Z*SOhA*atqWkt!o|Fboa~r0CVp;c}tCHM{D5oY%TJQ-_|4dE9H+sZY~(=HB)a zTpGzk4}R2dn@}SiT;g2ge6TrDCI-bg8MN#a^L?&WvgNG-J;t(bg9aZ0yVm(C7s59@ z^ICDS%{xYE>dMG?1X+K+*FCo2Gu>go78U1xox^oPDfG9!>$eVati*vlfnr!LhYOEF ztmTG(3%oH^{H3d9b`e^EyJxoxkG$%MuxEcvJ;4`glCHPkKnGD^N2A6Sq73E$tx z&TF;)Au6WF$igyWsbD52lGlp2~+SADOz~AKk|~JtcmzB^nENDuwwZ6Sl{_ zM5oA}DJ!2~F6~)+i&Act-nham!3bI6w4%9g{8o$EL#|2ZxH(&*vEt9%`0WPK%AR4y zVm?=vyTJHUa>vpt=y(|Kbpm)!EZ^{TPe^L~F?G+kQ#Y5wlGfkdF@c>FXoN==8~>j% z|1s;}5Rxq_Jmz{-_c<`q^rVr zZbzSVf16a868PEwqd#-IQ+yDZbn?@0aJbewxlx9lH%cmb!}0c6Dmed67cYc`?*6MK zlaDCQ$j|=V`kA!gQ{D&CVq!5dI&(I*(l`#}X$rURF`4-9`-tBA*e;KSf=5?2Qt1CP zQ{Kwr_P543#eQsmTF3r`J#)tsRp@kzkGM6M?Q^kApT|<)y(&9e?q5bzu=rSjY2u|1 z_CMD!@LPDT6qB!p5AJJL{IFaKeqj$ULS3(6Jxq`}?Crl#UkxOn!{iR_lY1#=CHL?A zzB6+>=m6uL`nI!im`Lk%3CZgU+o{5eiV@lCqloZuA3r~=jEszG zr6rV_w)Ptx9Wr)y_G%>=FjM`}$Y=!>nX3GO+ZWs>e;c;hS=n4dCBDk?A^O$rB8;r- 

UxP%S~PY4b#XY3>D+MVG+Pyr6IZ=3d4J1+Op>$nJ?^vrCP+2@&lO>EKh z_lOsd>FsQ5?TfxE9?#kn1=w~?loFtePe#hF5%tVl6&hfDEPlgGbPQozkWG9K#E#5+ zyAy+^Y%e!f^;TBpHqMPtZe-!I5R%JvEFl-i=4Cv_@tLW_$0+A@yl~CTa)L&gK7YNw zK4Sb|y`w+XF95Vvr7SFUAo|0-!w(<}MyDjlE|kl?^b8BHSGFh4;V^Q(1s z`l; z>cpCbFBbKjUg*ftM6-7<%s!5-!&Npinm9?Ht^B2wTXY$07_H&n1y3?aMlY4yUlfPH z6Wrfi>>7;%Ol(|7HkU*^ke`SW9MRcD8Q(W*OU~$`^ri zN(Lw?Kq1V&9F|R90-zbFpFkSZv;qlwD6M(95^|rfx^VAy9xt&Au{oYR>%)Ynhk+~& zBs-ML>roL8%ocokQ`{bZjZ+>1%9ItCkjv2{D^muOSC$POVFJP$v$;Ii$beYAaMWo= zd_J}?oUKd6y#68i7UZ#vhFcvi--)OiZB^j^?4Xv@ko)l7g$j(^>~Tv{o#V?44B9M> z@n2C2YR>LB@34i~q_Z^mr`C9uQR!^@iWd8gRJh`GXLPdz=*a(h47N854uz#9mASS? z1=F(y^X2?Iygg?wB_&lwj!CK-3o2|)AVbGoUO~Zo$k)N`Of&W-$vxpY!I(Ej8w&n{ zCaPwl%L3*^o|aFI3kJy^9KlQt%olEr2~;^c7EbVQEPAZ;s880`J_Ch%dtP_;XZA;j zR3K_c!E~&f<4@2zU$E6S8Vs80xqN}%oXYf`Zsey=O`&15nYu$!+><^F>lh^a%*X^d zG;3#AH_HEOP;8HKx!ql9N4785?VcM(#N!Al*_6tCu|n01Qdn8>mFoLL)m(!2FQEG? z;i~B%?k)V$Uh%9|#Gy8{=9^?IQPJG;dWJ{CtoFed&ZzRp+8y}B|L4nD9k@yy4v&TW zsWv~SyK%B{{G;&v`-SD0tTAC3ANsE}-*af|wVDVC69137%%=(c^Nzp{_`-xHi z9pq)4%y%j$@H20}zSo!+pg5%fa?RMkNBL_8M@x?L!nJkV`V99v@aYpG^B1VJGeE`e zCjb8Cb0&)!>=~T6no0TZFpc!~w%NSWHu!lH=(B{pvVH6g{QjEe+33g~TymK#{jcQG zjPf_s>m+a(iUHS+8b{pFTa?P>5%B(P#$T0F@IP06{Ev8JwyciVd712# z4IXQd+hzs6w`_tdpTNO+{{$X2qPf_TVl~yjRSd@`)8XSDB2y^GGTVsEXcQAd9Qc2~ z>NAz!S6i+AR)@tKs21BTdL^@rf+12GYE$eEG~krqYy9gS>3?zwdH$-@&DAo z;QO=BP*m=PVB@DJ4d>xNw>CNwLzyZcl^h_$L^tMdX3%!_uswwbKi42@M~_Gd z?+X~&K>Rrf5N%J-}~j0934JhtK|;S2axInsnv{ck2mt zNb$q^llDT zha0pvdYaYm2{vwqyeWt|8!*8w!5{M)WyraGZjz_|%bglpspdpvY6=i(F_4A3Ki(Z{ z9zO&;k2q&BndLQ=B%k}28uMm*7mSzMgQnQcFY*ibDr zR|ryhE5f28#o?n&A&BcE6j1N^Y)M$$U}IS)?07W7ZTZ94sbm%nH7Y_i4q^o<*9-9h zqu5!;^I|Ev#CSs8YErfrW+>S_VYKaQG$$_ckxc(umyo}vVyW0bhN2HA%xOOqCbQi& zy+iAF*5KW>?`{||IAX%yFxY}s_-puQv7`kw>N3b=#;&x-d)MzP@9lFbZ$-L*{ zOoC7>cBPmUYfz@0uJq+Rk#xxLgu(#qf_dM0#F>)M-fxs{LCZw@n(pvphz+z)I^zK%u|KTb33!Y&6knim&J6H9k{#5-Tq+q;Jt$Ru6UYVXzIemxe)JZh%i z`}K;o?`K$LBY7C(6?(Hna&V5wnxH%$oN`JxxKYGK0OffT(_?|5rf@pS^6q(}n%4 zzkF#0TT_AcG()0(|F-zh0xR@a*|*UegM#}Tewmx{@Qx9xq`D4UX8t%}dy zdY9N9r*TmSd3va||0>2gIxc`1k3rtPsl-QcX~g)-m`Odf-;%4w%N3Ct?(0(-a#$0& zaQzgLx;|MC5`1w3DcFJ~;0@kzY#>i#68QLp6iOt4Y4rBsa7%4+GTfQbOK{#fU=>Te z#MTCK1M@+0*a}cll<3uM#=7m|y1`HVtRcd)fZ9fm>O5b)9Zs zCc#fTj;W=euY?;aj7TMwl_m2{`wj2KQ#0Yy(9%#^$MgjTNo)$D8#f6e*+&=0=NvMO zfKV#A-0^9@IpxhlzC?4oLFPdt@rlpEh8UccxSf^5Zdb(XLX$tf+aQ55$247#_7hEt z?lcMGu^#gm5GQdp>|uJOoTAaK_rEGJW8!-#6mm=5e~U)VL<*a01odg3?o$Opox6I0 z(*`9E8gWQvW2Qu~#Wf~mvqH6`q%`v_Yw{dj6Qr}O`0~z2+{yk{Z{_w2w`JeMU`)xt z<1bv55W1n%UW3zboF!$BX`StUX0YQw8-ke})o+^|&P;5ORn;CVcK=(V0e4Y-WSpF_ zIXN^`GD#l{4GrgbAKMTfo6S5?ZH{qz^!=N*-f==$;y>~+T{%K>|OE&+EYVEC%%Sl_IGXl;y2bYr* z>tgA~lg(N0qX>O4VJ7f;x_WTI!AT8nkN0po4mRfBR=8e2$K9< zu&;H9Ch?Xn)ZB*}>VmaRXd@g8pT~kK#^G$Tu7+9vD)GY4k&=@=poTcnjA)`-nyV$; zJ@}rx#uf2k05gcDw?Azrt_#g$o08sQ7j9gm!PUUzWmJ~&Wp*#Kp-%H;7S?7YU%l&b z`)0%!H@CE#Ik$DecMna*NWhD^nRfVmLr9vz!YW=oF|@ywefZQeBwtsGC#)v1pPc%^ zvu4{Mn|oFiN342#e(?N!B?r9VkwC~wra#IrGI!1e98)8m2OGDM^=DG>Xaw`Dj!akl zCu}Wq*BP`-3aEgWwrgSo1Xu9o^JU5ZS2ZBg2s#kKcWiWkO}2n`w1m7w zWR`EhnAs7U(fHYXzRlCP1_^dBI`6W_7O`iaer@oK?#^K<{1wp(L#v3!c6#K450#WI zvTfdeL8QJRGch@y(U|UQ`yT4^g|PGP4i!xKz1$(e9L<*q8(|%zyn8FkE21}Md_F?& z53qwBD+^#m2WV)b*<%rrZvCG%gD{>Wy!@O+IFq-s@3b*VP~PE9evRGda=79wJ-A2iDN0fBt)aCb>#%6@ZlP@*0z4;iC43i9r8E_GPAJ)_r; zJ@;Z$da`Zde=-E{z-(I@ly#g==wg9=03TxV{RW#>X5CdhJN;~Ff{ynhVywyi3^CMF z!y8y}k`fCD0aY$WABPMPtP880% zt@((?@C%tyK{4CoJUKgi&{e@e;U>dX$$v+p@zbFdLd8?l{@*Fd+nYe27XIdxr#~7e zLQ!6k zXf(@3r7#o1y0Vj*IR0s4x&LIL8YDvWVVZH#E%SmN1@=kP;D*Yr)>uH^$$4+Wf{Hzs z9UI#`7xJYpooJSiw!5DK%-`>)Byy1Z?e9hFFYI@SXB$C0mOT@9{djx)IsxiJ&gFzx 
zmkWY+3>Ad~Y@?Jvx;2@c+V6Gq@lEkjZuYa~b^Tv*!$U10MWG}SKLPL> z`Y9?R@)qo=2X7}0lqVgar{EYs6ho`Knzd+e>FH5GDEjcLw_mJ;{*`3Z>IM*;mfF}g zEq5Rki^x^eM9329om)w5M<-R;e?fjFSa^0d@qW%FGRFGbTr)#0ZRE&%ZI{y1`T?$* zb=5}#hp*RTCDvoGvD3zsK#IUTBF>VYyDK1Rz8MQlr27L)sDma%6$Dd%q@DYJdWxyI+JLeEayla{S#z%H`i6{ZKGr}Rz1i{r+LKQcUiGWnvK z>hITUk!1E4OokW8U*UbVSnzzWqrsvfO&Jf74S(U-`*z-47HF3H?gTxJfD8R2AtRDK zmXS+CeGH@RCz^Gi>tEA=2R(g4GZB$Vf2w6=$u8TUvth4o+VdQ-DsVSZPESsZPoaZh zd0IX^b$QH5#oj`AbvI#jJVDCR*vQ-5>rw};3T8coshzPS$NP3LC@zN{_EmIkR%pBy zoAWL#&o3<&muhpZx;s7x;ed#u4)oH1c~yHE(fb)XS#$l1j3>FHI~V-7Z1LDxaA2Ad zZm1Gf_Ij7i(G{f&%@iz-(lMZu*5PsoXv-Yl6-jM-u=u{A(P}nWnk=DAyQkT3;>b!q zw=dY*~<4ZzcNkl;`X!K?=jH5wS+Bz zu)NCc93OayxAj(Z2Cf^=v1%OWV{5K1Sq)pN!#FD1|I-4vox>iQUo5+GxE(fiITdC+ zIs}1>ZuN1khGcXgihxFUY*^^pMT~K%Re)H!5>QhOnpDPVY8>|8y#%B+_|Ef@h1Jg3 z)Wwa7?8BK^+)`VItc-$_1o|r{nk!83lG+QjTWZu%KUPI!V;}XvAQy&btW*=#hS?v) znm%1g#d%;#ul#|I5osOMp-kMEnv=2{Fv=^YG;DKq>`G%lXa9`zH7uKOx9tG|H}Iua z^+&`0bp84Dna`H1$ok7-R)`0py7?AJ#Ow8D#wp;Z~!><=SYdo>~TNDsRE68YoSb$qVO6G+B^Kh zOdJB<(&5vmTqOhg%lCPw6*XF0s@Re#EO0DU5WXd1r85JOW=Tb9 zN2cKqy#g=t$2(x(H&l+*S*=W4J$SWeHQ-+Q0;1jJQBxbdQgAX z+SSuJxwpL`Tg`k9xYchyl?+uymKPPKvTFNYBB6=;FY-aG3xFWV3YJz>c7nbeD(;U) zOfI-U$btx>sCN@XktGgxP(bZys&cw%I=(76e^(chYz|W;jtr8u{Z=QYx3TSkysyU2 z4}{=f{Yfq=*|n|ZHPyYudp-du9~V^#z;K+V`be2AW`poF-V89)03uFg+|+e|6Tmw3 z9rBLmy|(L+*^z6kNbaXmdl@5)9j9FD4*>c#(sF@NLp zjgb*2gcxeEaS$*FS{!qn;RH@-Z;nVG%87eKD zE3AxgK-BnCy{;~6C=~{9Ql2|{?Sj(*9-xR2=k3V)`S}s<$htZ^vpc8(Dr*$1U2b=S zIWDKkb!-Ipp>svHBA^w`-*aRrW#`sMg1JMn0N;|?5~shv-|hbP9KOA<>wRkDHHKB$ z4FT7k^l;<3(ZGYUSQweqJH+rtr036{D{{bM8{O^q~4S_W)WxJ~ z6YhR~f96NzI_Vn*-BQMmdtRycuvxxMjzdoKq}6N1m*@(hJeM!Fca&{;UtC|`G%zFx ziwbB8Wh5RW3`a)F=Y4!z`9u0|D+L4N*&1fBJLe~`Tk_4q{Vgfjk(|{*Qm8>`Y`Vb= z_7bVC4`Bhu@q5zbC3j5K*-?}KYf;cw!ygh66W8v$@3{;c*Un*OMSRQ5x{|NGW6ii1 z**PV|r|tCV8JMN}-Pox+H_^JmD<{7!(6le9vv$UNum5s)F9VC4?-CxOU>w|@FTi&w z9xgMKGROwhu8{xh4}ULFwj5-mHD6B3>!|vs+B`78hg+UZ4^(;M^RkCy#+z(`>jLVX zx%)6ch1$#l9`(;8QBUAyh;OxJ;T)7@w%uQrwR>=_dnmj&yAa=^hUkT zj+WhYG|h2;K8fEI2W&HJko}jBd<{1D7V>usYQ$<;vb+&gkys>cJsb3|XDA8HmoObwoekwFEESwnmNv=sH0IBbF$j7!m2nT2Qgf8oo&tN@1rvOG9fr>^Z_B0AZ%8<^xV4V)X7MLM;#xZiJ2V!3=KJ8 z=kbv|WjbgW=>Q;Uf|EnxvnueJ$qrdN-FAP+yQ+H@zaJ}*53&;P4NU`GrxY6QN6ufm zX1+j}`PEOcImpc8E|?HenMat^eAHtzTkqiQVf$-IsaKP8J&TS=yEG;d(jVz082B?z z4OE}zF67hgihDl7bh%7fSeKogLcJ>Gql?MB9t}Xa2uc31IPXm%+L1P93M^tNgo9!0 z;Pe9!c9%7Vnu&*&L;8g*i3EMb3ZbK|NiW#*hON%n3W#EP|H0H6ki5kH%#};T68xxepk8FCTv|&iIt^OCYs{C1Yoro0btD@Q1UZKZayQ!0NidhvD8K1$QuiAt z_x*gGAXovJ7kZ#lJRw*#R{JnSa5Kx3$n6B_j&%d;%Mltu zPHZ!`oJg%A$v?m4XFD|f1#Z}IJJNYsmhoUV;i+p=?8?nGF5}85wD78PjJG0vO8gcv zZpPplK5cIBWDQz}_l!rTP@CH28rPmqql}B>Qv^oJppnm?tO!oFVpfPQZJfH^Fz!uD zjrM8&Y^VH@k_6aX_N;BC>{I_RTAjkc_csBRiCoovsX0I1z97$(kdWR~33@0A7_5OQ@{y0CYAZP_X_et$BytqdPJM27lZ>pRc z(x24QA-dO!1wQ=#GTXGh3K3x~7^u0yq<=Xn<~UTnXML^z zDmOr43R>gIA2oyfuEJ6cDlGN%0>Q##mr+l-nzLEbP!ZuSEyc0*5?TK#5QnHE2 zGzdU@(l*?Jflw&c7NMtWV>6*+uAP*7t0TU=KTmz}$L8*8)Ap(_zMIe@#p?W=ec|Kl zPh&1oZzwIZr1UnH#9S0!EU6Yfw^<{I)?{a;k=c*0|FJs)F)%fGKUS@o-rxOtb5Wi{ zIz>iEsiNrX{a2Mty~9DK+6q@eU^Y?inW^V9Ja(`0s zU7bdTC;YnKKj={{FRbBYW+i7_Cdt|!|2^H}HF@w@NG8Sb*74qthqWiIy=$!xEEc%CshZQ8a>R4Rbacy8c~BVR+CrAJ z(3BWde>S|Ey$^pO2%vntX0TX_KIhZWZb^-0Cj&!$IS;;xwuH^T_xZnI73kLzbi2R~zi6=ug*z?!y=KH{WTX z5y$NuY9#XxIxkNWg}K)lSGBCP=NNmqR%2E(YEhkxs!NXLKG}Q*sI^wZ55&Z~Vt#4Ff;Q=Sao zY^lA?dS=$~L4ll%r}$;=(^rO7xjk=V z0g8lxAZ^gyA*FP8gCN}?-Jk+e0xFG!G>DXROG`_4cX!u2`~3dzr*mB#*gpGNYt76Z zGpoWrH15s$Z=VJO!sD$^zF9W-=Px=(-kG0xPj*Batp*DgISrKxRpJvj){y#gpN>4B zWw&_+8v%WpMy-@JIsZ)hhI`H%SNfBeD8v-21n#J$dYNIeiNktx*fokdqB!SWmy-3r z&*yu5H2F5^E?ec&WopOIE6FS_S_+A|L}We(n{;zUC{U|y)hWz 
zpzYG<`l{E8?l;l#hE3sZZpaYVt6Y(=q0;-_Cqacx&|FT)$=!zj$YEi2sp0emFM1Mh zwJ}wv08wW{y*i(Z-FeEjL)L<7&g~zTOvQ86Hw%CFIX2^uP{_KHzcSRF6bLF+*~ofb z?hO%X@NT0ZcA=7!#jMUG%8W@Ucamf1fQ!>Nhp^e6LPtx)F4Dob=iaE z?lUmJ$MLzb!n6rCxX^?ZswCJ7%&coX49#?_a~3O~OW1u^qG*l3YJV`;n3oB48NZE1 zfSu`c4M}7$E{^@|NBL`g!dJ!y23RQn)-L+aVTOI4+}7SNg|OI(IT0On@8uc{CQbBL z-+KcHaWfqp99y)&tHvNekj*6Ho1Y;GpO~U(S@C;k*KncW_K&8~ugER>h(OM>DU5`{ zp!;>D25b_C-`UV{9N2oSrsWJG^LJbfRM=NwkQ8LTe8Dg-vopmES+r@hsf5pwDz`5= z7kS!*senyl;|M+HuUbVL_%V@P_*!p@cBazBr_cY_=Z9hRLtgdKmKtO*n?^`Jkz((4 zf%E(3zh-b!n~M#2pr}Bjw}nDtUX=wdQPwK6Y9G<_>7Hu}I1qRZZxyS6Llf~@v!!xP zHscPc0rDTZFFHZgrla0jM=sjFK_U|MYZF_5OD{e1Cj0COM=doXKOp&OS>z(r# z(zDXB9zF4cF2KtUj%{QDyqAw@r;bYhe)&G8Ma37YmNqwhCXJ+GF~+X9;+MdO_Dac- zzD+HkjXN4U`A6@ar!9$TxSo-N(DXrm3bgil3(r`CVy&+}_)d3gEs=I`5es^0$0n4#mv+u~0=&hQB zifG1$L}~TFdKbZ&13_7+Ez~hdDVB#xpf=qnSQMA1dw53*!kajBVE^(1E@s?GrPw?8VIPLXUNWE+~XAPllsR3VGZ5{?x^VUS(@K)duH=wD;~f7(5kz{+{_^urv1w7Ul&-6l^LP z28R8FnabF5o5waP4}Z|T-)Ob(Wx3Y(_!_Wxw)d`U!JCL@yPTyh_HgmCc0}#CN_D&U zytKVNuA6t1d5#Lk%j&uQx&#lV2G8i%Cj_!$7KAW`%q?kQb>ZHVx|vewTBw*2lgpn; zWs=jIt|fc_0I-p$M5bC(_n=5BTfc;wY09w0=frPV$SMyghG}sUy6LI)%|mj8Y_2_r zAORe}zU-^>@;E=)>4i(ALnfzu5Xat?M@G#1-#w&iPtQ|4AzYFr!ePU%ZS=d_yv^I1 zXsEm{Uj@&7gnE;ylyet*2>S(>nH z#jJ>;i8A~g8j)smiNe6CMD{D&TggJ`Il!+Bsi2@r%{qHI4+UJ#;zC!0umrR+=<>&$ z|7FjY6cJERbQSw3(o%z|E%p-E;)Ch-t)%Y|iGC3tk(ia1kX;ejmUy7i@$zD)rk+~v zN~AW~;Q!^5cOSlFBh`s@y%ZA8dC9T+OCzkX7#}Jt+$;lK`2K#O#W)m}s&xN6{as1h z*JZ+5r(g>3LrtYGcm=39{@-ZJA~|xP#};2ykWN z78MH%TiVQ|`FZ2-(zH8rEw#lh9+6DKDrQggR~VI2O@@fuO6=?!I~tMz5vpWn_6}Ap zm_wXz$yJ|g-5W@lSe75ayjXS%(;ic5p2VQy=ed_mTPGjtwCgys;>E-bHS?#I%>cSi z{*IV#^+zhXeGp^rzV~o@Q@;_gBMOjry-P*~lTn0^CqTVL{Hmlo&&hwOBt3x%71l^} zw+|=1nQHNjFgc*IbjbJYz^7<GPILLe%G6QRk+kq=XS4!p9@USL;El7aidr*g3Z;Zka2}Q4_CgOuKD_;Q zrQm?O$QcK)0)*UmTknv#dUgJC%i#MF3OW0^scFYqxAqsN70kZvP0L3=YkYH%-8$%D5x(T>1PfE zAiP#JEI_WqxXzfE{4Mo;8M*CescmRlITh9LWmEgn_gmQmM~S!Fg#%Zd_$fXdo!Cg| zYt{7kvs(<$RKJ#x>T#h7QuhrCx=*K`-#k5?M9S}ug%fe(Z1i6k;Q!mt0AW--`pj;n z|FP8Y&#)+8e)Ay7gz>_A?*ZLzEKHkCe6$`g!6tpud?`StSfu#^;H;#y6eVNoTXki) zn8s6^eHl?NF~5C+?(Wnw-9+MKLaA?BeDWiIxDNeRE*BPnyZVwHx!UbSJ8=IoumthO z6!cr1zD)>r8*pE2(;2UY8aAA{*ntdkLoj?Zc!0@zQXx2=_VoM985R@K>=)UB7uOnH z>wP%8L>Gc3@slAlr7vA(Pvd=1dFq9W5)O9G;P3lD0wheVBOth|>d#kc*ZjrZ4gj!)|DW`v)U1HtLr)ZZJjD1z*9jX6>WC)q3INqiUl5~zN%|Wgo7~0&EEUF zAQ3Ui9<}vx_C>j==)iZkL&5;E*nlZEP)^ZNjzJzWwk&^*E6X4olgL8qb!IsQ@Ij(R%Yf#`YcnF7d=T$qSYj_Iz1TSu>ZGJO$7(AK#Ml*PXZjA7tpOSJx)VwO~a zO4*z%kv8<3W1rHireoo)h0Ug8v%~)0o__l`iW0b-=hhwMLh}L2H9-bHS8MZ9ErV@8 zeQ+&=g2poNC{^Oie~4rE&(qs#wol(4AzwK_c-RkJKbTIpJ?xFs*4&#*9izW;WFNUB zXU9Jq*}xs`To$`rykh5Fb5d{nS4Q>)Aje@(Hkt{$CBwXj06FjAU$$mS4j>M2wKtoN zzD}*MCG07~jrlI-`g)CnBl3^oq$|fQuY9?+M^+Y)<3*B zH;mF^Pq`=w;yM0aW?x7}yNSnZ+Qs3zJ+-H&SURPRT$EukGTT&dvcTp1Jd4w6Y4B6t zxz2a*Q7-L-7}^i4##)RT2*-pDQh8H2pAtOk0bUFJ?#GNclVvK=?q3WJ+Il+o5Z?vGA)UL$JD z)!3UFc-0M_k$k#)be;e+Nft?mrVr(coH> z#wgH{NX|ja9s*kzVwDxll52c)Q1x*G3|^oHR>$m|0y8C|Qz>TR{xZ6(4Dv{ja?xgO z;w^Ry^tlh}LA5wx9nE*?#8H4`u%lhJE@Y+Pc=i?Y8yM(|N;;>TSonG3(pNR;t*k-k z_{&iqtL+l_0R06A%Y6qrx?hPTc8B{gnT@9H6cBl1&l_?EmiyAYynG!otqp7!l>RK` zO-=G>aUprNy5<1X&}RcxI#b>xY@&kRk;Wc8Dl#TOkzGzTfpjB* z74GO6O8&h>brq5HNeq;H%z=92E?Gu->kl>>aF?UFVchqoeGq5JwUJ z<&MGJjBx1+d`Vak7_Bnq?%XyV^eRmaT1hf|y3tTDs361$XqIBZz^3gt)pBYkc+*j| zMx7^;#Z|{E+fi@p_$$!q#4oa1m(!Dcc|(c$lh62`k>W?h_2`w~Il#DEb~*E~$NC^^ zCc&KNnwY%|7^3+O-y5=n^P8P|tMpDlxC^JsjdMlq2q&N7&;#OrVX;p@F^v6x%<#)5xDxO!ci7a?Rd zNF8Iky=NpI6>iIXFu$YH<`2|7^1v`0@W+_8*!K>0Gz|8C#O9d(Ari{9|F}RUhx$#q zN!^4hP_N24q7hlGKm;h}>(^$r!VpF1%!pWEu@n!`VR}2t%{wZ6E(?0^dO{CQ8H`8uOf+B5C~Crd3Y3=6%3bmgQjf 
z>yzy%9;bEO)4O+8q=FbitOxJoL>qpeg`HKtH3{EgA=FX!I$J+kY)k|u%Ur$2tnKBLA!XA?HJrNWXL`W{wceS>1rwlgDJsuM%D|Sl<)*yz* z?`ff8KAfNAyy07*w{oP*P^F{=!m1&$Q@zsZ8vBPffo_WAop_9O;j>m-Jd3*dqsI=s z2}HN3ptfm+fh(V`-2a&@w#i#}KX2~*@~emazsDdyyY{w5WV=T%#CA6d>h~{p>JRqX z$cT6;p6N=s+hgYh-G@n+`UW<8gQLF=*}oa20rC2$VTx_%^t0nPgVZADcg=&1tZjyx z1Z$&}Zf}p9_o#B%*d)S2UgxM7Ph>N}zFr(P`vrE51M)mI>C4)8gPNM$?(S(SwHjlR zP_z5!jo9}#G|~0$u?{FnhK01WwiDnKuiRYl(!BguvPizEGX)JB?%Q^V{~a6z6S%i3 z5Be-h$=gIyL_;jlz8CZchc1o9SfCSs0UM?n^SJn>+vV z992zC^i7Tq1xp@8#MruJ&Tt!+oQX~&GsH&~a6^WQ^}NP{m=>$ee?#_L2)Be@Q}%_f zVO+r}EuX&G`e4r8yLXwvb+SH`D;`S7fxxM~MhAyGl{K~~isi!XQu23zPm+pziEhqj zMW10wz~~1*TUa@Xx8G@1CAC2_F|c_Z>iovlc9Oj-kF2!6+Iyeb1HXs-aPs~)L#Mp& z0DsWkAEs`&2;SM19?6`<;49FR%w^?R2#vNKIJzLv;N=@fSsgFBvl)B!V(xW}gtW{* z!JSRzj~IF~Y)0h}sK%Es)$Vw=4v!T^eN)R_n(d!CbLo?r-|@fm zR^L#%DT!?VIM9vg18qJw%^9vVNCq!#ip@K?bS_S%lb)`g zq;*T$R5Gw8L@aN(9%V?YUV5Upu3?&CMa{3iW~OWQ>g??@YYp9UfpI!k>r>!Of>A}M zpQV1umXXcy51@Xh6pL5jJNu$-to|L%NkKqrdskz%lxV!?KcXF-V`j*Op?A_iS^A(KA2dfPeha) zALi`chXnZ#aDvoOyB?;`d#I+cb6C`fHcJ$twbI9R#oRRx0S&7pX*c0+s4}L+fAdtR8X>fe+Ei z4?BieK$H{(emosfes2x0ri$#GDJU2}Nnbd> zGgx@g{|%df+cHkz_&lgdXT1Cl3Z|_^$;LkHcfckLDi6^Z$3vUg3`=(mU~$?QYF42Wx9wjd$FPjklV=wLs7 zlqX)E%=eK94q6__n0TmraNe$GXV!AtDnTcErc@B>j-Kt&KS}r4B!a>-CYon{*UCHF zkRlU3*Wy$2JGt;CZyRQ89d1d?ybAalm(($~MKsD`O9-*l@yz+B0Lb=l?vhLeSO&4G zSPUcMusMJ)U{;NGh2PKK;cdGP=k+0aS&t3-mf%A)OQKZU8FJ2f=dj$-L{F(73Vh>O zdvm58&DkDZn?u1B)pOi#_*qu9lX1ddjBHa!A8CeAd#xoG0$zX5>g}`L zcJwW}eL_mk$BgR1r4B8Pa4c_9oMSL>3v`!q_P`_@0@Z=z&esqeFd_dXO7Tmq{j(OTeP(r~ zL|nsnxOZ`*(-`Fyphz8ssFe&XI<7WS{1UjkK`mjOJ<+39F2F;wb9v5P?1lm~a9^RV zi?3U+&2)QF)CNdNoIZMY_q1qrYq))GuDEvUE*O1LFinouhrmV>XgX}Vemfx~YEV$1 z%$db|CiLym>0D&)%w27Km#d=Q=pA-$F3w9l!P0`J7JhBm)*WRHZ_=j8`u3X=)bC-Q z874L?q_0YNuPelCP+>eGApC48*_2$s0rM*tvV+dsculVU_EaiUE9PN96(w$L@RQ8c z{`0Y|wWASAGq4qgf9Tx=NuYpYzO zDeNwDK?boKw8e_wCs?^+KJpfx9~(3PkvI4(lL2r(m`l|>kLs^pxoi(hUMaMhG!om= zF;#eqa_ZPUb3W7r4iRbC-#q504lG;2W$?f@;hX@zaQkOb+&bS zBN&>ay*-!YIzr3Na5nCuV_>t)gvx1fe%Inx5>%S=Oh-B03miPXtLDv<)OkY+++#h2 z2S2Ml;pm=$l_*@2rtL8_?h@C>A%bV$`mTuz?d+S&MGZj+Yk>S!4EIZ-B;5TWH1O%H zgGrE;m9^sC-0f0}A=*L~(YcM$q8Ck{&<&3_$4V@Qf-wa>%R8pO=dPZgNavus(G*qDkSLo!jIgMmh)8tE z`tnXtNH5l#Tkh< z@Q+Lr90~dg3}oU(bGtqENFkQ(VpwHy<9ywA?65HS==n#+xwFxqZs4M*#!Oze-<3s5 z+9|MoCi3B+sPO9$n4YZYRVhz3G>CZ76-8Q_oB!4O_@t(#p&4I-g8gN65ba+DOhlC}3L*ZuB8GV4A}Eb@SmqgM8Lb-R;>Kw~*hV1z?1+;pHeF z_9_scs%e2>#LU8#)^;d)?uOhceg?u-^`9dRD0TV7^ov9=Z?(4fBiaq=%r8iLu05z#iH--tNLd< z!r}Otw}8ILIQAg9_0V4abH7(@VhEATX_}Ej(yL_|kIe2!((uxf z^0Y1dWF(+jUBmXm>jU7JIG%ey1pmbg3eF@$y5ZZi6Tcq8KYEwsrA6U9FO(rb!|6!j zHf1!2c)jJ^ff{9OSsSM zD)53KD&pps$%Jjh?uGYV-W9oL9M3S`5Bi;&#xLl9aJ=Xu`|sx_R}uz;wJ3%yOKx>( zF_=h_MPoO0?{)|C)Z~KQ955?>@33i^;6Xs?$x6+f&uvQGl$Vf$JUJm6f&Ve&--uY&*1Z3(NlH z({QtA!I^;dhn##qpyQ|H`cTUq8}(C2&XDz&t%;bvv-pX1BsB?Cef-snEKmSepV`k z%*I?^L`lJz1nS{c@H?cF-&NONfXE%|k(QR3w^3jYQtZ!ONR@JfWI8Px@auHdI&hBFXy48Oz<`=WDz1Mzxa$0sgf5y(Zxiup_{|OG6o%)SVUe8u#p#x?Xr_B(47SoLf()(c9;*+Odk92i) zHz&%&F-TrA8~w%Fo~&$w3t%2NVko+HMA9^Uyoa_roJU0Fb@&KmA9k>M@Rbj`PIW+j zt?8^Ml+jYU4InfLnIGA}c;%It7OiwWarTaH0vQOA8EFFKz$8Tb3TBPHmj{0586AJ< z&`jv;qG0YmznTp`se}7;SAX!L#uQ~xWofKyO|7Ud^xyz*Vzl^ksv^4MEcN#q+vgK{ zi<&Fap>fCg#%+ep1YZ9yY`41BC&IqwIxlQ#4?R`O9Wvpa=~pXvvlLpa6>8r}3CLaR zCYumZ3RI5*Hi=p&^1(flDXF3*Z)(sA5n-q?P)cLGzb}uRR+EDv2@4Oe6FS+b%K0U(k{AEosF&W z*9VynD+W(d@EBIQmba|V*3{*0iWBE7uY1(qZ=Y`ATP1UTA4;ni&h1;(_OJP8$=2?p zjfLc-Htc&q>UFtm-S-4V9`hpx=?icol`+ccfU56cY1uJcaL~vj(B9sD-^a(N%6Y4! 
zIS3~xG}O5DGXb^3nfD{nPN154zdRF;74#9X9xu`V^Zpj1lwHAg-@c2a``D2od3y@= zImNRu167ONVR60H1sJ*mT?0OuMGM>0&$hQo61=4T&4t`QUxDOW=={!oZ_!~+GatKX zt{k=A{wr(gLES-1qnP?1*`VIamOxV;J?!T@*xpCj6h9wd?)cV#iSuWvTg=bAm%mNs zSBQ@;zAq3(51hd<=pPP?Jk?ObStxSiw8~l1t>U9x#v>P-da*3`L~gnTjZZL7?fcdR zDrs#&&V9Sz&r)48{^}VB;j-XKJ$^GOeGpxYSr%?B0LC=iLJpQC22--CikMcNGpiP| zy>m2l^bGLm`}zA9fj>V+Hx@atB6Y)SQ*F5~Qw&^(Nbk5--OEl((v1vuM1CmH+A+k{ zW18h>kcP$7iaW~DlWZ22lnVpHt(ihRG;I^LAlTF5wa<~nk}$Ij!p+0@wUMSXolqe@vSVS|B7Y@Yi6crRE$v=ib(wOT>ZD$!R?<5>Cn=%i}q0?p@a`fUPi*DQbfEY&-M?EI{jC*Q||s6nd(+bOWby;sEpd) zjyUyrWR$Vu`IW1_i{2^k2e63}^4@MP3&ZQ9=tws+SvWp|&wE{~QN9yTdn&x`wysHZ z@P5v6zV`hJkJ4W$hpCK8z$^kZ@1NBn^oLf*wfHh!4A}tSA%$_^h<%ItdS#PM)iM+Q zw)Z0e&v8YMgfxp;%I$ad15(e0eeHI4Qyufp!}skw`I(*tvx2`#* zc19LfVw_m;OSI4;xWo+i0ma)4)`qdc+b#}thFLFyvvbu85Egj@L&LG(#%Q*+N5j&+ z&lqUpghQOl2h^=6lx{AGtY_wz2HX3+_%Av;UL+F zEF7l;wJ???qI$_0P35zT1ttwnPx@qQjbM+;{__PzF547V7Es#`&UF7MgNHqgUK{;Y z%W~+$upuZAr;2YDSM3GAk(M~*3>>wnI}3)!#Sze{{rWR75JA9Zf`yOY1}taM?{CTV zqKW?g{&hEw1-iA|-j@}gQzBnEb8UHlmY4;YR!WhJirZThlbHwIAM_|-YWca!C{Pti z?W^wJ;ip&I{Hty&dpHvZdKHX}P2R0^6u?^ycClf^ykf?E)16T`Q}3^8P}bGsm8F zKOt4Y{A8tll0)_NYq%?LZG(ndHN3+;>U$~0?t8%vN}rakKL;?f%8u!jmSem=tNzJz z5k-U@ESMWM8;#oiXU#s7OPw)p8+qjtGBQS;(G2Xi(|kJ-4_@tlT`=PyEFolYDR{H; zr!hk$U@8+pYE9pL9f5nTiT2E$BzFZ5ToKdI${i7wHqXdX$bg2`X z;3+6K%e-4kE)40$lU2_0GE1)JaS+DmdHFh{=FBCKlOEa6vmdyli=`;A+8+etPqW(} z#RjpH{=+GddMqI?x3oJSHOc3IAYcdGc1V0RKsN>Fdn*EKRvCbX=tX@NIW&I{=CJO9 z%saci-7{KbXx^844`|4iv9Zd3D|k$qR%5>}LFND-jxU%Ti{H%&f$$=dAFP_+Q%OR^ zQ|)!7WSxK8#kNN*0Bt@Ls}8B;+g`thftUr2pUKt#EF>X!Se5ntOK(h^h zB0t3#zHG($_Q{+&<2#cJ_v0&mA|wRkuT_^PXu8-jWfgzUU|N>9@-O%v^1*1ehyKQX zHc24#hi=cv<&U|$Ymf257}_?)`ud|@9#_|av+v2-;~l#$>))r4-);13h85_wexNu& zQurxDt(^7t#11R-<@Gvehm8Ga!!L}q@Uq|2_Wq>Z|n`%Yqb+F6$!NtI}0Y$t2? zsAI5OPR;go5Z;89vhDfF;Wx-_M2Ft>68KrX@=uzQ?*ur2Iht{6I@wYOe3~9BH=$yA zJ*8#VdS zAoo_rFtofsV`DDv(0yw@SdRkqoDlTXl+JiijmTC!XaWFHSXyd)x;qD7HWIqqW*3wl zC+blXx2LKXc6NFj{n5~>T&UXQ2UfzY-(@c)1%;FL?$xD z2Ljt+GjR$&IsTBOqn1b=9J4UH4;Qa2e7lg9g)M6#`(_U$pUGP9sK^QpP@mr&?y>~l zSru@@o1*INIO1Cv)A6Izp=1%)XYPb1%jIG0ESzcFtOjiAOcE4R zysxNs39NhkG0GQ*)R=a;!pC39SSY-`cy0IWL}cng!(<2H%6x{yWfD#*wPKe>i5i+h zj@5lFu6*lFPefzYv*tTgatS}IC(2@DV+$=u7=Nnf9#0zPq1KCuxL5`CK8usrL znW+&4$pxCOMk}in%3hNI1JQlN7pm1XlFb2ZhS*}Lg3|l~G`h96wmtZii{1~(_Qo^%pXk5Ki%HR*e6;=1^S2pVqi8uT z13So1oLK5lt^?th54x`zxLw`&I%ANKm>ewK-T8eKy?~!PTpvP%c0K3qNy)SQrDPBK zJ^S>G48?kHuaCaIJa+SU>*|f^mjV|%13}q*na+{yh?y!R#qo)^0KGwBA3bIt` zG;5S2qnPxP&JI=va@CmxZY~{`dy{Q9BmI#6G`8Z=@dglUGX=&_G0=RSi0N6T}`XE|VA-YowmDx;5jpV=C+ueN&Lop&5-&DP7Ww16De>GWq zn_$BU&wbf5thUTSDNCD*Mouu`V#K!4<$C74&3NXQ^axp9%Xr^hWgER=RBiIwLAG)h z22fI2vfnuo;{=R5qx(uNM{W~nid;E4-k-I)!z$E0dkx3TtFIJ4l>YWE^Dm3vU zQpd1$Gv+O>(8N{;DAe7J9q{Kh|I0tYu}uM(aOKoHz)qsD0}(KKo?;*HvV5`tJZ=0` zwF`beQpD{4_X4c8TIntiyT)s;RPP-6sFoh*(I}=_)EVS$ZtY{7W3C-uJ?XELXe@OM z%4&6sWa_MFnvnb)+|V#UqK&_TA3xGG`2h3aqvgu%#ztq6VVl`R83Vuj;oJY?l>W5h z-?QyW;7Q=N`v`59O4ZKRv}(W1;N8RK{3?%ltAWoc1q71VOnb1%n966+_Vj)mv^)U& zx;&hiG1#+7NlEtv#0{zBhehtDG3%Riw79g?Qs;HC`Y+{<@TZ4!J&C+BvCJudRvz7J8XqSFhHHH|PrBS@3KbO< zE~xDvHAftm=Va-(z9E9B-&Y2W!uRm^N~IR6$2&86)c)r04e&VG9qAK_{Fy(s$|bpK zYv6Z`%E`%{UR(@h$`ZkVr@w)ZUi>p-_h*f9wTq%tH(BnA)2Vp(K`~8?06-pm;l&7q zX$C(3vl$RL4+&WPpjB&bXD2))L^`EMy|*DzEG&uoffL@d2QjzJHWx)nJj#U<+s4P^ zVT1J5dbpVMBuL&|U$u92(GT*p91yXaVR>B~8^eeF(kN;iT|<-~@FlqaYn9d~SI(5t zA1%~px0#IP_c)$iTs%}-&Rd!7xkxg$WBMadD^x?jy{zVVKrcNd#3&=e~QASZEQ7K*pTgS{;aw9 zVlhHqquQCFp`n4@Vo*X$99I!GSqiX^T?sr9M;jvuP>)z|2xI^Is1I6s%W!G*V)=}B zntFjQy z#nMtFBPd~?f@x{*E)fyY<>jS>ynL3&$+nS^5%h{9o)sx6UrvSo&FA2ZkpExDG8-Ei 
z{exW>O|RJqIV28vD5;Fg`;^TaKjac$ZqC#r?$mhcy!m&$)G{>jo2$^!Pt`b1YeFbM zX2B}O`G3#+(UbMR4hd@VwY4?Jjp0zIjbRq|L$l?c#1NqrW@jy-z}t)e3m(*Zb$p26 z(eUvetOg%q^B!)FA>ekQ-GUg<3h33jb$|sMl@sK zVly!@9dAvDPgOY)uo|Jlv;byjsrmarwq)s4+9Tf8zkdq<_hBgicUTnveEaqd5iPE- zuVv!dgOa_^+YzqU2XPS*IPkL)^MOx6I8>|#&8X~zp+y7On6R&YL$jrll2ZB&Vm~VS z{gvbUZ?*4HGctxjPJD!g^#_IqbU;`@7@_7Vipt7NAXXqoFvPK$V#BDZRM8M-4h{}y zi`&_6syPwob@4Caa$MbQvqxfIvB^5YLIKFXZ|!ERdgnBL`{u(!!S~veFsv8PoIh$v zV!aUAtQDupP|YiI^ftes%gF1)6SbDjIF@@Z@(arVtw@Mm$Y{q2B24F!)EANY2gxf7 zb9?W4&pdnFk|Qd%y^?!ZuHyETbIx@}>mv>tcqQd)!ZXV-mWM3+4-Uu_M!wXSHz)8* zHkbLhx~|}@$H&KNOw6&lA-4`YN4e2_e2V&5h&=1wYplDkv1ldi?O8!hSUOxAfN1m) za>37+NG<-EL9;Vk{W(g>Kx?oRAS@XczI0X3sc&#Vo>Sws&E=PWPJ)DPO=6@(gRF>m zc%~#kM`utic&#TZL>ZNung5|Glk#3A&#Cw2)~{129@Ou_yPIpTmoD2~ zz!_*(+EaiOrsO2?@o9FT$aDSc%d^qZQDztrQd3)tNIwvGeqwATQhk-sfZsLkO)`d{ zA|fWHlqQC*8bDFA@LMUO0=EkD{lv<1=AyNGe|*395o=wc#J{ZitXqMHA1?Rw>lgPO zMkIf?N~`Zz6s$Rs0(w_66AgxnBZJRDAi3ip9rdPnVo~TXBjeqj)S>r^gBHVDLvyH& z+LDIrv2{pK2v&2sasT)1KWR8Qqr*se?Jth4Ic+8-pwCx@6({o& zsdnB%efSWXKXixmYY${(WI*YAK}t#rB2R2#A+w^pr~-|!1KqjL2_4Y0>}ES@quv*n zG)AlhHd7bMGr1uK)j5fGkT4Bwz9dEqm7dG)60#JHde03dVve`xklgfVE4!!$JV&)< z;Gk-=;O0?l8gEn?Dnf<*@bRe%4+{&6pY(+6Wx8Evfk(S9Z zQk3ymyU_6vm{**kztWZ$D6Y+ZO7@-?zxN&A+tSp$3`3Ij3J--?rkP}77jg6Yq#y#> zs>r+~cY`h`n`A**u<4DN(<~aB8W`!zM&#EPIsKHkfDtDc+jH05X5`rSkBeu(GZYgd-933zad@4luEfN_;Coz_LP zv%xMRipq3FS9OA>4mYi;IyEopz72#YjupqTd2rI{R-X`v zk+@*CY-oR-n+$%aUQ?DkQrOkLCMbULbj#bbXsf~JO6ba2K*ax^vWmu!lAYO(nX}d8 zSBIZva3^5TPONx}j#g3pHDYZ!E_9MMpC1VMTAwc@`{qs2)!%btKU>{g$H_Kp@ov2| z356^w%3bXnXNuOko$%z{t5fZlwKr>>1rF)9MalR7Op6NkH)r7sU)!>bpCh*%lO5e? zuQ`SH*5O0!$bR~qhvC0Ht{?_89a5xY7;*9Ne*4^bLlu{?{^joL?AwU?1(A->KFhMbkApXC6!5U1KO1LlNwz#rfXwd&wX?rR|GA7B< z(}xiDK(n~sbE_@v)bN1lHCIyMwt?)3=^aio9#yuHcl()q#4 z?Q(~`KQ@F;hswdd&8=Z9Xd`L1Ca=Ba@J8fG_1h+GM~in+V08`k3KVnl9VX{}ihiz% z=zEj8r1jvGxH7`uzhIy@*d!wgF5Nkwiy3tE*mvvwEw=T0TI)4dlJNh}BptJZIm*E~ zGp}F2E-~v9YI$`3qe4_gMFkcqsT?3L0zOw}>1cZW%hNrk9cq<$*opg4TOmSr-OaTJ z;2u~EKUzk6b?_jtj1F1;{X3&ZC-&g`IL}+nTD;=wn5|-8RavS8?Q)-OZ$r8X{B0(* zHcEznEbO=cWIFRRl08^dGveN^4N%C{Btq)S9r|%WHM4iJ_)Le_8#S}z0`Ku6PKhI_ z#X4clNtPg|(;|h81&3w#EZNOsZ=bi{O}Q8ikM&N!#@Zzu$ghqocoQq$Obcue+^JXCLv3u)&>4;S#kwS0U3kQ$VIYDziLI?II|s*obo9qTklZ?2dV0d3wwauoieuLQ zsPSb7eYXW)?_aU0y!YjvI8Y9%Bw-3IZ+Nh8zskK%iu#g!2HW^RsAe8X3*lQtbgS#oXLc+ppeQ79}nVEimegH+% z|2r^^U0q$M7#Nv8H`iV993hdBxPZ*V#~s%D9z<=}U6}(f@PDopC?8avG=v@>Rb&=L z8In8}-(Pnm3uWx-?ogSwF?#sQ+5M-1w*4dtl1*~REDrv{3Mm^E|6^e^H}h|~;+R-f z=a~7)e5n#zg8BLk4D|G>pOKfNk&Gw>=~XXQigMoIzi*!4(p}^J_%t$9LJsl_yyco^ zh;?f-^@4C&Ej+YBby+qWHELt-z{rRx5DcG*dA>o>jmSQLn)(I@Rh-Or$l2J~h70t< z%gec-t}zC%mU>;HARy0hS7Rv0l23|=-$8brFkXTj+ej(LU}E7O4#Urp<{Z`b5d7QO zmgaZfJu|}36(9@~kq!McT?d=j?^kx<#ELwPSaQs#PnpAMSQ$*_X9n?A?yuG&<{=$E z{J*$8ml*l=YXg*<=lyGkFz1U5uzwm{cn4*AGPpTvlaL_DQ<<5YBk*Qk9s?R4{ZkH( zDBx@yot#uj#(CR8*Mcm(-_13%J0LH(2kp&Jzok52Eh<{+@s}WN*2%Kbwp9%{7=NPK zZc4QJqIGNU2GViw9UgnnL4XW@+8lE?bq~VvGMWyOi%07pMg@r$6 zOWR(c?mS>Il|gI>KbL+8jBy;6c!I!iI7|RIVRKwnfChr54|h;R479NPq@I11KuF&X z%U$|#>Jb%1dOEpkz){aR;KjB63^bq&z@l+P_1EubL6SHX((|$_-;&iS0{@NCK@unp z=L>Wfx0f84tdkStAHL#xB2MWpxAd3J3%| zxuN0VIX(SDuyK!&q&`L6iG`Bv0Kg;mqeriyBnu3|X~$Ak0N4k>1W{vJjWT0nW9v>! 
zVkPz0tURT+9Hcy+Cs30ok!?JpP8StLGRnxv=C0~UGd23y_Qg&jBv|91-7KG!SHi{w z8L2XkV1I;QE%}v;;=9*(mf~~nORu$ z;Z^rqI{+@hE|-sGzJ-Gm(eXeQYx+KG+-G1Fo_>`;It9j{Z)nK4KV1@uUh}sfrr;$O zR0Oy0++ks3Q#P)Eqa;iz8}A3(4qjtJQh@h}C8_nS04jI@FA8HlD}yj1F&i)`b0LWk98)n8#YUu#ss+VI;0bohQ%P+1X#IBD3K5c*0MgK3Pqap?i5F~IK0{gzl@n*yDa5Ut9fwM&%K0ZDu)NwI=@4j~3($Uc|8^}b@Rm(E~ z%>?+sN7~xji2ZKo=m;xV1kO)KN5|d6Lr*uVxlom-{(l!QN8IJSlpV$&5{JflIk7uA z{^MY7ttw*+lbtl5tVFR!fx*!|ulwql1;Z^EL+xy)NYSm`>{YZh0)t9PoV45xEVOG% zVpV8GNB(>nXW%Q$QQnD~XXxroa~D>J^e8U7(9SJpuK4VeN%WxeNpoXLrc(098Xb$u z($<`e7=!QepOEvX&gVzd?~S{kuM!d(8d%CRln+;&5Ckj#KepaFDyuK*76uU%5Kxc? zNkO{1lB&53=q`Nysx}`g$ySux)n{PkA@4n-XJI4DL!vUV>Ip^%P_FQw#HFwdq z5s$jlXxpH-@{`CPL$l!YD0uY-7c7khxhyeToT{75av8e8A`V`TOiA&hZF4I-^Ma27 zl4Jr35-IaT3$i6GO@e{WEF#A%!QJ&zuIg6=L4s0R?BG!3u1$m7i{*@GtiMb4TAe=>`fr_s@Ieu)t%svgcdC;MFIh;OQs`B&mM-<_E9B}nU1 zOk0~WWi!LmTYBsBaY=KsNuuJ&NQgnwR}6+l#5nCE&xmb!0!POtaoU@F7AAdr;?H5b zeY%yPO=<4R9js0k1M%1Zw0_X9U%x2Z(O!RpA|xjE2Kz-!OiTwrB)k6=BQ86lIr0!~ z53rNwuv+yJl~s{b^J9$HE}O0Rz&~}_><9yZ-_}dJ-Y65Rl0ADn?pFn=c6~zurGT?x zwOW?M(@Ht7F9$?#y?@ISc-#^J`N1kkHgtGAG*O~f4h{zAn|);roZp;WkCqxSg9QIv zo*bX8fMo_;5z!!sK(Q3;PgV&P6QS$Za-#RUmKK*Ctb{{W(5u|l3SxOh3B`*vy| zLNLJ1l`&9)hX4x)C$D<@*cF%^(b9W$w2%OrlB0l7pFvLsKG%aP(ianl-u7F8ury!k*kkGGgB_XkoLX?h z3}6Y;^C4xxDO{cP1{~n}@_zsRYRsG`Nc>Gezz57Kt{)y2x4(s5J7NHQCfIPUS^ucpoh61-{0RD4V|2t6p0CPo3P;1M6(`QWj zvwS%KR-TQG4TQDb?29QQ;sh@e^Zy{1cR0*}5W3K0@(Th2f+GGjmw=R17!oe)4;`I< zKyfB2F8&`O^442+D`>Nd@NGNpdgDn8V9wPUUWrRdAt53nwmjdPqKlBdwT!4V9EpyM zJZ*QGh$tv17|WL<0TMO=b@llJ1UYc0epOXfr#Cl-U~h-iLx5C|j)}?I|GSJ0U=^qx zgX`+pfgI|p9k%2@BFTv(?2d{JX0)vUPLn&Oh#e1t77{WuiNG7m(v2c1Q0xbuzYMIb z%GQvjH(O~KF>VT$z9@A*_;kRa0ya_vY_}j$b}lY{N7q3B+fU^G!|ZU{?Y4nX?Q)$q z9j`W3X}MiT1YCUwXjp@AFNAOi;4>*DHtvtup{OOp6!C%q9Ha-Y6N63*jKmvS+1vj- zoGmXz7JI);30PF6M@{fhz>e3{)MNyRXi(&V?rT0h4uZoItVyAn{KgH^Nt`F8X7C`S{kVpqtFT{(w0zaKP041{uP$6)aEfo0RTN@k|^#N3`khX%|$+9MCEu(YMmoF> zomo{^rj$<#U7lu9!Vmr}?pz-k12aynBtrbQy>gP}s|*TrwIaS)T%6h$l1ACWefrnm zz6nTqm*?W_qbiBWSI^DOeZav9&d4AD^fe*ilm8>giffa7d|*tciy`a+po!A~eE**{ z<1H$k@U2C$KPvXMQUFntFk8h$T}+Iay(SEx>YAkMIjpvbJG;NSBo}6V01$SN`nNER zRVfP2de+c@=uazJmW;~2yKMBp&Z^VxF~yiwv;1#Vi01EDy57~CGcqxeu(Kz6Jl!*x zPE!NQ8v@Ktbd`VPKn{i=@O)$d<-`vF6eUyHWhy{M5{bC)-;BY%~;cIV7u#43Vv%@{Hh>S!Wh;p zu#cEm#O>Mx{r>(Q#GDUUQ;<*q8AIsXw_hNRiiXm-5$;3JNj6-BuT>lnn!wVZHT+ziO!b7da1URul_O zDus;fSu6`dyfskXZ0di^SW8MzY%A%t0@S0B@VHMfLN))-yu9d-xLJ^e<gzaAtWSPjlu6388AwfnaB#r~d)=mMYVk z@V2(EhetjR>;o8ACW}ROu>6{cP%T$}IMf;_nvTZGydWx>wYt-K z1K4e}dK4mfzKKK!_@@It0-41)#9(v4iw}ke3TX<&6vvXX4kn5!sz#1Gx6FFH)7XKF z^We#`KWqE+oX+iw(d%ZE(_do(O*!bA{RJm{18+S~$ zwxy87S&wupXug?$DprM*q)3($L!_7x*h_Q+unh>g^|iHcva;_%yB6S%l1rd_vwFO> z=Dpt~NoKQ@20a-_+W{^T9i1qKLS|rCT4L?yL(50>&5LL46sxoScVBh1Xa~sN5q{>4 z#|cZB7+t_Ds0wT7FAL_ux*&ag40a*?^U(p}g-5@(dPhyuqw$|B48t{PcB9k%TL+RmArrd>gkm zEESdNA?|@*q!0(Hw_eSc%*-6Alz(~y?x`Av(pGzIpDN^|$Hpf*3UkrS0|GyUDoLd) z`ezpUIE7pS@B^~-XrrSgHxJJonDZztErr~AR!PZFcNnhmbTKxla5uq=ZWc&i{~s*? 
z7EbVrekQw=IX)nq`y|EMd;5jUc>pA&I4hr>G3|`?d?lx#BE6Qq6hTf&jfD>*kZZv$ zyX|YO_Qa(3D$l`8ACP{=qnR2x=|$A6FQ?cd&uYVeGZMx?c?)@u{MbR8cCh^e!(e!r zaGh5B^OFbhoU!po^pCnGh5`zF1UPmUWk#+Rj_<)>N~Xsf6q6b&1mYDn1mKvo8 zfsJ;4m5o01^lUjxt|YP7T_QrZ20OgeChVg&vIO3q+R`4<$Xa^-I!4B&lh?;xUxtt1 zbJcxZnD>@*I&H;5g4rwe2+k79qFv~I6~{6ooZB!|RAqnLxvsl97n0I+jvcJ$`PZ4= zDmdT#kWJ1TyQ1f+fph0H(c7b8J;I0@^H!r8nnqKT(^Ii&dSN5u8`{dSqUo-Dsq5z^ zSjO?qkbHPczp%9rg58U2JFn;(^sij}VYEMw< z_2Sp7OQVRK)#f@=c?Xix9!_z%Qqxx?VF;;M!h9rDqM~kPDL80oqAOgF`E8gOtq-P< zaCbO^-4jsowpYiMoMiA|zB-8GyttiL>$dvWUP{eoRWfqwXFEDMMmu3Y z%RWPPKVY>^E3EUwLb!5y3Hr<6fMLfZomi7|3d)NHY}*oILS5C%d*y1A(LbJaIUA|V z4Iv#x&zUDT+PpX4?>OpjWgdjj)HSp-+8^D55r%Dv=zS&rWrlv|mMPetZa`_3ebm%c zKuc1jZ0+H?mLFIOWAGdjnVEg-&V<(fGAAhQ0yp$g6Qz>~P z;xwRdSj72ijf1wc`)toY?9y~BGzfnou5d)e^hr!|wuTM5ZiIlQ0u>?9 z0jRcm@BlET0`|2uVCY2;%uOK*f`|x|EDct(orPLJ#g~ws4uHKC@e0d4|^_XgQUcm^8TOmz_ zqQxEnJcd3;PsFul%DI!h)FL3!&;%nO1oEN|&!e=io)#i8pUl8;VH$p)cZIT|S`U?9 zxy>)yG|=arcT}d}aHk?hg-SsFBdScYY-E9Rd00%;DW- z5=)lkS&^v;o8!*{-;ubq zs5#t8fIX>w^L*1~D%`e#o4paHx=RC~-0EzPiA~9*H~4?NO}HVD$FWELT~O5VB7G>* z^EG`ioX0cBymH&-LZ1}Za8UO$S{vlYJc~xw)Fo`u%e9RvyhW8UJ|+%7-`#^mj#)jW zg>tYgWJ|z^Iwp7lzypG6+Y9y%-8Pl2H@TxHFu1iR^&9Bu_EV`jZn8hn30r^W&7ADp zQz44!bF#d{8LzO#+L(&p<7v<>&KuaOb7juo?D`|U6il?+{*$wAGN)SMNStSeE02&M zf4*;zNWlLTC|DGlUF?+t&c)rxq;>#trhZLucHWJ?df!I^>PGcz{BYL{o!}!f*0|Q& zg_?|3VkU|x3j)uJ^KQ%ftY=SKgAs#5HVfM53Ck~-n3(My9eeZDRFL6%U<4ORpzSuD zd1`_|t=b0k5x1*8C|0(%S=Jl7L1>AgQ=4a18d8GwG5^rRZ9ZtL@$w=-`t)V23%*(9{rN+}5 zNqf2}?zOm3k-Y?o*Y3|^f{#F-*0e|FkExj;QY~5k&cx_f2^q{ta`QAEbq0?i-AT~8 zxml6C_X+sf0%w}1e(S;Oms(}_%3yH){_HJs*>k|Y6wdfw2RVWAfumVKas!Usn+Vy& z8`8dnwB+vQqM7nyHJFS2DJ>UX79{&Fmi&UEjooSA=?*w3tE({{1rs_s0kBHV02lVh zz*eQZOf57$hyfD@-m`3@mgip~@;N!ahw5+v5{Zl{^D z0DqUQXUBTZLoDq#W}i7T&qeGIltuvPDA1^-g>d$b2qV{4CtRu460EuhmoEEWd~Q|g zUheKuP^i8Mpnpul%>2>O(UD9tu?hSz4aN5w9|g`<`4k0&>+Y;ef`5%NjPA^j#pvhdW*X|g<7O#r)V$lbK`e}m zjqS`9i&3r6heyPu%?AA~Q26A(4#PW~ySVEqJ91+?CZ``=7M+VAoW{4D28&*-9ffwr6u^jHr#G1mqfY#*Rq+H`(}&W&Rn2O6zoJp$+p zQszZEd$;-q>Y($Szz3gZ(qn}mj(J)Ire-|=eD+W*w(S2;nyqa5!jH6fsB+a zUt4lx%a@|b_aRqtI!ibP!{}fy2G217^hR`#WzjB7SYq!NTdm`&-rW4PxA`~PuD7pLuE-_Q z<$8EVY=i45YqR}QJ4Fn| zIbQCNe=jL1=>r~~B#`_hf+tHs@gHl*!orYV@CP)^1Av-|$!L@iQq(*@KY&ghsbcgk z6(+}Dn-8Goa;r07o5E#NIBfjuNlwLwqm{ei{0={2=M;WqNqeLc&ByaGYw=7NA6Zyk zQFI2SwbSr?z);IE<=j39kvM&+HZFvyk)ft$L+|B>qy98bwuiG339_4BeRYNkw2{{3rJ zMniY;jrX~aOo;0$31q7J#0y1&Xlo%Pi0XPw%(94sW>5GID&aakhd1x%5Dr-O(&FFU z>UHBqNiQGPs638mQyvfT=HKCZf!}t`vm8vM?RNA8E!Jp6rG|4lsd|KIZOJ_Er(m`k z(&?>z$v96%bwr7bAhn#VM*EObV{73^UlPz~x~mD#;|;W8AY8sUx_R$IIh| z_>ux4Jb*OKWV-lnUN^E&A1|n7IOu?_uYaJYyYp?C%W&N+4|a|f9ZT3mt%WW4u{lV6 zI3#&$<+M>2~;Mt7->yJ1z;zoHsgm> zmwyPu3nr%rSsTcolqxPR3V!|!a(r?r`{?N4(L$V?caj#KOZLe=E`}wF!I9nR4gtHq zezK+_J5NI)^eQ&S4)pvG%dJ7FLPSIp*5u7^qNX<5Q36lq;_|uMisLwayxvq@U3OtU zI`#2YxUhE5SCJHDdlQN)vEe~t#%*L}VSi>1liXkc3)YvMN(|(Tg{zGh{u0C3j ziRjNMgDQ9V^9UBhX^f!Ch+=IQHirNvZ!_~&BQg=Hp>)kYrHa5oU9ngpZFgfm!$KL9I#$#TB zaK}?p2ngUF(EPXqeQwn%qYot-wI0rSD(`D7C|og~ziKHPl)HB;*9>yo37xqdRh54Z zhr?yCgthR1VXr%Pl(tYC$-gr-ny;?a^#q!)=Vz-gH$|5R$w89#(G1~3#Y^|lB`EfOS z9t0Do<=JwCDr(YE0(ulEi|<%@1m>4Az57x=J^UUuLG_ zCVkg4Xr@@IjrdGZTNGMptj9{5Lp&5+mUad>oWdSg{dQ2b_}tyyA&1=dMvZ_5l;+kU-%^QICA_5thO1O5c?D6;UxH4h{);>OFik@C7Yo1fX1nIl@I zvlkm5#CtgE?O~vn_42-a&V?>E;b0X{-2cYb3og4{hX#=$T?n@7SW>A41K z0LWgaXVeRN!lTnOc@9OG1-LJ1}uM90K^uyBa{?(bn6WFF)c+PLTv15^rN)WDZC_Mya|MZx%G z0#7c7snCM({08NgpJhw>{Jw?uL$o!MM! 
z7y%D91;{I%zO6Im_oob9Cs1B3HLT8luv@_z6y3WA;y1(p7DKrZJ>dvR*b?LW7 z=!>*v))nL-HudO^q+0a5!3|h>(hnh z=z&2y3)PG%wC*n$DUVc}?~B^Q&o5Rz+}3843d~3S1}-)KCVJ2;8eN< z6?u@vp~BP*pH~nUz@GuZBmvNk+F^0Q&RDqj4GinvJgNZ!cAsY@ug;}+b+89A(jwO7 z#)&EiXQrVUKgc`|LoeyGA%?THyDz&BLruCn#}7l?%<1Y)eVyH<-)ABd(u4pG0=56R z{rS6hgC)JvQpz|>%?|UbrFhFSY4PcHAcO!uNL_6B^|x9=mB;?0_ECsp=_4_&fd(-tA9+e${)KOehInre$?)vM8|DR*bXOhw3~15ojH z0cZ(KTVQEvX(6!kha=s<0qczUM#;LUc%Jo6`fHk`o}i%v(V#}3oxq?;OO*z-1(}eL z1}HDvh5nSe{lx~D9Un&BiQb9bIOupoeo$Dwdu>bX<-9w%sM>TU8~yNe^^e- zB>nT3Y0>SEuMSS0CnQK)blCjijX}FL8K1_^Tqt88&v*>ul2vfwqLS}QGF4kGcF(R>mn3Asa z+){6caZ6#W~{W7dHjwGfi}jtuVy{& zsaMRGhnt9=2HY%5S3JFLWLhKR(>mJXhxXQ{U*qcOeKOELf0|Ac>{`h>Eznm6p$RJ_ zRAp6U$vzoNXcKBZPp*J{iP{o?A;mj<`oGHGWH|Mh5_&LjU55R!uONzrn%`(c2U zs^#^J@tznE?%&xe&J9HK3>3sA=)OLm;E#V1RyJW=K5?hkR@{#|^L*-gvA1ZyTTRR6 zsS7BNa}y2?3d1kC=W*7qDcJC?7ogd`vgoKj{fj!XRI&25%8rsY({M{QKsYVf(<2C( zuQRUW0f;9ff~ac#loENi%>vv2Oq6VZh!IrcnM1!BHVv|4KH~b6$I;;TNGUa>R~T)x zePw89&a9=AYPdFd_Cm&cy0PT;5ASKHIqz+2_=Imf+F~jefV%t@NVmYmrkg7dhg>N?+1+z*_AHB}-lN1$5EL`(5%? zL!O)2oAwE$Ef(8L6n2be>v{Gga)(&G@OAJn$R-mm;Rw^fZq?+tg4tivGDK+J; zqbQQZ`pi-p(0I`F;6@%kk(f#Vd_n@a>Q|W5IS-g%Uv~|~liWRL!P9K~fN41*GNb%B zpa4(=mYSNf_&6t^2rnKs5ecT6XIWUTaKp7n0X<!dz@1qZeSpF7=RH9T34}*$X99@Rl5xDn}l%G<>SSz(Bl_VCRF) z9s(bCjD&KGsME?qQDr1v7M1mBVnaT`b?CWUsO>3GGIJe854nF zej=Y>eZ*7#xQh14plM!oAUPi6^K#=~sxdP@cMR~o_*+tQ>-dIDrdKU&5HyI1&-gr| z`_eK(Ki>UTaLv&Jj8o6tLizXRgs*n|uda^bMC5853kJT71Z(Eoe}~`teU6BD2hNF_ zBt^Y;^u2J6ufDtKZp<2ay*q5U%jMh$;| zCdZNKZlvCH!(a0B`pt@3TM}{-qyRXnIMfOBqw{ZflF%b-Xj;a(D$c1W z%kaGiPP|ehy}s%+^EM3nY~`5549B;Ruj@6J;EeWnqiyJ}>&>+SF1~R=8Sz+E5i>lg zc7(Tle>>N8Wc#5Cn;Fg=MgVa1VltAe`7bL~*xWVz`w0t^r6NqN(0DJFNQN-AYfQ%X zR0ImL&5%){A3xe+DHM!mO7Ve$ZFV8#pA`B(K>L9aWCq|b1`#Jd-yhIHNCZNBm^;bg z%g(~e+3IEqz1&AM!NV@5*R;<`1DOxEhTVcTe&@aG7(jPzg}BOZJDliCGQf)?TiU%wsrURd%GM-q+_Jmk;lKZ zJf@Siq%o8j^fXh=%(56l12XIr2^__~Wg&cZB-3b5dy{ZS zTo8*NwF5?X#DgTExD?P9O=n1%_bCl6R^%wr&Whu!O@Y|C?&9xd791$b=NylT)p2kn zoyedM^!0>%OdLS*4#<9|@AwjYCbhrw1JewLp-(jlTIFx3ikiV$QL3a}Bh}?#O;q)N zC$x|dB!$}Gazd9_yfHrRj}H`WFI%C$W^!lcy_sjJ2VG1qu|6=1P&NR{4$|zLvF?Hz z$a2cqwrCTV{@&^bG?>IuvJ>*pTe|;Roc)YMaN5Yi{7%{}ZU4(H)xl&;Q&s<=w0_ImZBn zH$>tF(Z-3NxQdmZR|v66Uk$j2%BSsEgg$cr#H)TN1QejCj3m_R$XM4(C1VW1tH|fm zzCVL=dj)tab-}j37QoCw4m};_#Xd_E1_rUaREPqv48X%s*K`>&v-dgT59hX!sO#k+@f+m{0slKqDd0V*igGEMITeXI^Qh)*( zGLj~C?XC9k@j`d9jIg!chPaT8dXH?nohLW{y&4z*v%cDe28sw+#T#W3ABk(Q&-Inv#zT=BWoiR!05mLgbha!0Z4?Flp+$+3ujQ ze#BYktBZe@-IF_)&Mk=(h8)d5F{>AsjHY+`jEKMn;^M&*W=|kxI8e)X_Ui>9M=)|` zb`4Jh^Q+u9=%I%z>Uh%NR$_vy1B{v4|Iq@(103vk2V2JKqE5W_xcp=I7(sFV@Ng{n zm+i9(zb*U`d1(8~ge8=boWI=eNdM5#7C3Hh>zJ-78R*dDYvcO#7aQzD-k^L^1m}^H z#eBFjDwi_UIItnu@lfY1n48NNBL#J$LeK*=s(i+zk97awW*Ll*Ss1&3nGK_mJViLh zx$jyfOPDq(*%^_upPx~F(bx}{Gf%1;d4;VVk6a9U<5CJHHWYVrb0OkN#ziMW?WTh{ zze}xv-Pd=$3EDTW%b7PL$6F4$?{tIVjr$1#YtuP9Rx(BE@GxZnF2(SN+vHYvVkrB4 z$yXbEeXBm)vz&9Cr;WZq7bV);JM@`~jsOPVyybXn;J9*U&hDJ~A0{1Bv6KXv8%h!d8CvW%i=tfNkDYPN3hv~*9c0+*`C4+Ir3~88s&fX z;-#aC@e|;@0D5#lM@^bfKm)FP{L>=In)*jnF>DcczWY$p7>wp@oxP8nTd#iT8*A!% z&rSCU?Lm8)*0uoFe5WngFBJ^irL{grJb6BKdnH|1Umd9&10xOuwbQ}&urdLCeNvz_ z{Rz~>kCziNM$OwR1+|($@dTNFb-%y-2S!7hU2oR+)f?1g+C!dzF;ptIb2JL?trvLb z3T4YhG^f-}*y@+q;$RRD4^dSpOL{2|Bd3HiRlV;`g+4dS{X#GQGya!tv)7>CLOTta z6e<10^K~`DHh-xJNts3ak+3NLuNPX%bS9W6i=pjm%`(i({h@oYY5UT_RU0^ajp8Lg zzA`HRX$EX}PROpURS94e2Nz5qQ$-du(~vwTYUpYj1#}}}o%MW|JPPqtvpZPAcV|)v zH7}NR&3)ee&GACQu#H@;UdMCOv^`bC;Id~4X{B^Z!~r{*^ywyN+fvO&<>Tk}qO+^3pWyM2;EUa|f73HH9RP|~YzAFHzpz}I=>Csa zwn*BUrKZfdR$eMl1eX*=iOP$OfPqXYuorRU)J9ynJ#F_WcAJ*4otHVq|9Goak|2{5O@ll zjyvO_s1EsKqk)NZf1#^nI=-*)B-JjwcvKJ&$=C@9oQrULK8~ 
zBGkC1+UE?LKT$my&=&CxM70^|LeqLe=mCi}z@kK^>O*7O#|-2yEqgw5`pqUFWluRe5%z=6_4 zPE|EfkxbwyX~mw3*vI^yN* z?fwin_vgLW&AjWW&JN~GvCQzGuAJ=K`}uj_k$ZXL>>J;>167Bw&SzCUeM^6KLgn3? zde@7lCp-qvv+1ZZMN{225l( zc6U{EdP+BrgTgaESgX*!uRq|+frUjxJf}M5rs1x)eP#xcl9^e1GQg$y-X2NHgxO+v zXrhO`(GpjSgzTBm^#)%>>8Z#1(SFYLyVWm33v2H{Tuh5m>LD8Sq~u(q_<7lK$r9=v zI>`a?(OA5kc-J*y*hO5C^&ggzcMm>ILbRhHZ#FO7xLvq8Z_*!lPF(B1(`8)nWSpSg z$ke51+m*~Ung43P+(<0AaOEWLS?uT?e9U|(c}a(LxRBgs%Jx)csohRpt`9mj$3@Ug z^=K>n!&CM!L{7tyep=FMO>@}Ggqhb-`+V(~?&-3VqP|1-I{{CjDPsYQwLnG;Kwf18 zjw3+Q4UBMDTUsKtn-o5#H8yeq2N#I_6ws!FVKq<|?S^~_@dkq19$?Jj1neZ8z$E3} z%FA<}9cd=)l6&5t8b|$ub&k}El@%|9De>W(WMGp$e6sRyu33Ync5i=g zV)nsa>)}>n$Q=1fbC1KA%yaNTk;w=9ce%>*j+b~&`%VhM+}YC1;lrM%ubHaaD!V^f z?kqdIaSg#`&h!*p+wDm_(Tjmf9<+{U@l#zaMsJWE?t}HOr+$?ZG2nP}x^PJ0 z`&_PU+i06IEaXn%`yL^3-46Q$IIGMvjaMb#J>|-?M7C>TF%|Ec_6ghcL42{^D#-`> zo2v2`nFM%04eHFgrAC{uB9U;NAG+;ZJ|b_Qj%4<$Ood43?%wSGR7izQelgK3B5MVu z>uQ@0@5#eHQ<#piuH;j{L+UeCG*XWM_POJMcaQU^LW6{li=TS-==5S}3g{4$e^&Tc zv6{dwg3E%;YVsT$wlNZu$AY-bL+7rk*}zgbG9QWN_OU8mdl_HuH+_0sNg8%5ii?#c zm%V@Xr5&}_>foRFYLoYNm+Ynj()77{59A)w%hIBRT;vH2*INiv-kbj<4|}Q=%2`?d z4faOvX6svYcdr>;xVh_SOc(zW&spz{du!3vg;@|$S^kT6qtPKdI_~omCf(1Qdc=|M zsam);ue_$KO+MB=rG+0QHd`aE94|^V_g|CdV2=~^f<#b$cM{>ewg#_HFz|Q8WA^M*U+H6q+im^94%v%d>AIqk*Z#k~I&WUD}52f4`_i z!KFQ`eO#C(Y8`bytW~PJ+6~DJD%f^H(b6^tM}ua9;`kjFz1P`BkL_w3G(<^QU44B& zm0&+SJdDR~8In1b3^uxy2Rw*w8T?4Bq;V)6~P zc;zzhVo05+{3Xx3Fe&bSm3>@ zIqU-`67x&TlS)WSht#ocT>beHF{b1mrvs;wi+ph!v#`1H;0B7ht6L;Wcq}c)12L>6_Z$=P@QFC z9l)f~TDu=O;-hdjf}_P$!~+G%aCsV^*{)b}#o*=9`bEVI5800zr)8#u$&&ghDyYaK zr(!<&+Wuva4l5&B*u{X%!LzoiK$+JQGe=IZEI``~u z#Y51s>v2#`N{oz2k^}W~v|+sf@+?=Tf;WocjY0 z^huM_?^X9Zi;v-~CPRIZOtr)XS}ktBZkB!2Mo`p?^>;QIXJ1@PmqWI9Ave;ck}&v)V*v~OG>04ucWbkneDDaZ-t@<o7UJB^MeVFI z7%NoHHK?(%E;!i}>iKX|Zo9|k#@E2-;xC+kgx4!729AqJt`X=Nz3>@X`bSV`qG(8NIa53 zeEvwm8J;M*)c`AFP?~WE-8%IAKBZp?oN`D;!T}GXhvx5g^~6Vx)tezzyYwPa8W2Fg zlto4sKTRsMlJ!l5&VTr!zIUXW9%29xMIr(@20R3VW*y9w0_MjgjHNE3WeXG)i?srRrz9NstVwJU3z==` zCR_Y-4HS)MH-jE_clXhan$OsguQC=}mnY}_3#a{x7_hKXR%chwXyp9fe+E8%5)5dj z62frQ-&St-nk|>KuG;%bBAG}L)tah{G*WJ!RRz@fU&W(S{W#dXhl6qhluG5!fIK{_iaSdf z6msyqdzX%W?V70OrN4^)_BykB6562kzjz zu)3H?QL|u&c`feX>H>JvW&D3sD~}O#xG$SUd^Pf2({P$`$WdqTv94x^d#B^E z5H|OuMRMEzS($*hyCG7xFuw9L6?p&`{74PuT{K&O6$@qZDq=$ZI#&|7WGoZgj+Q*? 
zm{^~$J1FC&oNcL+md=-UAFm% zBwb}tAx`5Xg7osZSnuq-c>56S1;4~OR=w8IoZaJi)Z1>}SoZ-y zSe$|B+n-=btEzr{dOcog#Ij1Geu>}R92laXo1?3d!&yJ4CV7$Xmsw!^%e|<+x^s4M zJqqz0ntj^wGZPK$fjDd#nU%#vP+y9`TV&PtxU7HKWSu%jy8a3zj{Pb{V(v0!BH=g8 z$XDORPLjQpa{42ddQ98xOlr?Jd*?HPxE+HBLz#;y=McNIAAYVN>)asuvWjv~X<|VWd83S32V^U!xGSTfvvJmFegkNQW7Gnw`4aXowto0E~z&bYP=Ct2^*kzfBK2ST+_31z1y&3Mc)(Lgxaw04HQ z*_Np$d@5h@V$EaKJ=cAx-hCmc#TtUAtd8y}@%@Xur+G zQ__-CnV{@wkJg>Vca_+~T8AyN&=o5~7@2mJGtvfg>~riuchadT1 z2{s2y7P9sAWykuXlhFs53I(|4IbWDG$S5h2gccmjcNT??$;hG;5?0viymMfY@To4; zGPGd3pRRVTTbAk*02WtG#@aAb2|Cl{TGtwa@1;8}<4a#j-*nr=mp@tQ8J-WGkE1-d z*gIqY(#)zm%nCf z-=UO-kz5uIS_AqaUq*UT*Er9=M+ffEBDt~M{roSMQQEH^%eFh;XlVv7zFMCg(inVv zLP=J(U3V{|jaWJ{FvGj3J=Qm$wCd0J2U9<7B0Pg>K$JbYT(VOk<88ax zQDdvACF0=7{u}qEtYK|hX_olavNiKZFR{gfgvb-~Dy@HRb!$AbJne-;&gx2w*_PFO z?KG|^8yse9q3;`(L{WW2Io?q%_X`##;P&OKfdi{)cevZ3ZC`KydgIbRGwu(=Q~5@U zlz(e$a?4{#VwxIF=H031TT-Ml$%6vJ$&Nh8K<2Twx6wBP56Whc?#IeVf9?)jNIed^ z(CDmOaz>?Pn+b-+Lf3Bik*K81&ZTx#XI`${$Z~k?g6ru#8pb1jgFXl3W*XT7=>G_$ z^c>#^E;s+Hu+ttc%gXXeNWffiyFg+w<#nEVAXl^JiOUb&u`IXMRe1j^W;s`@vGm=I z_Qm2x(D+Z6lzFgg{v19NeJ54jSkn>nt`wXfnRz_n=;jNC(Si4#EYL3r156DMOaO<6 zkM~eM`b8%t1%aU+FgmPry7~{|Kk5#PLFL^y6u0m$95vIR%BH=Lrc|b>zp5u!*=lPbMg5=T5Sz z&ud4SO>;H&nFI+*X<-8*8PSR9!#VX#4DQoOdUW5cU4T8R-QM{v8kwKc!C@VSNrH$FuD}UvcaX&n|yeL9$!Y-k!{o%t+hU=SUjioN{)g zJMy2HwBLK*5V(2eb`fp#tAWU~1id3tNhK#)tz3>Oq6F zSfe(4M$1)dk3C0AjanL!2tg!xqCW;W#Rkg%1V&`yB43#PL`RGD^*Zf)+&46^&*s-# zJ<#F`bQr5u!XdLx=PObU3oRRT-%F9^=1i2;KOoXb=IJkzQ|^r+zI#sa3Bz$;GNf~P zdsOGf7Tj-QTH4b|?|j8MywcJkL$cy@-F_*-kL$N*Xl*nU@tw!xG;_h;&VG;4pdVZZ zrZ{PPV2(#4y+M1cAuO?u_7;{DH75v=3^Owcz*&ItM;nyO^ zgq_^n>;z3#G#CLr&N9zM>f(n~dpd)6-TRx|)O~Pgv0kC^0c<3wMTc2`SU09Tn60VJ{F$V-HIWu$YSe8ho z-JU%1C+=QA;uL7VtS#UC*(H-n4+Uqp08f-G(B*(w^SC}>zBr#T(laqlTxJ~4lo9-Y zY`q0kRcqHhj)j3DN(urZB_JRra43~-=@O6@kS=M9E~Ojk5|Hjt>6Vc0?#@H}*1qrk zzIXip9XI{@ZbCxT5o)aynX4a8eYkns#R7%F2Y2U<@*JL2YE{B*| z|7V|jw)RlXv)hZb`K)0F&#kt;FZcr#af_6^_7UkbWf|;*-R*}D>4|wv8LD0p^XF?j zCuZ0_8*Y9>w2Zgr@X~BUtXJhKzCv_TNJF%Qr%dv=%50fP#W0$){@1UMFq4fqn1b!? 
z8^~Ay3y#Kt0Tt7xM{OV-`lOy?2=}zed@Ooxq|`KzRO4Rnx968Y&DaLte+ae{N5?C9 zVS!ZOru!ytH5&hUq4?ZgawF_pazqcE zgn#t`>$RXRMU*FwuwhEJuD-O5?+o5TgCF6&C$BNUh+%hqECwV5I%b*`=)MR%u;G~Q zSJ8%i#?OozC6mj`S+*(irrqD3BbLvI8;Dl56W{5DX$r(idW#GGymx^F(s)mJF<7aK zuD!kGZ~4T@3aj?o+`Z++>9;iQ->P=+$A-#0`1}YvuqR<>Vp%o3^XbhOeI?z@;lJ3Q z!AZ{zUeUnZ8bYNFmVuTaP}W7M4^1yEwEzg%+|rT=R@$qhbXYG1vXK!dh&>#4*^$oG zuFbW*kN6;G!`3CF*~YD#NLyXgdQmgXcYQAF^>@lK3oR&qKg6HJdc4bwj25Ig#*LuH z53scgzgYapKx$8sy^wBvd<(pvU}nh+)a0<^io+fZ1~rtlwCSor8U)~RjeGyTe{nGf zY&ze8>RA#?4K6R|2FIs|jEn&=Z16RTPUYu6i%ra`!u#>=O(K02KhOE6AR4156~((m z8s?kTNO1E$b;zw88x9YCDkfme$ru?&oKZHAl;y!R`ZS`z%u6JnQ73mQ>>Mbt$SY?q9)4QAk-#ad4s1)9aIbJ<0--h#CCtI zf+K*0lZJ~+Rjc*|;*btkhAYS4oiKebceHsU+0c}++D>@M@?c#PF_i#9F$(;$d#vi) zn!9m`E9TB4!mb!+ehJC3rH44BrKJTv!3^vF(E@aZhhLHQz-a(OomiN|gRybMa2wfq zz@6_W>^w_vQo}!dxB%IM_25V{`LrNG~+NeF}cXM-dIXhaA z5Jfz}n14kO;EJ|KfE1#qU}M$Eb)m*Ej9V(9*krf2dW-+1Kx zk^o3U91#&`K7<*Wp8f?sgkf-PrNPz$_A%VMcV9rY;p6YGh?37tl+Qc_vKtK8^qap> zjMlhn05l2C^K4)LW4;di1>VQ_cry5<;8+P*U41)TY7qpcE!^DP@Ou{{Mo6z;KZsz` zOajyMS2Zqbu&_K~1AuTe8VZUJu;^ORFRxJuWg>A_@M_2v_z3pScfsluehMx!CTu11 zFkB(gL|=?~lc)~PbMJtF2Jp}WC+AekFyfAAj+w$Pem&reU5=O1C7SM>2SmPny|>;4@$^p%1P};K?Gm#C5T`Rm^DXi#dXd_wsOB z&CIg=Dt-Hak%jF24}pQZ1v=Em*R5{Q!qYwXue&^QHvpzbN))({ay2fF5cv=UC91Kp zFbhXdbFT;3F8u_-fn<@n13nLWt z1oPEWnyTPb9I3#-7Cou3H>=>kkpZxo#kO~J96?cf?Z%C10M6M>|N4VB-6PU$MQdBz zS(sfya+jcZpI%rcoy%}R!mrR_SE~2%;>hX}LG~-WEvEjVE92B`< zu&&yFwWPhPXM@$s-TzGsZGM|{L4EPg?(Qx2kua0~hL%)13}{6xncneOedQFF^!`0K z8oa!oW8D3R1SSu>0;AwkPkl>ZAEDPIjXv1_^Hy}COkyx$#rt#xRdIYg$fcO7>}Su_ zXX7|+<5OWDrLiD$m(VdJbnp!TcrQnIN0Eht!vKyCFsJ$f^wIS6v?};Sf{hV8-ud9< zh(wR{_lJR@(H&Ur4ZirW4H=%YocCTF0Z8^_aN5>f=IQfZSC;%Dw+ybh`?$j0w%YfX z%is9+MlBg%mOem!e}F(tZq&HPUDq4pIsIYx-B+W_O}I5CBBCjaE}5&wYESUso`x#F zV(cxn4XKWMZtvSth*46A<*%{2HF8?}_{aHKm}KUkB97#miTCTZzQY(kat_y`*RzNI zva$EzrGHX6+(Abqe2EFsoMg$tyy>oMg`(y08ybt@-L8?=mTl7OzNQs*!i}o99no0g ziWdH`#z}ZZ!b;s+_u+pp6DuDdADrQ^uh%s-d4adx<{5ee@#Ye@Dh310n~1YMxWOUb zp`Slr{`m3Zlc#RnP6q^DV0KstzL&5G-Gi)?kx~oMgF9?W!doP}4GyZI=!My5sp@vc zPwjz);i@#kGCD9f=-T#pSHt(=yT7fn%sps=bT=c~o;=bGS}{!&g$u$lm;G0NkyAh} zQ&}@uFS*R_3@guke^>D!&YgWaCD8Y)@ekK9%A#{0Gwy@$u|xC&C9p>P|cGxf^v zTEBTWItHbBRwMtQ^DFOzrz9}EFdkU$4cV3E114G zt1$PNjW%)hIWTU&J9x-+;bnR;t-amyrPp$~5AE9coc3MIvp6#Qlg21FvQ?{PKCBJr zpeFgOFfVM>HtcPF@$&8Uq%jP-&#-TJ8Q4WddvkA_7#R#;4;1=~)e;jT;%hGS#}xIv z<>|9Y6zTozGL{a=@QE?oEa`XpVf=SeVIu2;{ox;ra2ir|xjFFLGB2p%$ibP29E`q7 z<<@h9MaH<0<3S4-0}KTUo%Vi5bokN6@H(=B^CIF-56H*ed-pPJQ|_7p@M^o<=L3{v zK6tc4wXFNvTp_+Df$0Iy4*x{Zx~%2o-Pl*rPy#u3vB!^1uvG?~*F+aOmjarW9u|9` zpwKKO+lyBb~v2NFFW0)#$3QwJ2b#j8xM04bYg!9{cmjs`-$A8pD zSG+xI461i7^^0AqrSHqQHOpM`==00Ke-ny6<4j*M9xY>;nws(_;S7YLFPh!>#k+T9 zJ^2!0P_}^jl>oG|kb(tXGSDtiD|_6)N0za}`smN5CQ(y19buN2neR`8Pkg6=h~<=x z>wZhFNqG2*rHZz*&S6jHRpkNAyy0aMLs^|b72GqF#$i_`NqS(RrKO6DIhvGqz|hvs z=x*FZl+YdxV**`>>A|?vxNT?&e{u`d8UqE6?aT8b00ku&S6#j7FZDhNzYX^0bnZBN zPE;VDXwgAx#`B!4!7Rq&r(|_ek^?@0lHJRZ4PSRw=T?LoOgN2B)n!5zG>q-q3nMIW z@JaIeSyuXOld1yjZQ?7fKRL6U&q_hLuTxk;WwgS#<6L4MK(F|t_^+}MMyGc9&oQF4 zn)rs`on`5DktTSN>1M|%`0qE|={9zv(z29hADGbx`M$fg9(ojRq48jsa<=t%)Vl-s!X1wN0^z$Za%fRujFcp*N`sd`~zfGRA&Zi1A5?O?tKI?!iJV$*kQ_jXyH zmQwL?e}&C=7pr`5;<$_Uj_VA)7H*o2ttJLq*~Jg`gP^r5nnX&(Pk%0~%n4PaDPrbj zkP=Oe!vHUYlk|`Nm1b8diqpxgjh2?y6sV<^PCBVN)_`=dHfc(`h+bZvi%2a<=*j(^ zW{vOU$`u31h|xli#UQ#@7Y3{)bHdkH6a+>bY7R#@FZ}qvDO+nCSDO%1GVBEvr;I{u z4Vh!m%2yBc3k@O@Nt+x(*V`WPbF+?XTIMyY9HH`zkjTv2*r2SsjZFa=+*@ zIbh|&V#CcLOb_~;uu#7wdBxmxY>wk@!Rxi~Nh4w#)eR0QKcLiE{w&oe!&%P+4$Pl_20vB zYlrF)f7O{~5_rjf<_adNlkW6)=D@4-J5JH_+$LZ$%5L0)dS=>Iyc;4_pd?35Vc<0& z1&0r@FD>ZSrYQdh)sTX2WfKBI;Jf)o)m(au9zKKtdp4SYEn)w?Skt 
zQmqHlgT!kioBLA?j-`UFg8uzw_FT+XC*m8)I&1!qgr( z!n6|-|3tLGnM0wJrKkfi0EF+w0cs?dr^Q!dF_CNgUHHE=FeewPm<5He7oqmFft?VU-6rK#}JQOrM=@CXoi@+EU6F5bx4`>UAlw; z)ZdjlpN?AV1Bo-CI3WAMnu%Q#N zN)Rb>@fTaxQ@!vw)x5}Mj5Ot=5GR?9(fUxD{l_Hz2Z{R}P^^F&pe8Lc z-jXvIAJ_A1G>+&VP0kIxM23^o(fzX%M&R{k=k;-2PjAQ#q|{d9GzfDz4S(c)k6D~O z-CtAw7qg_eq<*mZnd!`h`bOHX;a1w&dIXZ4pQ(C!isO1O>QM^j1gfc9H&RoDM8_u% z)i`XB!KjEHG?Z0ajU-*{X#rtZXc`-06i8ZaJL~o3dy8Te%nSlE^S>%LJ?`aZ>|o*e zW8QfPU!h9>OGTCXB{uqOm(;IrxaTHY$S+MpbyWjYy2m}kv|t5|?~mJ`3|M#8&ngDX z_Y|Sh?sAusbIB}5od|qia6Q4s#5`8#tLYAep7se$KbcR|bb$0AipPQJ_;|aJ-XFR< zUV}Cik7i8b5|x%qa!z zmTjg9E*?;qM&}o97;Or75!8=YbRKHkPY-tIbeB6Y#x%`aoOH9X|7suhY%fymIb-ra zc;}8uI!=81nYj$MAEv{c)4OXJsEUf&`PtVhm1N{6_TKQkw6BbOfRq2vpZ?#YY4@Ex zJ%LZ6WT1@83W#AdpL^P2CHVL1c3YF)4gV`o%=_)F3ky|OXFNbFWoV+YZEYj&<3|I) z9VV&sX5%jmD06_Jnul%zPC`kS0s@7VSOZc-nJp@MYLg&+NHz zSzSkC>qSO~l1Buo)ak>m_lx@l$nmmb(emup*eSN8v+ihHzA1rQ5q7+6OROsGeVeto_!A?ifOvBDQQyXZ`$!CT>x zq4%j#O0_U1dF2vI>Cu9PIwhNpi~G>eDQ)+804CPcsX%n;>k~$-;gzLm+H>*!YCD=J zGP3(9PP}OO-0(yG-pzo;^CR8PjS-pMfNWuUnE}&@F#kS%(A&`P(OtlmH0|3DEIT0f zhBt~nwY9aoSN@jo=LJc?dM~>BaAddmXQcXwrS48%N@K%o6SGg6ahQWBWsfRerz(L@D^pG40YT5Z>Tt4s z_t{tbK2?}izLEJ+w0BSX*Vu@nMUOmN%<%<18%fO0{e{)|9H9ySy6*bW=#G)@f{7Rq z)!+>d6nrGb{kuxTRhIxVxo<|V#kkXn%WT(_76;_6$+1UA^>%2+-rF{zP2IV+r3jYL zzkdGw08vtG2CaChGyjzTq3=x31|>Hqs>clGZCfIsy@di40WP4O1|o|MhJc|kxe*l= zg`{dix&(sfLJY-&!me{l9-bKJ(QS4Y<&acBD02X{UDeucHjzEx^#w`ggc~qA!U4k(-55P;n z3$MPZ3GsN(&ws_Y`5sU~#7-V;S9doi5&)~~Gg)J5WRwVmHnh3oeAnKVV2=J}zUK~> z81OJx{)>u9d~@1czH|5PFJR_W%Pc7Xh|z;!MMyB?KmBPCnVZW5afDpl+$?Z)kf#v9 z<@|iQThb4;{Q(<8tXTe)J&OkhIH;A|aw^Ts|cNa8jxrPRQBMJ+|W<17L0FG*!ZdS4KA5azE>tbMFAfVn}0R0w1 zE;eYv2P^Ed;nk}Cr=8K&1%Tq;cBb*uSucpk5$o^zdZAQGh=+Zo_g|1O_49vF-__A_ zT);Le95w>;@|fZA18nEd{{@-WTUuI9LKX}Sq(n^p_F^Cv1=RN*<^Mt?j|>3g6@B-P z3yHr(6rVue0h*2!QZ~SCa&Zxvnwr8Hq*iJk2ermch`4ldbyb59(zCOINAVDgNeGNI zf*<3&sG7~eeVR5^>(;;RyHqXx>evq@|aD zeH9lM|A!Bd!~8ee6Mhega(KKT9_JUtEC5pW4H8UQ(Rvey1SoX15r>4_GN(1-~Z* zv%yM9v6!eqwtslmzJ`UR+CFaBTm18hMXwPD50A=B7JhF$Khmo*KPJ=n9v>Q4|M!Ir zwjqHQf$ITuL1;Bd%|+6N;nIFZfURN^e2ABP2wnx_)y^UD@wxEIU|3EFVFAd3LK9{+QJ!; zcY*k(Er?eD+T-nTkq=;NI3y(5wvUDHfdkvM8nf)3u&{z*yS>+LCL++ul!K@_(onNiIjaW-3H6JZhIxntWS;2EGU%j8epZGD6!;4h6x>-VtBgW0{^aY1c>z30-z{l-IxW!dtYZ zxn-tnWa)cFyu_PVG=C0kQl2mh-3yPo3XI;UM4O?zBBp?*^whhzur+*sWQ7=dtExt( zm0AO@T4p`x3rjo&xsi~@nkpFqOB{(%kr0*dgN)kF47p_0Y9|gPTjD=A;N-sp|jyo=pLY#N|`PqR098pP7Vi*C_rI_*Z zgNn8*K6K(GZ-+i+XT!dPhj1~0WPGhK6^9@(kS-Djoid&FJB)6pM`-GjH>d~WX@c3e4 z-*1N7{SRXrj@-M?okkngKaqb;HPub+CSl)%lL%hIIT5QDW1%xn52~ad3pIG6Ma16H zupFd5Vf_54>c^qfddk8hQbF&#Eznkh?Jy!n0>;NLJDUL-NM)3=4=@!nmX=QdB7W!W zTxdRqN~NSrgRu;ts{LUr*aDIS`0;C7%;ntNLIClbxD07ED}`z`^w=E3GJ_Psj{@f> zg>`FSKm=RU6Qj=i;mq2B;79}+(BB}pd2ndRcz7n;c3 z^#R}qZZ4^>t*wK4H>enxBF9`SWf`)c-*W@X-L@mcgRgJt8dopaLhfd@)7~^Fb)rW= zHjMBJMEovXPEJAm`dV6KNIkGM-2l+0_((o%9ij3A@&zrOVZyhedv##rh(U8Z5-m&Y z7*mj?L)3Ixc{vgbZP*_96V5qRI9Wfm2pVX@qUZWoO-;n|5xi%n5EALJ@MHBNZ)ot@ zU|I^=eJgu=WmNDZ4{MtOpBguOzioT~cob2Ax-uKXQgvvHM5EWuMpyH6^$pLYW@hK` z*DO+DTqFr>Q5zHKF&lf5L?6knu%*LCmV}+ouo5a80QjGn^kp;=q+Ni{0orNCF50??b-JZ3OX!6OfQF!YlV9{+@gP<3ru}1Lbqt31$Ov?_nT?GNxZXX7%B$Eh@?uYKuaCF)B-9T`qC4bHr}NWL zQhtJG&kE`g$aN7`RV9LRr7MgQg^Kp}zJ?fDfgvYWSn`kciP|p63F3BGzb_{zH#;kq zC~{5CKE%8KuHnr5g3RKa50e6Z@Z>`N z-Tfkh<^RzFICDXU86~rGhITE#MfDy5G)(n_gNOO|*2k;i$27RyPPm$a$QPy?`~b%0 zFVcobZ4nZlU|}~lG^C1ku|e!0Y#aryXnP>(6({+jvZ=6L`UnLSRC3HcWpEA+irGH%=8!^@gtWJ)sLEA8YkYEUWu16IEpsLy9$PwE z8izYC@Ll>%yME3NJwCMQ&$Z04O`Jt-_in|PxdZsplK)3FJo|n8lwVB1$s=Q*EvhCV z9bJQfa_J+{^_`*wNf~frcxbfK-iyFF~GDLEr?Yf6guQarj`flYfBdk|sz!Jn6a;QL@ 
zq)4+ssRaq^p_nkL&)tDCVg8v$N7IIs*SA>J@^!Dk10fQ!#DfH_P)~RWpYiA6EvS{0c1H%Fc`(EA$xD;B0Pb ztv!AwsuE-(t=_#ib~|%CMd`z7A!|<*I{K>I0F$YCFWZy))RpYrvsniJc1HG6?d#li z>*Vt`lqW(4}I)uxAnRli2l{bb`oxRkr%%;C@-x-FH)S%za*JKYpJhp!#?=C?cjU zo}92?p{XIIeqMXY{Edkxph31I2P~*_r_q)*67C-+bin=2xS8C*peH4*qLFH{c{gQv zkT9J8l_SAF)k)#@_$xfs$r?s1frYx()SH?=KB{+&n3#7-$<8Qr9TO4V+i3|5iI#H6 z)0!+DBDY@J)Y?9auUi`I4&Ya)ZA`4mp5FpO_^?3K=51USn@-32#U`mT=NLvTBWr8) zM)X2Ta<-q6u=9NqcEiQ01P*j=L95Y}WlP3)??43;kH}f8 z7JHP}?jnF8uvc@G-i|_Fvg1VoSo?c@W1-OK=-0)KeBb_+&7T`y-GBP$toF~!ypvZ{ z?z+xo%~#)knJZye5qK)Lv*S8x*)4nj)#)X;u`th+TRxU6ai5>|LT>Txe zsdqEB@UzoeB|g!@uMe7+rCdUWbNIH7oR0K%_4@uhUA4HIa+SoU$;y&zEu|uacf6C^@1^ z3G#YYNvx)Ez-#ff*}}89r*ThMIVkC9Zd4B6dVeDRHXMKTN46%p&B(TX3YC?8m$Hoc zgqZc)n;=wp)971cAGIGs{tpOzSaX2Fa_!s>Ua+|TzX zcXK;nb7OsKY5HNIYQOh@tH_Ca{^^|YL8B_Luh&Nk;>QFYKVYPYcVi>u+eL-tsTbCL z^gfSYsx%zt3e3f-OL`@Ys+yC8y%#2ek>NJrOaB-R51dzx|KusNwiTGm?*EnVhDdb8 z;EDF-xZ(aQ#VJHtKZVBTs9DR}#`*@{Yi6=hX?6LF1AxH4jIFWS<^s@LaidRAA+pBi z=sxIEXaK-JCaur|aCPAKi!xZr=OptT=#|^AS0X2L9x!tT6{TEemA?_s22IpoEr zTaSq-W~mX9^6@3*X-|P_=Ar;mAkYErpd*Pw-uMCXVY}lJt9^RGI+9B0VROybS^s zVc=KK_^8WE&D@*`9#H35!nBPVbRZ}dr)Ot(yIn!^p$jMQt4jM@@8SF@fWzknUl6!M zO-)bBzy8?-9OE54ym|mgI;)+JjBd%s^CII4kge)u@@E9}&OksT%g^PfkB`q36eY7T zS@UPt9SyY_!bxFcW6O5@2c7tgHds7>febA*t1N@yf* zl03bMv|DB)%4zD~`m$AmAQ5rvuZ9H+M4;51?rR_i(j_D2pN&7o3rP$3zw{?mNp|mU z^_l$1eTD{IWxxegP<(ytx-?fMT2@*zz}=MN^`V~8mD*~mj5_;LE0*>xbBTH%K_Tth z7l3zOS+`i-L-CC#Bo2-5*K)tr|J;l82LU&ulQoq8ixTv|0vnn<8sT_grWmiUc-du zgc8q_Lb|KZMiTW7vs5IpbJPNXQ2UXqSY9_%QG=$)9n*52HQi=U`}-7TA*pIrou*)X zY+IwyFokMoV8{|9mrOu)lI3$-XXB{WE;H~R8{)~^Z?)N+xDZ^PCrcKpY4dRs%|TRlJ=5jlbhn? zzV~^d!-S69D@_b9_qLww-G|rg?_f#ff4C*52N7^`EONepjvYO&WB*`h2NJN2gZcO8~<+F^vW^zij*uHiz|CP2WnR^;lfDSZ$Horl0uF z+24*tp%{6sOeVtIFD+P7>sutwSj3|)z6Tp3`38mz<#G-om(9r!J$foC1KZIRs?cgH zNk|F0Q3gTB2JP}-(m-hl^B0?#Ws8IXl9+6PWBCIW7Nn4lPI z*hkg?yhKE2W?r6bo>uh@O-D{PHa!SXhVH}Ny&W1&K&o7Qbn%f~IVs2Tf{5Gnm(=XT zr^BBk@r3WfOG=)?T*}+AiZF1~Ma;Jm&EN5@>JESf5y7UFl@)l>LG*Ou@)!F4&8goY z1JVPK8YWrR=dM;YjH0m<)k@uF^$l_M5B#(l^h`CDOWu0DVxr~WHjnDew$K&%U2=52 zZxCIo%}U&mwLU9)JFL}cF(y(j&j5YmxIV7_#=Nn&u7qHZiW96#)8n>o%8?oyEtl+{iqp~a>v8RNb8LM!*CCZ*nb z>@7-kR1fYzAlb8rUndurZp}vQl#$%=R11hMBAL%)FFjc7!YFfMKlEU!AFpia_(A!G zgs8{4b%0qj^W>p<_0^=zoZrbK178e`!Vb$m%9C3iXq^?x%QGI|63$8#-`#6#pczO| zYD>k}Y?Qe$*?fHBX8rUQvCjHSuft|%Uo6Zo?iVPwd$`+8`X1X(3BO|t)v!h}G080D zmzO)TP-<%fLRtiy#nvH2IUB_HHa2>K0PH+FHkNFX0KTvy!dt?T1`kk)xxp0PKGzCgz2?gQL#f?v^<2a4anDC z?c%otZM&2e7V+FED#Gf(<^mSls=+$mz0 z1Err7{J=bOCS@Y0no1*W@V+^HU)+-W#Sv4d-heR+?a!`~AHTAiwznEJedeTtx5sP-g z{yb;Ueq~o~+uDGA6DK=uLUB&{FD0n@w~7_%8|o(eeSO;WS)nD*d-qmXHz`;h+;+er z?Ze57-TZuNvFERt+=w0&|L4P7klKiFS(#CD*} zPV#Y#O_+T%)P9Gf*jgj`VU@vSMn+`KK7amPDw_Sek&%(|ch%=XWPFOG(XHj><-i8L zfQJL=q9L6pRlC+L3p-nWacL>vxR(ZDYu;bD#AP-8x#uF|ltANC4h%dWqh@#662omT z{j}MGT={_>k?=6JtRss!)US;TZYg}yk0T73GAl6UrE?yJWJFguVMuhW+=!m{_^zeC z8mqQMT`R~uv*hBXa(43ZsK<9qMIF|Pp1$$#oZhBpDaVf`A1ESjucZZ6(spj_8jLvf zmfx)i!i1c& zE>hVSj_lGLm`;3W+;e9S6Bsh*dVKkN=k-Q@7-#DJ;m8sJCxW4S_n66V`W7&Do#mG8 zZ|fXPTX0po;*QaDK9i=O>-pe*;mrdHr>gu`MsCgSS2z#AumyDZ>>Pe)1`HCmw(O8? 
(GIT binary patch data omitted)
zdFbHWlVbV7gQt~;&z^?KMG;zM{q~*?jXPugQcKnQkd@}ufc|?UA%t&Y|N25gOvywS zJ%=-gZ{bH?;CaW`K^Ps@OIr8LE*2aVbzZd#Dr#$QFJby6$d6?9boN|(CZdc_Vcn^7 z?!Wq)C?>|Rg|t}bmWZ2H^T#1UYyH#2xX}x0TQ%G1zz5h&evL6>Lz2Fab=^l zwsp7kbiOk-M?{8c)a>Hlm-&)|)g`9Q0!yY(1v>+zX04n{co(mxH zXDFn9;Qp&x3fRFlJ>YtB+hDq%O!DyOB?9I6d2r8qOlcHkUhM5F>w|3f%qE&9db~=^ zv056Y4tj1bVk91}_T$i#o{&PQ$+kRoV`NbhDoBsm851;i@xt&DWsKIjqoe1S=5@9- z{qfLGv`q}38<`N-^B&%_JmI68I`_Fb2Z=aKy#C zOtag2+NSruZhSto_KDbX>pD7D_jmc7HS{Z%5NpfTBF91c+)#Nd#RhqVo}pylRM!;M zW{v;5^aCZ5Uei~;(UEQWVtNE`@P^~orLkDemE{O0X~`Z_M7KGan`cePT@69vSM1XE zkE9X}5I=CUkT#b&dZc3JPp3RPxeIMEdL#*dZN`&BMdk}J$qSR;wkWKLHXdjs-=L3W zGgQzo8;#L7C?`}Y_jaltl9Rh~j;W+ORstde$n5f!Os8Axf6K+Ub-boMKQvGUD*v^* zt_o+wyZ-*6=HCUh4@7TNoh@LlPm)$?eJl@jEIz7EW)zbzu3$x>6g5^MMz;QlzH_F5 zj8)uSD2#bQ3AEEWfx3P|_*^pobK5NE>Q*b1;$TBMayep$di>XwD1Qt)D~tU{ki?`s z6ikFF30H+Br9kdibEXian6F~3w4bJk43fBRGEnrDCG_>6pgl6zqmQ6hh-w63HNXa^ zhq|`5w7pCJZ9><=X$qq(cJa`#NsGld)?`E`skd9Qb$V4tvSb&#?Ie zuvn0ob7jCo)fL?byS$-$7vFzqIA{Dj`^FTh-0ZP&v58dn*KYP;ek~*K=svYUsPjIr*6Diduyck z0_XwCL8RYGq{ro2b4T)dm62FFBR|N}kaE(P5}RKGX$^x~@TXPL_p2_LI{k{@eBS23{!SvvX7#8pVcw@D5hZ{&xYCXsszS#k!YdFMK*W+rn2 zNvl&Z)$gXDrPus2ff0wz)G=)UY(c3n^h90A6+H-egYXLuvXZM0K0Zgg4qR6Hoz~w`w{73S!%ubc!FZj)?2R z?4B#+EwwNiFtV|UINGbNd{z%F2xy-U&8RmLPF8?Dk=(Q400JnD;2Z(cu)5Gk0pizr z;G))(z~2cCT$KM6a1zAzPUuGJx;}UPyl?euKzt+C#bCF}FTAEqd0lZ|S$Sh}vO}S< z>=FI?pw0?=QSQnrwOfunp%Rn`vZEF{#YcYpl6E+_$^;>X!a+sS0<`0~u2>bz64x@*>u5lf}Q~+L#n3QmZK2Ad@FyRXwZh6U6Kj*15Ou zvY(IF9$%9Ej?V>;94eEoa%!G|J zn-@qzMrf$eyWJfN02}ebDSPuDPPJ#(s4ov&AGZm{O_lhz@N1IK_h~Yc{J#3^kKp*W zgI?OO!|g1!*L^XRDkCB>ov%A(?=bWPs>x)Vp_^&&w9fZn<=bRISiuIx zlmMjqDOK<8CQz7Bw0j#T=#?ia1E@mni4&kzS#>v5;72AAy% zY*&fxy3V6>=BAR)d0+UGFnzwGxA3b8y1`yZX3soIQ5uGxiO5yWKz|b8+n^6iF{)8M zf_eRb$M`B153(Kz!M-;tU#dA1FLKzGaqK2nj7;8>fI5OtwX5g2@(UCxQh&H)&~*H2 zqOv!tc|%h0RL$0KFx%XhE;N3XlTwfq^clE|%pO3I z1VuH1hX%cez7-oh#`}uP%W&{KBD7g)-WD9T0KBxFMK($yzahJvX=sh(r&pw1Io&OF z3}O1Q*G6wJVK*d*QGF{vAQ|smWB0&?Vgu2Mio^#PL`G|ME@>l4Q3V>v>`|>M)q37H zyJe*jU%*;hM?YC3va&>49jwb)*yvmfhk$5aFd$!K6a_i|0}dxj6r%;eOCXb8ZZAz@ z0r+$S&oq5==z1&CP0ol*8GPp+bX4 zqMMp3qz%UL@CI_TZnYnB=|yJ5e7R8cZ_H9HMCn~a=1w;cN#30v*ad+&l?8qSV z!G^NW170CSjm!4P*-pzD5#hk_E9IC+NV&$$UpXnVnX1+fIqiux;xCbOwP)F}hb*!9 z^c>5%GRql-YU0lxVzQY9z6a|bdTNQu`K!;(qY_ob}v>_kAwiXasGAGpTqE%K4g54cX8(&s6kK#Iot)c*1`>3zA> zTz_-4B+V@KgE?WMbu=zHrp~FxbZ&Et+Q+j({%?oNvF`-vUVoo5P1z$#AiUV1f{bCCSEb$ zzI{u<7X4es_Zja=o3~j%VlhF2P|ta**}c+coMDQbDc8zcRng(mTPobY2h5tLV{djb zJ-uv9jj}{r-z+3dSia>YF&Hx_zJS=MU<-QWf@g9KhHp{gH1EExT$=R z+8gs5Z|Aba1>VK;OMgI+qJ8l8yhA|pYNwv!>`%j5fXgXvS9bQ^QaFBQUQuzsTZvzx zb-uklJ3;9Ii^1qa+3S|6yoFA@iPEQQLu17WM|Kn+Wmy98I$L^|^0x~eM%Gi=SQaL6 z90Kt~vIAIsO;nh@v2HC6axX5gpyqx{)B4F_QQK>=w^M03!u!?8IrAK+h|)4^kUP+* z4EwhOA+3a&zOU~8Iade|FWA^+IzC9pS3Ny&`rvsjN!IwaG@qoZ_FXX?$`=W-qTVcx z=AsrFo0YK{J60U7MPEq7YrY?l%cS&GdZEp7io^n69o#qa1M^|F(O@iYySrr#>-|)B=^!%>Z;F($85Irb=H?_)+ zdHC&_Ux8I#0W?8pF&v`nVU#Jhg<)xKaHlnyOy0Z`%-;8@4ebR-r-*4 z=e?%KRIX{`0Y(#5$n6z=dF}!`bYJzk?f3nd#v)GeMA~_rJ{oXb{HzTYCrfxGtAt&1 zS362oFK<3ASNypN*Az4Ah`)fU{CW|vv(835_%at`XG?3oHFwj|Ab?vTs z-X9<6mweFK+OpVBALSi+Ceg9|BF(BWEhnpPto{aac;o$0HdzYZS5CH^Z2#E)!wH6O z$5`Fl()UN*xHRjvxg3w#ZJ)qefCb^>b|)!p01D{!yzwEaTo`Z}`>iy4lVsl;@H^KB!i$|unJ3HpefM=*gUOHkWN8&B z%Z`qxaS6M`H!5_Ew9WpA=Y-{Q!i!Z#$JfTQ);G2TuQ#@rZZa*Dk6^qjnq6#tESNrB7{ZbDeJmtswM0+H4%c=+b}dSvHXJ4 z#^L7Z=^mD{YbRRewO{ch)K6%PHQl^#VFX#t)ZL9ccm89XG)~aYOaEC!nO=L%K?D%ZYW`p(_N!$z5yRItJtJ+=*Cw}x zbetxBS&~dt6YeJ<<(mWI`Ud~;xWLxxN@w{W4r}PeI=V(_bO$|dV4<`8q%Wz7BwUX^xV)$JlAEN$|PCpBbbV|m5@pHC? 
zJTHZ+iaH{LYc<08&vjf%y`!R=&BH^{F&(swqyjCgqZKD1vM)~9SI_>=d(K|#5Ye>f{<>JOVOR_;8~FvU9BaXG?bgU8<}7LY{=4Lo#3BFo z6x7F*$)9D14NRs;)KxpMT7}6!>1Jwb>cTY8G8d|}rz#T{@i=J_@>x^X$fhd&cHP~t zBZCS>jQTrf_%R`RG2CvL=)>&%s38Wf@$cU+`gi}UsaX=&ww}nl5hX-EtoC& zD>Lzb$K*5qkA(K$C%`Msc>kXd38%XGN*;WdNM2p^{H1FBd;cXd5qbX-9ZgHXJ$HPYc%gIc4MN;TnS~Wq64=xyYWrU`s|D1_LYpCrvgrI zui#^EvA+j9fq#zSsCyOTo?iazGfM))5#{OS2N8ObB*Ax(YV{o0m=;*?SAhpP8&tf# z>+puF3jgri9btY!4E$RC`+CVU(=(H4yI=9}@Jp7K zY8Y;o{HrU5`W6i z4%q^$W<;s4irQ)ag`0LXf zv0J;cPr}QtmONypO{qUM59lb63#Ur?NgdDj{?LTfFFHBm!d6xGQmVb#_lFdyh#7XV zK_|BkzgwaL1;MlHm>qsM^XzvYmC17glV&x(HY?42@H559$l_O}4{W!u+9VdQc8FEI zXk2r~W{l^%>299++T*}V@h;%x_x1E-WPddDGzYr39sDkLTt0n2$R0PkixF*r!@^z) zL%Noj$Fbj{93Ihg>`_V6CD8~+W@#PXQK^~oQuSG3hMyHM<75`z$;y54|J8IqcUq~U zH$zT!i#sNU(8KA{^gzMBWOePoRp;aOZ|#>+xNz?Z1eqO`OI4F0y%w}> zn2v(Mr7)e{RmbP_IM~IoFI|e!OK{WM!=FDI} zM^IyqX#H55W)5)?fZez?^QR*5=p_BL-)IewVAtes3wk1b>yY5GR}P7d)}t=6fyr+h z(TVmmxc9K@dd!i*T8q^ZdaCIcWpK@n2^VA<)KZ*W=EsEtqQ6U~zI8Ws#m)3qX{IvO zt!SuoR~u(u6l=Xy*Uz4~R)^2JNqO4vK;K+EOCJ67ywWL2hSp)}eM!y6QaRI3tMV<_ zk*>Lj2nc$c9SU4|X4G)E#&y-ziZOw88E4d-0B4b&?R6XSV&x5o>jfiw1nxB#>K=`k zdwuz~ImyGL)x1Z0AMe70w+(W*TB-v}cn(2T(UV{B;}g$Tqxvb6wxRLJ^qc=ZL$beH zZyCjqg3{$a+XlAF<91C=g8{d^5B{_k^#0yEu7pX>YSD7*1Dm-oEvJ8cCo))^I(TPy z0zaC0$@-jYbSX@Q%am;>0(URr5t zjXvbsk#Nfld+qpQynk&>dFNZ(Qc%!@@C(vsAI8aV$ExxeKZW(a=Qw@i@_36(c*OIw zomcDX`&fz#$r6sz_GX!8=dZc#zTCpFCwiRPyAh1>aCe=CR!rgAjkaBHJGlKf*2WHZ zPUSu-Wl}QJdiB^#*Z=AvRg~hu7|EGGtJiQohBo2-fgmDH{v0+Iy}qtU%rPTnLNb+I z62Y@Anyd|y2dAqTAH2P+H|rROUG}Ilr=0O%@f0opR&>+!zB^|)VE+W0bn1zO1Q$td zw5gVBS4L?<(yP`STcsW+Z?6HfwuqOx)y9ol*WZY13)2!7$3)5E7(N&wVzs`tGa!%p ztuMO=x&HBPI6nTbjT&!5UW@xzA4DE)oR4swoDvfAxey#9N-vdtun}Ana7M?lXY%6T z<$)OLNqZ+b`^@#{{rPSWL~Ql0653)VFf=R+9N(okU8m{kd1%=xEY)nj7$)lBFk9IT z*FY3!pl$jB;ho6xo|T=mcINcIBlgGuMZyMaOSM?nM#H-}7|-4zXVP}8xr*#4`BV7B zaxwH*Pp=mHFT{sv05q%YvFmm^%}(1X_TXeM^W@@E2+1F_2lZ7Umg`6Q||Ou35+xfHf9 zo&bVeHxgDWzCx$ZA$F@=s3U-Bf>}wIMp9z=e7cKggIzJ=?CBORoG=4;=V{1-hYt(T z@rF6h{?w-azSvK_*1nR7c;WsH+&hVZ|n%T6YM-%ms``! zC%itCTxUOWwyE6GR;oSc8;^8tSS?MQ3t@6)S%!vtm(H!fQoRsQB&J>nxx>Tzlvv7} zE5PEboEY`e=H+V$51J4{DkB1D?-KY;`!*+^UWUyYmWD!_-zAOg$%YVESjZvv+B_twIrUuN+2?=)&f6rN}oWI0d^pBhFi-*AbPb>gKW&;In#@c7RP9V zdzWk$xGVPc5E4r@7?gTe!7BOwl>i^xsf`{xz;N^k$xbfNp4jnFn;Hw6!`LN}7rEt8fB!$x%U zqzt2^j)_mqEg?D?m-TD5!4Ye&F2H;>iikf`zZ2+PP5AVhJ%yVMv~|&C3|_p_9aK=f zRPpmWO9m6Q3enilo{k>UzU+s&it0b+n-&phuYXrydkQ^=t;29g7Yz=hHpz1LiVD9b z5+Z2*ZyIU4M^bEQY&JcXn_MAO`m6H$;V&k>n=tDJ*((k&CN4fKVsCXFBqz!!z46g{ z$;Mdo@mj3#)b&7NYx$+dp%FQIc0Tt^H`cR;B<9lLNogE;R#MHX7+IzokFZE4@8O*J z#vA9y>-8aNI|XYbjVc^c6--VUjIEDjf(kuIE~Yk*oGLkwxKUE7un6tKU_Jp`5i3Ko zDmp4=XWlPcn9>dxTaNtN!GnRXmc)Rd^ z?qpu>ce&fe$#mFpRJ; z#}o3*h>}X$s+{S@GEW}E-GwuEcRCfJRfw#(jrf^T(C}@Tt8Tm7Sas+36_1*ZPSM$u znm=iZ>iWr9%w*I|h_xW3QE{Spks7I1329+xW;xRD7jVjOQc%-@So08PP0%rHfVcF| zxqmQ`tstRXCC_-zt3y>e%?sWiik!|&FXxtt@Ffvt%PH6>n@(6-=mjRI8&dYXN8keD zRKWX$el18M1CrTTDD~&Bke*vHph1Le7;p#cO7Y0h3xOKLY}s@4f1ydZE(XnO>1^vW z*nH4VKw_hmrwYyhKqT{=nj?Qw5r4N$Bu8WO-~1dHcFWuC)VFcX_3}gg!OuK%9G>oJ z*l2NrWwF0LRT&@TO8ybK{lKZaW;mz^EqtE6Dx;!g(^_pd-#Ey=`8Ma8OD z3+~^iE_CMfFYzmT4)G}}q^s)~W&J;GeRWt=+Z*m6HUcUZAt4AzjdaR@l(clWbW7(T zjg(RY(jXu)v~){IcgG;zJq*oVc+UAf_dd_%4;Uu)?7j9{U%cyMd@o!^n8__vuMhUicw zvh6IxgsD0#0FCg3RgKQpgi+F51C4wT5EIS?CIJ=xwCR-o?sgn2WLhj#>W`wQXSPA6q| zm24wNR{vINz5kXC7wm-w6J^R*8^4SE_&5KACU6aUEJ%PeO;;! 
zto|xGiS;aQw9c9x^X++u#CvtMZ%ExivI4Jk&3xSgIfdP6FvTxlNT1a5JG_``}D zY@=l6FbfjiI<20cDTNPDYZK+V!qi$@~*1HpAlEK{Y62PKPTWDl8PC$iAR5eMW5yk8!wQ z0|{U07W=d6&Y#g(Aj(+COlX6pF*ZK0CQOL*JmCU&O!$y28%6a#Rq-LaE>+Krr(#qN zNEQ?l;{7s^&p{ArGVS)oGul$k_JQFImk(h5S*^L`(9`eWn*W+rD+wZoDb>IINlqCJ z5^s=N&yDj9k^4^!XCqzjRisWd1A`-sPZ*q;=|9lqzBefUg9+N!+V1D|8?Thu{pkgA za`iM`UA52ih^vc%f%bQW@Zm9QY;0VAjvI`+N9*1D>M_^lqQat#;!@v=U3{XvFvp!8 zYLLv4Oz{yO9pRjGzc7zpSe8$S6fbd1ylpZ#Gpj1t`Vy?|+d-S3jDurFJvN_)ZM3vN zzpSL95p#gApfPH8mg;7tRe3B2KINBl+g!}JsT!ZWR8p3~(ji{J^}63cjS$J>9R1J5 ztiSn_g-!MNW6H{oL`o@ZmIZ3w=4(0u=Lcd-!RUAqacw3rt9)C~86r|705PDImNw;d zz6Wl1aoSr&?l>@a3LWj6>K)jCEz5KXU^>|?h9(GRvr&L!fz`F92|7xpNTc+?bk73U zI5%bS*^afSu(%Og!-zb+*En222wBW|A%U~^yUr$NDrz6hoIlTrdc4{$BnxgO^}(5x zT*f~n@C%Lx4VsRWQ9>%8_S)HO-u?D-^POsW-6Lb;uZWnC^ep_u?WCp(PX5M@nMQJs z%aMLnUaqU_ms933ictnZMo?kC2f#sUwlHW%b1P&2YQyt*>3^e){P4HA!)Eb3SI#7j z!hEoUJ2%Jb3N=H&J2Rlf$qJ`1Yq(8aTV9&|tCI1=Rqer+?#K|>+j-@Bb_>Y=ZcGh? z(}KXL&ItWebz&@y?HF@5Hfr|MVU3Ejd1_#-s#E`;w^R6fd-v~Wo6!R_S;JS0@9^H}$kO@*ksr%xbI|?z(=D_#%%$%F&+hCm!@slUTK@s;yh0Fjp zc_H(C2_$S+CsQ6)oFm^o^JW|r!%qA9_=$26TnDfCM)p6i{(VadpOcJ;VMbNWwqora4AyJ+|9$@I<0c^7pMwG^{6lsmw zyGzAn^%@fr|EaQakr7v(4LLN#nx}Dz3-NG8iL8$#TA3tFP1!qc&UlqX^4i+^SQ`fZ zH)`OL1L4mAhoz46mwi&xaDwi{+BvimH8FWaO>OkSFvI5L-~q?Q#d89hF0)cyk-;4_ zZl>&Z^^cr<(8z7izm40`f)nRykI_!b>ED5YQvh2gvpMGL)HzZb7-YU?L!>0=N+$-t zj5`BGWx5${(B`5Ch1ErcWu|L=|J@um7H|rkG2O=JCyS~YlPvo|PODQcUX8oO#SfV@ zxj?mrsG^H<&`8>FsmtZ>oSI_BLu$2yl=s=i2Jz+E!a-1XeX;$>BeNVjv?a(y(gMOP zn|}Ov{3#|;!w6OuroCU`$)HJ+^PgisVNu8I!M2Hsn4O(ip3RJm780(C=)5k)JmjVc z!8jqje%5uly}NsMu4NGP>Q0e=oWjQ3f`Z2G?BJoPYt7AaxNUTC2nk>P7-p}ibR&33 z5`<78hbSpM1~rl)lJvyt{$=c3Dj9SOQSp5@UKM@1dSGg-7cR{7))Pz_k!+pQyNVvkdA_359pl>8< z)p3Kt!h+ag%MM7Y|MY23;1PueTMMN3pq&tQyCgxA@HSSikS}ATe|d$gg)oO_p!m`C z{KWcwt~qF3uTmSd>TfYo6Co0;ol&ZoVXXlA7jJWfw2>wI`1wWGd2CK(xE~&OqQmC5 z$(lA)>YAsg#ZZoi8@4m9>lDSsP{IS;J3;yv)iCTY2t-RTR60eifYFLbCJ<)%|ASo3pZGjVltnaLA%^oC9;wXvw8qVa5b6@Q(oPX(#oJlZ*v zx|Z`;FEcZ9igy3DYpkqxGtPEA zbV?MtDv2#aiim`Z4~;mW%SPVJ5s$-0TL__DVYySjodv2+8r0ttzpT=#-a6YJE`X>W zc2}S6{JLglW_GV94!z*+iYC(9z5;YFT;vJx6qGgJ34FNa&xg*bn;wS!=wH6@Xw-~w zw5F^u80F;$J7vzzJUuSq08%RcTgv6?o|GrbwXaSnvO3K%Enhz zlhN?;`GYy{dEvr5S#~L<}YVg^q!?8%5&bv4*lXK`X1=U#=N^*KfsYge}0NeCtJpZqDq|G*SWYVUeE7V z9-Ri*uVY?`I4Y z9^TyS)7*M!+_e$_s-RQQuRkj?U;L8=8B5bL;xgGzBy`{x8gW`U{G$2ifr_u-s>tc z;DTRJ5p%c$nRO)Y$*rS+*SZZyJo5>a?$XoM_%s}4Fi0K0AUY-(5ccsI!)EQZ&S3!P zqGjyLs`+uB{+8HV9*4HZU&LO3v54?o<5GIO2g&6t8b(y z{@$K}Uk{D&=#Z{#@Dr+j)zjVDF+v)?L~*^;G(6icL$DrX6aXow28^;)ch@c+Zd<*y z+oQWJxLS6VIe}AAI3R=_R%6zTKnMvM5rG|?pq#|&B;cFHWhGL$czxy)EChlzbaA{V z9C8S@(Ld;**ePkHzE|X)qofA!)U|KK2ekkP1*$I4RjDyu?COMgFm?-lNGgxvd^osV}bJ(m*8pY z&Akj?9n@)=sX`V-%7x3$!zLJgE9^X&=e5JEPcmex0pvTq%kEr{ooWJE@kAMo`AIT`sq~|CG;dBo70pm4edXwSWdcnz)@R>XU z9bmjk&lWT+w5CGp$2xm9gN=6$=7UIiVuI|R5|Z#9txG7^qjgT+oRQt^aem;{)WxQ~ zKA0(!2RXD#3UT+ulfxF(bn1B28VZ3Bc8d!ODw3#7*1K|_sz68U@q#uRe@WUeRw}xT zU+EN#F)hRi^Q4cOhGgqGX|W=lUd6dku6ypTt&uegU6$v_L=etSE|W1kztQV?JAq#v zzaM}pG&#`{#wBV0eyl@uz+L0{7*=~h<|^wEK~Z9a{XQbOGeIvAK;z6S=0>J1leg>_ z+e(RW&6n#t%Ahcgiex~IOr1}Nl|Hx6uvhX_zbi?aETBHe?6V6l*qQOdB5}Q9Voq%P zPjd`vt(0C`Dv*%->sM%3V@=X!ZwpjM_-v0pISAmH^IM~^lg`Tj45JZYbV6BKSylyC zGKf0UZ%RwQ!3LF0J6${FuWQMSWqzk3WKW8W+?M+{QAf0%^V%_T!<%eTj;BGsv);~yg<4#iVdY;4-h&_Ff~VycjqG+;u&zy%77!Q&$8v#ayJcoN*t=BG zvEax%SPZj&PA|LD(AK$rP``+IhZIfEZ|rGFq$tq_v}n@a+>4&)V=bT9QMaC`e%@;NO}w7F4@Lgi)}S_<81t4?3P+4$~|Vy_%G++$-mD}(>^@}u+ zoU}csjjFmd2ACP^N)Q4tgP)Kk^df4j7Fv4`fCVg?TO{N=Yx%}whXJfIK;=(HvA#N> zunm6X=7P!9*xDaQA=?e`E=0$`!wx0CljuWYewTR0U{u?9Tk+#L2HBN0y&{-XQi?)kK9w>6 
zbvLvtqX5O6@tQu=G#NRBDpv>OHbtI>&bc|psQmI0@1HCzkT+be-BeZ<{yBJGT!vpH zSNVDVc)cUG06qP>Yq*Bm__KrRCpePRTxOx_sF7?1IRBZ=b6B2c1BiAQ)b?(I_=aLI z>oQ#XjTm7#3~L7=3vM#M(Mr5YyiHxt+i+?vm~?;K;D4$ zMi=mg)d8(@)y^hPc<~8xIdFwLrE!38bPgcwMFupZrm&;VyYd8KeQ5?Nq^diK7m^)I z;N3leOy84v8pl0$uR9Gy|Caw2JAg^e!fn01HI%60=4o*Kx2+-YXQjc?4xLJ-O3q2k5;u53+wvQyTMd{p3vYP|VEO zgZhylf{BUKSijrF3oy>ghZ&vcd~0v47~u7~M%}LAA~@x&HVxN1IwX!j{Jo;GK0^I2 zIC$UMENOjlHbwHu^_#wXC*~-&qpUi4l2OiZ9Q>)v7m`o5_2+!EUbB|yE_TR{6^oSH zeos%Qhj3S->+wThff%)-cB0(}&3%=S%?(RS5bz~g}+b0^p&7{n|v#>J}M`e=Po>E?77T0pe|w0b!v9B%KE zUn&Od9E-E7sHy(j_`0fQv%Rk#aBlzTL~K$gqV^tP6dmH|c6dM{hD{UBS<+ zMPHn3r4xn|fLWVNsIiw7Jg>a|yBpMG$!d#g!{Rzv$1ndN^%vv=(^yRunLx^=do5Sx zlB_L!TTkpvy{dIsCnrrAEYR2N?2AL$J|B#;{%ma-*qnuLR_Sn=ud|~VR8*`MZ>3Pdr#-|JL^fs6C`W#37+RaXhL1%Vjy=0dkXma}x?zd$0P%eCQc7v&U`x zaJ{BxN2-J=Ck4azzW19{Il`)BKB(^T}4`+@^u*0PP9Zw12ep%k`T4F6DuI zQ0cpX`aoTPIU*3VpzONT7`;^Ly573Tki z{hE>bLmgLka*H(AP4Rfz$+*~T_yrxE1n$X)4^$;q(<}li$(2q8%tJ#%J6mUQj~*{# zRfVR2x{6Kx2Gut9HmTq>PLd-(oFD*yo|_|;({(5;9hI(?NXM&!oI-~w(ZHVYYy9Fo zfp7p`EiFC0-b}UZmt<>IN*2ob7i;mFccLRAgu54Avwz-&qLwGtoosAU-7tSl*vyvd zIVJ1$pWAP@B+pmLo#I<$d|!}wxI9)gou~fuzLy|2&C2APH|JPfAMI)4;#8E+hVu)1 zdwVkr`(E2>X=-|jrUM8)x(bwLoowoQi%|?NONw}(`2-=!t#!d!lT$+iAy81r+jzcy zcE@Be34kH0V`JL*LdaZY@6VAZLxShi48PVliw1fgl|#eHo;Nk| zL)AStWki24(#ucZ4nj@1nJ>P_)KzifP*auN>Z!vzxzOqU)-`(R^2P3S&UV*Y6nJ#- zUDOap)%UAf7R95r>VV`fu=m@xh=EK)NxMzT>GwIwA3hb$ZvSI@uf@go$eFafM zqjZbj{CBtd96FG|{_oCx0C&y{^z!!p{Fy}=sogyHvS1%w?6h;R(>BxY>!&3 zbIicVsK*Oy9(|rE?cp~xRC1Mq-y+-i9PUJ(X#{rT81=D~GL{%aYgf&uixYm)j`DB|L2AJDS5@qG zcTN3+gJ}#6Unank!R#>VOF0EC%unX&0Mz1#++Uhz(LQP4zvMuB6#NbJGS8W*3Dg!OZ zmAfU*YuBO&GA~f3!v~NntTC7Q(Bx}7yByZMC< z10FJOs}J-KB=-kjR^pQEkPoLE?UJob6Q zEwz$8$oW4bODbx7`snUU4C?ZlRyE$AQB({lb8W>^BLALF8bF>@)S3936Aq)+5EDzw z&Pi*eZuhsB8-Y5Kj)yNTEH{EShmwm7$XX_=;~J0n`30L>a9dHZ?>$oh#0d;04haaz zznQMC^=n5DWA{Q2y1M!l%rSuF^jBlF?mcHA_%R#T>W-JR!bpO$@@gKzP)|xN z#>3Xu=+sM1q%E>MsdV_d$4lC5ZEPr5ig9Y)LNm=rQ$w=ew4M z>G;p^{$;u-2@v6Ds8!3`5$kK{`WF z5s-jVo!eh)wsakI-u$^cN;`$pr^pe!b41VFa;)5Ty12s<903N|iLp)Jx=E~jvh zsULMlMoagIdHnQ*vWDEIhK4r2?f1IRCEg78^AjOX%`|L`0$fckK8_ z;jLzgW5swV<*z{Rf>qUgP^Z)>2f}RAE{_vWCjKfnTlanS6W9p}5J7!g2e}-V^HfqT z&%a@?6H|61*w4|0noLwNmbun2GLaLM{$MUSyKSMZ!ubHw#&OXE#GzM``q@7sF18dv zq;Umj0J$8*>v<=?@#%o<4glRcX1;=y?@IbE0OFj`t6~GX3;TAedq>wtJdSuY5bj>V z*5t!V(I>-g=vRzg%kyrx^yXJWfWQTaPykQf0`h5K?D}YnT>X4mPZphy{|wWpU7!Mv zchQ%#8w6g;$V{LgFzPRo(mA4p)cuhIK9k%O!=b%S9His?$mmH7(4^VE`eTz=#2XUZ zPzL3v)?4Q>!rcBf(jI*#+1?!T?e_{|X?1H5j#|1TC2aGToiPGuA5tZY3&!j9>N!;H zwILS(WS09TqOVjVMuvWLH1QGMnlt>dVaJQYdo+>$DZ^&F77zS`vR@sWK5Yl0Hn60g zi>^pGy}T&;>T9`<1(a`Lv0n-9Elm|4d4Sv+f@yf77!&;^ImR0d$&sD)T>n`IfpRym zeJg#%V?tw@J@w=fuG8!96C#67S@9g?r0OfO(>{JcBGWk5Rh>|`j{zV6iXoDV2qXdm65%I4kr z#CT7iwqpQUXGJ*r^cH@>dYu)0P7LWmTD7C1nj?X^nOHiIZ1t0}iSQ5XN zmBSX9j>l)PFWGiJOCE3~fZi>6;zbc!utZ$nB)r(o{zELeK8GSeV+ zy>Dku49R<*0MgJK z2X?_t1H%0d_q2d1!~2N0*;Hwx{;Jd;twbK%1r)(NIMV0IR4Eb4*z zBawvDZkq77;(@%w6Qv(}a@MwPkQ35kidK*gQ4b%lJp+a76s2m~_q`4RSB-T2W4iXQ1<3 zoBXJvf||0KqK+i-q$tu{OG{1$eOc}VePbJ=_slEV8t-+NK(L%Zs^0*CL~fZ#eA!4d zwWDzw&Y*(|b^aOF#f?x6>8bc5>?!*U=FjSzjU!32r2_-rpV=i+4X5JlZ^Sw+&D&_L zL-C#bZDNUMmJ>Y+e;DVk^E~yu{cdfi@hg|R4b}ED2xRBkA{!*{J)-WMh2O>47Ea~9 zU1CsVIy&QT$bHy;%XVYl50oB#B)R)1F57<*0)gl*zL&u+%!|fxp;4{lNnQ;;7Yk!r z8xNzYM)1}D93edoCkaJQ;}pDDLsc5fsTF(6noz#Mto?CLb26~!8;*2}Qp&Z!<%B?} z43^p0;t~f7*>q6xLGubFE^Uy&z(7jupDUInK+OY@-`SRgFqq3mMGG|S70uYFoEkgqyl*xiQ6Ks&*pRzSO5WEY?s$APqTJ&yTxh zrQwF@4w- z)nUg&E2edSCQ12JMc?9FUH=67DiaHmqF3x%tDPJlAFe=?rLRAiT(jSh6J_||9Q!#l z-xT3tjSx}EKAP)tSyQv}@^$N_%OksebPXjIy(ThB1de<($a?@OcK){5A 
zyB}Os_YcVTa6f=RD(-_{;K(GjS$|K-gy-tr;72+u;pWxflvT~Z z+rq-lD?xU)4jYHwy_+ONA=AG!B0V=)pBA(Ql^`N|riwfiHpYt>SG!6OR?V`;Ow@_o zHJ0R}j#jcf`%{a=wr9)D+~s)9zxRXev(IX81=HJ($Yp~V-l@75QoS|eeNd+ zva_1Ff9jakb8H@xbo6qy3yJMf1n;#0uUm(-mBSLHdF(m`tVza7C|A4a4d!>d!zDXi zHb)Y_g9Sl;W0CFtg9j?sGhw-$Pnoht9fl$hgYyKD%-<@mubC83L$ip#mMB|FYRiP9 z5+#yKCX~gsWl&ZEJ!KN+djpumT2#bxUTGOsuS*sdw}wVXyCh0u&)N(EA#Ye XieJD1Hfz-HtN+DCWQ6nI>U#el=d025 diff --git a/neural_lam/__init__.py b/neural_lam/__init__.py index dd565a26..da4c4d2e 100644 --- a/neural_lam/__init__.py +++ b/neural_lam/__init__.py @@ -1,5 +1,4 @@ # First-party -import neural_lam.config import neural_lam.interaction_net import neural_lam.metrics import neural_lam.models diff --git a/neural_lam/config.py b/neural_lam/config.py index 5891ea74..d3e09697 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -1,62 +1,171 @@ # Standard library -import functools +import dataclasses from pathlib import Path +from typing import Dict, Union # Third-party -import cartopy.crs as ccrs -import yaml - - -class Config: - """ - Class for loading configuration files. - - This class loads a configuration file and provides a way to access its - values as attributes. - """ - - def __init__(self, values): - self.values = values - - @classmethod - def from_file(cls, filepath): - """Load a configuration file.""" - if filepath.endswith(".yaml"): - with open(filepath, encoding="utf-8", mode="r") as file: - return cls(values=yaml.safe_load(file)) - else: - raise NotImplementedError(Path(filepath).suffix) - - def __getattr__(self, name): - keys = name.split(".") - value = self.values - for key in keys: - if key in value: - value = value[key] - else: - return None - if isinstance(value, dict): - return Config(values=value) - return value - - def __getitem__(self, key): - value = self.values[key] - if isinstance(value, dict): - return Config(values=value) - return value - - def __contains__(self, key): - return key in self.values - - def num_data_vars(self): - """Return the number of data variables for a given key.""" - return len(self.dataset.var_names) - - @functools.cached_property - def coords_projection(self): - """Return the projection.""" - proj_config = self.values["projection"] - proj_class_name = proj_config["class"] - proj_class = getattr(ccrs, proj_class_name) - proj_params = proj_config.get("kwargs", {}) - return proj_class(**proj_params) +import dataclass_wizard + +# Local +from .datastore import ( + DATASTORES, + MDPDatastore, + NpyFilesDatastoreMEPS, + init_datastore, +) + + +class DatastoreKindStr(str): + VALID_KINDS = DATASTORES.keys() + + def __new__(cls, value): + if value not in cls.VALID_KINDS: + raise ValueError(f"Invalid datastore kind: {value}") + return super().__new__(cls, value) + + +@dataclasses.dataclass +class DatastoreSelection: + """ + Configuration for selecting a datastore to use with neural-lam. + + Attributes + ---------- + kind : DatastoreKindStr + The kind of datastore to use, currently `mdp` or `npyfilesmeps` are + implemented. + config_path : str + The path to the configuration file for the selected datastore, this is + assumed to be relative to the configuration file for neural-lam. + """ + + kind: DatastoreKindStr + config_path: str + + +@dataclasses.dataclass +class ManualStateFeatureWeighting: + """ + Configuration for weighting the state features in the loss function where + the weights are manually specified. + + Attributes + ---------- + weights : Dict[str, float] + Manual weights for the state features. 
+ """ + + weights: Dict[str, float] + + +@dataclasses.dataclass +class UniformFeatureWeighting: + """ + Configuration for weighting the state features in the loss function where + all state features are weighted equally. + """ + + pass + + +@dataclasses.dataclass +class TrainingConfig: + """ + Configuration related to training neural-lam + + Attributes + ---------- + state_feature_weighting : Union[ManualStateFeatureWeighting, + UnformFeatureWeighting] + The method to use for weighting the state features in the loss + function. Defaults to uniform weighting (`UnformFeatureWeighting`, i.e. + all features are weighted equally). + """ + + state_feature_weighting: Union[ + ManualStateFeatureWeighting, UniformFeatureWeighting + ] = dataclasses.field(default_factory=UniformFeatureWeighting) + + +@dataclasses.dataclass +class NeuralLAMConfig(dataclass_wizard.JSONWizard, dataclass_wizard.YAMLWizard): + """ + Dataclass for Neural-LAM configuration. This class is used to load and + store the configuration for using Neural-LAM. + + Attributes + ---------- + datastore : DatastoreSelection + The configuration for the datastore to use. + training : TrainingConfig + The configuration for training the model. + """ + + datastore: DatastoreSelection + training: TrainingConfig = dataclasses.field(default_factory=TrainingConfig) + + class _(dataclass_wizard.JSONWizard.Meta): + """ + Define the configuration class as a JSON wizard class. + + Together `tag_key` and `auto_assign_tags` enable that when a `Union` of + types are used for an attribute, the specific type to deserialize to + can be specified in the serialised data using the `tag_key` value. In + our case we call the tag key `__config_class__` to indicate to the + user that they should pick a dataclass describing configuration in + neural-lam. This Union-based selection allows us to support different + configuration attributes for different choices of methods for example + and is used when picking between different feature weighting methods in + the `TrainingConfig` class. `auto_assign_tags` is set to True to + automatically set that tag key (i.e. `__config_class__` in the config + file) should just be the class name of the dataclass to deserialize to. + """ + + tag_key = "__config_class__" + auto_assign_tags = True + # ensure that all parts of the loaded configuration match the + # dataclasses used + # TODO: this should be enabled once + # https://github.com/rnag/dataclass-wizard/issues/137 is fixed, but + # currently cannot be used together with `auto_assign_tags` due to a + # bug it seems + # raise_on_unknown_json_key = True + + +class InvalidConfigError(Exception): + pass + + +def load_config_and_datastore( + config_path: str, +) -> tuple[NeuralLAMConfig, Union[MDPDatastore, NpyFilesDatastoreMEPS]]: + """ + Load the neural-lam configuration and the datastore specified in the + configuration. + + Parameters + ---------- + config_path : str + Path to the Neural-LAM configuration file. + + Returns + ------- + tuple[NeuralLAMConfig, Union[MDPDatastore, NpyFilesDatastoreMEPS]] + The Neural-LAM configuration and the loaded datastore. + """ + try: + config = NeuralLAMConfig.from_yaml_file(config_path) + except dataclass_wizard.errors.UnknownJSONKey as ex: + raise InvalidConfigError( + "There was an error loading the configuration file at " + f"{config_path}. 
" + ) from ex + # datastore config is assumed to be relative to the config file + datastore_config_path = ( + Path(config_path).parent / config.datastore.config_path + ) + datastore = init_datastore( + datastore_kind=config.datastore.kind, config_path=datastore_config_path + ) + + return config, datastore diff --git a/neural_lam/create_mesh.py b/neural_lam/create_graph.py similarity index 74% rename from neural_lam/create_mesh.py rename to neural_lam/create_graph.py index 21b8bf6e..ef979be3 100644 --- a/neural_lam/create_mesh.py +++ b/neural_lam/create_graph.py @@ -13,7 +13,8 @@ from torch_geometric.utils.convert import from_networkx # Local -from . import config +from .config import load_config_and_datastore +from .datastore.base import BaseRegularGridDatastore def plot_graph(graph, title=None): @@ -108,8 +109,8 @@ def from_networkx_with_start_index(nx_graph, start_index): def mk_2d_graph(xy, nx, ny): - xm, xM = np.amin(xy[0][0, :]), np.amax(xy[0][0, :]) - ym, yM = np.amin(xy[1][:, 0]), np.amax(xy[1][:, 0]) + xm, xM = np.amin(xy[:, :, 0][:, 0]), np.amax(xy[:, :, 0][:, 0]) + ym, yM = np.amin(xy[:, :, 1][0, :]), np.amax(xy[:, :, 1][0, :]) # avoid nodes on border dx = (xM - xm) / nx @@ -117,19 +118,19 @@ def mk_2d_graph(xy, nx, ny): lx = np.linspace(xm + dx / 2, xM - dx / 2, nx) ly = np.linspace(ym + dy / 2, yM - dy / 2, ny) - mg = np.meshgrid(lx, ly) - g = networkx.grid_2d_graph(len(ly), len(lx)) + mg = np.meshgrid(lx, ly, indexing="ij") # Use 'ij' indexing for (Nx,Ny) + g = networkx.grid_2d_graph(len(lx), len(ly)) for node in g.nodes: g.nodes[node]["pos"] = np.array([mg[0][node], mg[1][node]]) # add diagonal edges g.add_edges_from( - [((x, y), (x + 1, y + 1)) for x in range(nx - 1) for y in range(ny - 1)] + [((x, y), (x + 1, y + 1)) for y in range(ny - 1) for x in range(nx - 1)] + [ ((x + 1, y), (x, y + 1)) - for x in range(nx - 1) for y in range(ny - 1) + for x in range(nx - 1) ] ) @@ -153,46 +154,82 @@ def prepend_node_index(graph, new_index): return networkx.relabel_nodes(graph, to_mapping, copy=True) -def main(input_args=None): - parser = ArgumentParser(description="Graph generation arguments") - parser.add_argument( - "--data_config", - type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file (default: neural_lam/data_config.yaml)", - ) - parser.add_argument( - "--graph", - type=str, - default="multiscale", - help="Name to save graph as (default: multiscale)", - ) - parser.add_argument( - "--plot", - action="store_true", - help="If graphs should be plotted during generation " - "(default: False)", - ) - parser.add_argument( - "--levels", - type=int, - help="Limit multi-scale mesh to given number of levels, " - "from bottom up (default: None (no limit))", - ) - parser.add_argument( - "--hierarchical", - action="store_true", - help="Generate hierarchical mesh graph (default: False)", - ) - args = parser.parse_args(input_args) - - # Load grid positions - config_loader = config.Config.from_file(args.data_config) - static_dir_path = os.path.join("data", config_loader.dataset.name, "static") - graph_dir_path = os.path.join("graphs", args.graph) +def create_graph( + graph_dir_path: str, + xy: np.ndarray, + n_max_levels: int, + hierarchical: bool, + create_plot: bool, +): + """ + Create graph components from `xy` grid coordinates and store in + `graph_dir_path`. 
+
+    Creates the following files for all graphs:
+    - g2m_edge_index.pt  [2, N_g2m_edges]
+    - g2m_features.pt    [N_g2m_edges, d_features]
+    - m2g_edge_index.pt  [2, N_m2g_edges]
+    - m2g_features.pt    [N_m2g_edges, d_features]
+    - m2m_edge_index.pt  list of [2, N_m2m_edges_level], length==n_levels
+    - m2m_features.pt    list of [N_m2m_edges_level, d_features],
+      length==n_levels
+    - mesh_features.pt   list of [N_mesh_nodes_level, d_mesh_static],
+      length==n_levels
+
+    where
+      d_features:
+        number of features per edge (currently d_features==3, for
+        edge-length, x and y)
+      N_g2m_edges:
+        number of edges in the graph from grid-to-mesh
+      N_m2g_edges:
+        number of edges in the graph from mesh-to-grid
+      N_m2m_edges_level:
+        number of edges in the graph from mesh-to-mesh at a given level
+        (list index corresponds to the level)
+      d_mesh_static:
+        number of static features per mesh node (currently
+        d_mesh_static==2, for x and y)
+      N_mesh_nodes_level:
+        number of nodes in the mesh at a given level
+
+    And in addition for hierarchical graphs:
+    - mesh_up_edge_index.pt
+        list of [2, N_mesh_updown_edges_level], length==n_levels-1
+    - mesh_up_features.pt
+        list of [N_mesh_updown_edges_level, d_features], length==n_levels-1
+    - mesh_down_edge_index.pt
+        list of [2, N_mesh_updown_edges_level], length==n_levels-1
+    - mesh_down_features.pt
+        list of [N_mesh_updown_edges_level, d_features], length==n_levels-1
+
+    where N_mesh_updown_edges_level is the number of edges in the graph from
+    mesh-to-mesh between two consecutive levels (list index corresponds to the
+    index of the lower level)
+
+
+    Parameters
+    ----------
+    graph_dir_path : str
+        Path to store the graph components.
+    xy : np.ndarray
+        Grid coordinates, expected to be of shape (Nx, Ny, 2).
+    n_max_levels : int
+        Limit multi-scale mesh to given number of levels, from bottom up
+        (default: None (no limit)).
+    hierarchical : bool
+        Generate hierarchical mesh graph (default: False).
+    create_plot : bool
+        If graphs should be plotted during generation (default: False).
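The parameter list above translates directly into a call like the following sketch (not part of the patch); the grid size, the 2.5 km spacing and the output directory are arbitrary placeholders.

    # Sketch only: build a flat (non-hierarchical) multiscale graph from
    # synthetic grid coordinates of shape (Nx, Ny, 2).
    import numpy as np

    from neural_lam.create_graph import create_graph

    nx_grid, ny_grid = 60, 80  # hypothetical grid dimensions
    x = np.arange(nx_grid) * 2500.0  # metres, arbitrary 2.5 km spacing
    y = np.arange(ny_grid) * 2500.0
    xy = np.stack(np.meshgrid(x, y, indexing="ij"), axis=-1)  # (Nx, Ny, 2)

    create_graph(
        graph_dir_path="graphs/example",  # hypothetical output directory
        xy=xy,
        n_max_levels=None,  # keep all mesh levels
        hierarchical=False,
        create_plot=False,
    )
    # graphs/example/ now contains g2m_*, m2g_*, m2m_* and mesh_features.pt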
+ + Returns + ------- + None + + """ os.makedirs(graph_dir_path, exist_ok=True) - xy = np.load(os.path.join(static_dir_path, "nwp_xy.npy")) + print(f"Writing graph components to {graph_dir_path}") grid_xy = torch.tensor(xy) pos_max = torch.max(torch.abs(grid_xy)) @@ -202,29 +239,29 @@ def main(input_args=None): # # graph geometry - nx = 3 # number of children = nx**2 - nlev = int(np.log(max(xy.shape)) / np.log(nx)) + nx = 3 # number of children =nx**2 + nlev = int(np.log(max(xy.shape[:2])) / np.log(nx)) nleaf = nx**nlev # leaves at the bottom = nleaf**2 mesh_levels = nlev - 1 - if args.levels: + if n_max_levels: # Limit the levels in mesh graph - mesh_levels = min(mesh_levels, args.levels) + mesh_levels = min(mesh_levels, n_max_levels) - print(f"nlev: {nlev}, nleaf: {nleaf}, mesh_levels: {mesh_levels}") + # print(f"nlev: {nlev}, nleaf: {nleaf}, mesh_levels: {mesh_levels}") # multi resolution tree levels G = [] for lev in range(1, mesh_levels + 1): n = int(nleaf / (nx**lev)) g = mk_2d_graph(xy, n, n) - if args.plot: + if create_plot: plot_graph(from_networkx(g), title=f"Mesh graph, level {lev}") plt.show() G.append(g) - if args.hierarchical: + if hierarchical: # Relabel nodes of each level with level index first G = [ prepend_node_index(graph, level_i) @@ -297,7 +334,7 @@ def main(input_args=None): up_graphs.append(pyg_up) down_graphs.append(pyg_down) - if args.plot: + if create_plot: plot_graph( pyg_down, title=f"Down graph, {from_level} -> {to_level}" ) @@ -363,7 +400,7 @@ def main(input_args=None): m2m_graphs = [pyg_m2m] mesh_pos = [pyg_m2m.pos.to(torch.float32)] - if args.plot: + if create_plot: plot_graph(pyg_m2m, title="Mesh-to-mesh") plt.show() @@ -395,7 +432,7 @@ def main(input_args=None): ) # grid nodes - Ny, Nx = xy.shape[1:] + Nx, Ny = xy.shape[:2] G_grid = networkx.grid_2d_graph(Ny, Nx) G_grid.clear_edges() @@ -403,7 +440,9 @@ def main(input_args=None): # vg features (only pos introduced here) for node in G_grid.nodes: # pos is in feature but here explicit for convenience - G_grid.nodes[node]["pos"] = np.array([xy[0][node], xy[1][node]]) + G_grid.nodes[node]["pos"] = xy[ + node[1], node[0] + ] # xy is already (Nx,Ny,2) # add 1000 to node key to separate grid nodes (1000,i,j) from mesh nodes # (i,j) and impose sorting order such that vm are the first nodes @@ -412,7 +451,9 @@ def main(input_args=None): # build kd tree for grid point pos # order in vg_list should be same as in vg_xy vg_list = list(G_grid.nodes) - vg_xy = np.array([[xy[0][node[1:]], xy[1][node[1:]]] for node in vg_list]) + vg_xy = np.array( + [xy[node[2], node[1]] for node in vg_list] + ) # xy is already (Nx,Ny,2) kdt_g = scipy.spatial.KDTree(vg_xy) # now add (all) mesh nodes, include features (pos) @@ -444,7 +485,7 @@ def main(input_args=None): pyg_g2m = from_networkx(G_g2m) - if args.plot: + if create_plot: plot_graph(pyg_g2m, title="Grid-to-mesh") plt.show() @@ -483,7 +524,7 @@ def main(input_args=None): ) pyg_m2g = from_networkx(G_m2g_int) - if args.plot: + if create_plot: plot_graph(pyg_m2g, title="Mesh-to-grid") plt.show() @@ -494,5 +535,76 @@ def main(input_args=None): save_edges(pyg_m2g, "m2g", graph_dir_path) +def create_graph_from_datastore( + datastore: BaseRegularGridDatastore, + output_root_path: str, + n_max_levels: int = None, + hierarchical: bool = False, + create_plot: bool = False, +): + if isinstance(datastore, BaseRegularGridDatastore): + xy = datastore.get_xy(category="state", stacked=False) + else: + raise NotImplementedError( + "Only graph creation for BaseRegularGridDatastore is supported" + 
) + + create_graph( + graph_dir_path=output_root_path, + xy=xy, + n_max_levels=n_max_levels, + hierarchical=hierarchical, + create_plot=create_plot, + ) + + +def cli(input_args=None): + parser = ArgumentParser(description="Graph generation arguments") + parser.add_argument( + "--config_path", + type=str, + help="Path to neural-lam configuration file", + ) + parser.add_argument( + "--name", + type=str, + default="multiscale", + help="Name to save graph as (default: multiscale)", + ) + parser.add_argument( + "--plot", + action="store_true", + help="If graphs should be plotted during generation " + "(default: False)", + ) + parser.add_argument( + "--levels", + type=int, + help="Limit multi-scale mesh to given number of levels, " + "from bottom up (default: None (no limit))", + ) + parser.add_argument( + "--hierarchical", + action="store_true", + help="Generate hierarchical mesh graph (default: False)", + ) + args = parser.parse_args(input_args) + + assert ( + args.config_path is not None + ), "Specify your config with --config_path" + + # Load neural-lam configuration and datastore to use + _, datastore = load_config_and_datastore(config_path=args.config_path) + + create_graph_from_datastore( + datastore=datastore, + output_root_path=os.path.join(datastore.root_path, "graph", args.name), + n_max_levels=args.levels, + hierarchical=args.hierarchical, + create_plot=args.plot, + ) + + if __name__ == "__main__": - main() + cli() diff --git a/neural_lam/create_grid_features.py b/neural_lam/create_grid_features.py deleted file mode 100644 index adabd9dc..00000000 --- a/neural_lam/create_grid_features.py +++ /dev/null @@ -1,63 +0,0 @@ -# Standard library -import os -from argparse import ArgumentParser - -# Third-party -import numpy as np -import torch - -# Local -from . 
import config - - -def main(): - """ - Pre-compute all static features related to the grid nodes - """ - parser = ArgumentParser(description="Training arguments") - parser.add_argument( - "--data_config", - type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file (default: neural_lam/data_config.yaml)", - ) - args = parser.parse_args() - config_loader = config.Config.from_file(args.data_config) - - static_dir_path = os.path.join("data", config_loader.dataset.name, "static") - - # -- Static grid node features -- - grid_xy = torch.tensor( - np.load(os.path.join(static_dir_path, "nwp_xy.npy")) - ) # (2, N_y, N_x) - grid_xy = grid_xy.flatten(1, 2).T # (N_grid, 2) - pos_max = torch.max(torch.abs(grid_xy)) - grid_xy = grid_xy / pos_max # Divide by maximum coordinate - - geopotential = torch.tensor( - np.load(os.path.join(static_dir_path, "surface_geopotential.npy")) - ) # (N_y, N_x) - geopotential = geopotential.flatten(0, 1).unsqueeze(1) # (N_grid,1) - gp_min = torch.min(geopotential) - gp_max = torch.max(geopotential) - # Rescale geopotential to [0,1] - geopotential = (geopotential - gp_min) / (gp_max - gp_min) # (N_grid, 1) - - grid_border_mask = torch.tensor( - np.load(os.path.join(static_dir_path, "border_mask.npy")), - dtype=torch.int64, - ) # (N_y, N_x) - grid_border_mask = ( - grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1) - ) # (N_grid, 1) - - # Concatenate grid features - grid_features = torch.cat( - (grid_xy, geopotential, grid_border_mask), dim=1 - ) # (N_grid, 4) - - torch.save(grid_features, os.path.join(static_dir_path, "grid_features.pt")) - - -if __name__ == "__main__": - main() diff --git a/neural_lam/data_config.yaml b/neural_lam/data_config.yaml deleted file mode 100644 index f1527849..00000000 --- a/neural_lam/data_config.yaml +++ /dev/null @@ -1,64 +0,0 @@ -dataset: - name: meps_example - var_names: - - pres_0g - - pres_0s - - nlwrs_0 - - nswrs_0 - - r_2 - - r_65 - - t_2 - - t_65 - - t_500 - - t_850 - - u_65 - - u_850 - - v_65 - - v_850 - - wvint_0 - - z_1000 - - z_500 - var_units: - - Pa - - Pa - - $\mathrm{W}/\mathrm{m}^2$ - - $\mathrm{W}/\mathrm{m}^2$ - - "" - - "" - - K - - K - - K - - K - - m/s - - m/s - - m/s - - m/s - - $\mathrm{kg}/\mathrm{m}^2$ - - $\mathrm{m}^2/\mathrm{s}^2$ - - $\mathrm{m}^2/\mathrm{s}^2$ - var_longnames: - - pres_heightAboveGround_0_instant - - pres_heightAboveSea_0_instant - - nlwrs_heightAboveGround_0_accum - - nswrs_heightAboveGround_0_accum - - r_heightAboveGround_2_instant - - r_hybrid_65_instant - - t_heightAboveGround_2_instant - - t_hybrid_65_instant - - t_isobaricInhPa_500_instant - - t_isobaricInhPa_850_instant - - u_hybrid_65_instant - - u_isobaricInhPa_850_instant - - v_hybrid_65_instant - - v_isobaricInhPa_850_instant - - wvint_entireAtmosphere_0_instant - - z_isobaricInhPa_1000_instant - - z_isobaricInhPa_500_instant - num_forcing_features: 16 -grid_shape_state: [268, 238] -projection: - class: LambertConformal - kwargs: - central_longitude: 15.0 - central_latitude: 63.3 - standard_parallels: [63.3, 63.3] diff --git a/neural_lam/datastore/__init__.py b/neural_lam/datastore/__init__.py new file mode 100644 index 00000000..40e683ac --- /dev/null +++ b/neural_lam/datastore/__init__.py @@ -0,0 +1,26 @@ +# Local +from .base import BaseDatastore # noqa +from .mdp import MDPDatastore # noqa +from .npyfilesmeps import NpyFilesDatastoreMEPS # noqa + +DATASTORE_CLASSES = [ + MDPDatastore, + NpyFilesDatastoreMEPS, +] + +DATASTORES = { + datastore.SHORT_NAME: datastore for datastore in DATASTORE_CLASSES 
+} + + +def init_datastore(datastore_kind, config_path): + DatastoreClass = DATASTORES.get(datastore_kind) + + if DatastoreClass is None: + raise NotImplementedError( + f"Datastore kind {datastore_kind} is not implemented" + ) + + datastore = DatastoreClass(config_path=config_path) + + return datastore diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py new file mode 100644 index 00000000..0317c2e5 --- /dev/null +++ b/neural_lam/datastore/base.py @@ -0,0 +1,553 @@ +# Standard library +import abc +import collections +import dataclasses +import functools +from functools import cached_property +from pathlib import Path +from typing import List, Union + +# Third-party +import cartopy.crs as ccrs +import numpy as np +import xarray as xr +from pandas.core.indexes.multi import MultiIndex + + +class BaseDatastore(abc.ABC): + """ + Base class for weather data used in the neural-lam package. A datastore + defines the interface for accessing weather data by providing methods to + access the data in a processed format that can be used for training and + evaluation of neural networks. + + NOTE: All methods return either primitive types, `numpy.ndarray`, + `xarray.DataArray` or `xarray.Dataset` objects, not `pytorch.Tensor` + objects. Conversion to `pytorch.Tensor` objects should be done in the + `weather_dataset.WeatherDataset` class (which inherits from + `torch.utils.data.Dataset` and uses the datastore to access the data). + + # Forecast vs analysis data + If the datastore is used to represent forecast rather than analysis data, + then the `is_forecast` attribute should be set to True, and returned data + from `get_dataarray` is assumed to have `analysis_time` and `forecast_time` + dimensions (rather than just `time`). + + # Ensemble vs deterministic data + If the datastore is used to represent ensemble data, then the `is_ensemble` + attribute should be set to True, and returned data from `get_dataarray` is + assumed to have an `ensemble_member` dimension. + + # Grid index + All methods that return data specific to a grid point (like + `get_dataarray`) should have a single dimension named `grid_index` that + represents the spatial grid index of the data. The actual x, y coordinates + of the grid points should be stored in the `x` and `y` coordinates of the + dataarray or dataset with the `grid_index` dimension as the coordinate for + each of the `x` and `y` coordinates. + """ + + is_ensemble: bool = False + is_forecast: bool = False + + @property + @abc.abstractmethod + def root_path(self) -> Path: + """ + The root path to the datastore. It is relative to this that any derived + files (for example the graph components) are stored. + + Returns + ------- + pathlib.Path + The root path to the datastore. + + """ + pass + + @property + @abc.abstractmethod + def config(self) -> collections.abc.Mapping: + """The configuration of the datastore. + + Returns + ------- + collections.abc.Mapping + The configuration of the datastore, any dict like object can be + returned. + + """ + pass + + @property + @abc.abstractmethod + def step_length(self) -> int: + """The step length of the dataset in hours. + + Returns: + int: The step length in hours. + + """ + pass + + @abc.abstractmethod + def get_vars_units(self, category: str) -> List[str]: + """Get the units of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The units of the variables. 
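The registry in `neural_lam/datastore/__init__.py` above, together with the metadata part of the `BaseDatastore` interface, is intended to be used roughly as follows. This is a sketch only, assuming a valid mllam-data-prep configuration already exists at the hypothetical path `example.datastore.yaml`.

    # Sketch only: instantiate a datastore through the registry and query
    # the metadata methods of the BaseDatastore interface.
    from neural_lam.datastore import init_datastore

    datastore = init_datastore(
        datastore_kind="mdp", config_path="example.datastore.yaml"
    )

    print(datastore.step_length)                 # e.g. 3 (hours)
    print(datastore.get_vars_names("state"))     # state variable names
    print(datastore.get_vars_units("state"))     # matching units
    print(datastore.get_num_data_vars("state"))  # == len(var names)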
+ + """ + pass + + @abc.abstractmethod + def get_vars_names(self, category: str) -> List[str]: + """Get the names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The names of the variables. + + """ + pass + + @abc.abstractmethod + def get_vars_long_names(self, category: str) -> List[str]: + """Get the long names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The long names of the variables. + + """ + pass + + @abc.abstractmethod + def get_num_data_vars(self, category: str) -> int: + """Get the number of data variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + int + The number of data variables. + + """ + pass + + @abc.abstractmethod + def get_standardization_dataarray(self, category: str) -> xr.Dataset: + """ + Return the standardization (i.e. scaling to mean of 0.0 and standard + deviation of 1.0) dataarray for the given category. This should contain + a `{category}_mean` and `{category}_std` variable for each variable in + the category. For `category=="state"`, the dataarray should also + contain a `state_diff_mean` and `state_diff_std` variable for the one- + step differences of the state variables. The returned dataarray should + at least have dimensions of `({category}_feature)`, but can also + include for example `grid_index` (if the standardization is done per + grid point for example). + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + xr.Dataset + The standardization dataarray for the given category, with variables + for the mean and standard deviation of the variables (and + differences for state variables). + + """ + pass + + @abc.abstractmethod + def get_dataarray( + self, category: str, split: str + ) -> Union[xr.DataArray, None]: + """ + Return the processed data (as a single `xr.DataArray`) for the given + category of data and test/train/val-split that covers all the data (in + space and time) of a given category (state/forcing/static). A + datastore must be able to return for the "state" category, but + "forcing" and "static" are optional (in which case the method should + return `None`). For the "static" category the `split` is allowed to be + `None` because the static data is the same for all splits. + + The returned dataarray is expected to at minimum have dimensions of + `(grid_index, {category}_feature)` so that any spatial dimensions have + been stacked into a single dimension and all variables and levels have + been stacked into a single feature dimension named by the `category` of + data being loaded. + + For categories of data that have a time dimension (i.e. not static + data), the dataarray is expected additionally have `(analysis_time, + elapsed_forecast_duration)` dimensions if `is_forecast` is True, or + `(time)` if `is_forecast` is False. + + If the data is ensemble data, the dataarray is expected to have an + additional `ensemble_member` dimension. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + split : str + The time split to filter the dataset (train/val/test). + + Returns + ------- + xr.DataArray or None + The xarray DataArray object with processed dataset. 
+ + """ + pass + + @cached_property + @abc.abstractmethod + def boundary_mask(self) -> xr.DataArray: + """ + Return the boundary mask for the dataset, with spatial dimensions + stacked. Where the value is 1, the grid point is a boundary point, and + where the value is 0, the grid point is not a boundary point. + + Returns + ------- + xr.DataArray + The boundary mask for the dataset, with dimensions + `('grid_index',)`. + + """ + pass + + @abc.abstractmethod + def get_xy(self, category: str) -> np.ndarray: + """ + Return the x, y coordinates of the dataset as a numpy arrays for a + given category of data. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + np.ndarray + The x, y coordinates of the dataset with shape `[n_grid_points, 2]`. + """ + + @property + @abc.abstractmethod + def coords_projection(self) -> ccrs.Projection: + """Return the projection object for the coordinates. + + The projection object is used to plot the coordinates on a map. + + Returns + ------- + cartopy.crs.Projection: + The projection object. + + """ + pass + + @functools.lru_cache + def get_xy_extent(self, category: str) -> List[float]: + """ + Return the extent of the x, y coordinates for a given category of data. + The extent should be returned as a list of 4 floats with `[xmin, xmax, + ymin, ymax]` which can then be used to set the extent of a plot. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + List[float] + The extent of the x, y coordinates. + + """ + xy = self.get_xy(category, stacked=False) + extent = [xy[0].min(), xy[0].max(), xy[1].min(), xy[1].max()] + return [float(v) for v in extent] + + @property + @abc.abstractmethod + def num_grid_points(self) -> int: + """Return the number of grid points in the dataset. + + Returns + ------- + int + The number of grid points in the dataset. + + """ + pass + + @cached_property + @abc.abstractmethod + def state_feature_weights_values(self) -> List[float]: + """ + Return the weights for each state feature as a list of floats. The + weights are defined by the user in a config file for the datastore. + + Implementations of this method must assert that there is one weight for + each state feature in the datastore. The weights can be used to scale + the loss function for each state variable (e.g. via the standard + deviation of the 1-step differences of the state variables). + + Returns: + List[float]: The weights for each state feature. + """ + pass + + @functools.lru_cache + def expected_dim_order(self, category: str = None) -> tuple[str]: + """ + Return the expected dimension order for the dataarray or dataset + returned by `get_dataarray` for the given category of data. The + dimension order is the order of the dimensions in the dataarray or + dataset, and is used to check that the data is in the expected format. + + This is necessary so that when stacking and unstacking the spatial grid + we can ensure that the dimension order is the same as what is returned + from `get_dataarray`. And also ensures that downstream uses of a + datastore (e.g. WeatherDataset) sees the data in a common structure. + + If the category is None, then the it assumed that data only represents + a 1D scalar field varying with grid-index. + + The order is constructed to match the order in `pytorch.Tensor` objects + that will be constructed from the data so that the last two dimensions + are always the grid-index and feature dimensions (i.e. 
the order is + `[..., grid_index, {category}_feature]`), with any time-related and + ensemble-number dimension(s) coming before these two. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + List[str] + The expected dimension order for the dataarray or dataset. + + """ + dim_order = [] + + if category is not None: + if category != "static": + # static data does not vary in time + if self.is_forecast: + dim_order.extend( + ["analysis_time", "elapsed_forecast_duration"] + ) + elif not self.is_forecast: + dim_order.append("time") + + if self.is_ensemble and category == "state": + # XXX: for now we only assume ensemble data for state variables + dim_order.append("ensemble_member") + + dim_order.append("grid_index") + + if category is not None: + dim_order.append(f"{category}_feature") + + return tuple(dim_order) + + +@dataclasses.dataclass +class CartesianGridShape: + """Dataclass to store the shape of a grid.""" + + x: int + y: int + + +class BaseRegularGridDatastore(BaseDatastore): + """ + Base class for weather data stored on a regular grid (like a chess-board, + as opposed to a irregular grid where each cell cannot be indexed by just + two integers, see https://en.wikipedia.org/wiki/Regular_grid). In addition + to the methods and attributes required for weather data in general (see + `BaseDatastore`) for regular-gridded source data each `grid_index` + coordinate value is assumed to be associated with `x` and `y`-values that + allow the processed data-arrays can be reshaped back into into 2D + xy-gridded arrays. + + The following methods and attributes must be implemented for datastore that + represents regular-gridded data: + - `grid_shape_state` (property): 2D shape of the grid for the state + variables. + - `get_xy` (method): Return the x, y coordinates of the dataset, with the + option to not stack the coordinates (so that they are returned as a 2D + grid). + + The operation of going from (x,y)-indexed regular grid + to `grid_index`-indexed data-array is called "stacking" and the reverse + operation is called "unstacking". This class provides methods to stack and + unstack the spatial grid coordinates of the data-arrays (called + `stack_grid_coords` and `unstack_grid_coords` respectively). + """ + + CARTESIAN_COORDS = ["x", "y"] + + @cached_property + @abc.abstractmethod + def grid_shape_state(self) -> CartesianGridShape: + """The shape of the grid for the state variables. + + Returns + ------- + CartesianGridShape: + The shape of the grid for the state variables, which has `x` and + `y` attributes. + + """ + pass + + @abc.abstractmethod + def get_xy(self, category: str, stacked: bool = True) -> np.ndarray: + """Return the x, y coordinates of the dataset. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + stacked : bool + Whether to stack the x, y coordinates. The parameter `stacked` has + been introduced in this class. Parent class `BaseDatastore` has the + same method signature but without the `stacked` parameter. Defaults + to `True` to match the behaviour of `BaseDatastore.get_xy()` which + always returns the coordinates stacked. + + Returns + ------- + np.ndarray + The x, y coordinates of the dataset, returned differently based on + the value of `stacked`: - `stacked==True`: shape `(n_grid_points, + 2)` where + n_grid_points=N_x*N_y. 
+ - `stacked==False`: shape `(N_x, N_y, 2)` + """ + pass + + def unstack_grid_coords( + self, da_or_ds: Union[xr.DataArray, xr.Dataset] + ) -> Union[xr.DataArray, xr.Dataset]: + """ + Unstack the spatial grid coordinates from `grid_index` into separate `x` + and `y` dimensions to create a 2D grid. Only performs unstacking if the + data is currently stacked (has grid_index dimension). + + Parameters + ---------- + da_or_ds : xr.DataArray or xr.Dataset + The dataarray or dataset to unstack the grid coordinates of. + + Returns + ------- + xr.DataArray or xr.Dataset + The dataarray or dataset with the grid coordinates unstacked. + """ + # Return original data if already unstacked (no grid_index dimension) + if "grid_index" not in da_or_ds.dims: + return da_or_ds + + # Check whether `grid_index` is a multi-index + if not isinstance(da_or_ds.indexes.get("grid_index"), MultiIndex): + da_or_ds = da_or_ds.set_index(grid_index=self.CARTESIAN_COORDS) + + da_or_ds_unstacked = da_or_ds.unstack("grid_index") + + # Ensure that the x, y dimensions are in the correct order + dims = da_or_ds_unstacked.dims + xy_dim_order = [d for d in dims if d in self.CARTESIAN_COORDS] + + if xy_dim_order != self.CARTESIAN_COORDS: + da_or_ds_unstacked = da_or_ds_unstacked.transpose("x", "y") + + return da_or_ds_unstacked + + def stack_grid_coords( + self, da_or_ds: Union[xr.DataArray, xr.Dataset] + ) -> Union[xr.DataArray, xr.Dataset]: + """ + Stack the spatial grid coordinates (x and y) into a single `grid_index` + dimension. Only performs stacking if the data is currently unstacked + (has x and y dimensions). + + Parameters + ---------- + da_or_ds : xr.DataArray or xr.Dataset + The dataarray or dataset to stack the grid coordinates of. + + Returns + ------- + xr.DataArray or xr.Dataset + The dataarray or dataset with the grid coordinates stacked. + """ + # Return original data if already stacked (has grid_index dimension) + if "grid_index" in da_or_ds.dims: + return da_or_ds + + da_or_ds_stacked = da_or_ds.stack(grid_index=self.CARTESIAN_COORDS) + + # infer what category of data the array represents by finding the + # dimension named in the format `{category}_feature` + category = None + for dim in da_or_ds_stacked.dims: + if dim.endswith("_feature"): + if category is not None: + raise ValueError( + "Multiple dimensions ending with '_feature' found in " + f"dataarray: {da_or_ds_stacked}. Cannot infer category." + ) + category = dim.split("_")[0] + + dim_order = self.expected_dim_order(category=category) + + return da_or_ds_stacked.transpose(*dim_order) + + @property + @functools.lru_cache + def num_grid_points(self) -> int: + """Return the number of grid points in the dataset. + + Returns + ------- + int + The number of grid points in the dataset. + + """ + return self.grid_shape_state.x * self.grid_shape_state.y diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py new file mode 100644 index 00000000..10593a82 --- /dev/null +++ b/neural_lam/datastore/mdp.py @@ -0,0 +1,464 @@ +# Standard library +import warnings +from functools import cached_property +from pathlib import Path +from typing import List + +# Third-party +import cartopy.crs as ccrs +import mllam_data_prep as mdp +import xarray as xr +from loguru import logger +from numpy import ndarray + +# Local +from .base import BaseRegularGridDatastore, CartesianGridShape + + +class MDPDatastore(BaseRegularGridDatastore): + """ + Datastore class for datasets made with the mllam_data_prep library + (https://github.com/mllam/mllam-data-prep). 
This class wraps the + `mllam_data_prep` library to do the necessary transforms to create the + different categories (state/forcing/static) of data, with the actual + transform to do being specified in the configuration file. + """ + + SHORT_NAME = "mdp" + + def __init__(self, config_path, n_boundary_points=30, reuse_existing=True): + """ + Construct a new MDPDatastore from the configuration file at + `config_path`. A boundary mask is created with `n_boundary_points` + boundary points. If `reuse_existing` is True, the dataset is loaded + from a zarr file if it exists (unless the config has been modified + since the zarr was created), otherwise it is created from the + configuration file. + + Parameters + ---------- + config_path : str + The path to the configuration file, this will be fed to the + `mllam_data_prep.Config.from_yaml_file` method to then call + `mllam_data_prep.create_dataset` to create the dataset. + n_boundary_points : int + The number of boundary points to use in the boundary mask. + reuse_existing : bool + Whether to reuse an existing dataset zarr file if it exists and its + creation date is newer than the configuration file. + + """ + self._config_path = Path(config_path) + self._root_path = self._config_path.parent + self._config = mdp.Config.from_yaml_file(self._config_path) + fp_ds = self._root_path / self._config_path.name.replace( + ".yaml", ".zarr" + ) + + self._ds = None + if reuse_existing and fp_ds.exists(): + # check that the zarr directory is newer than the config file + if fp_ds.stat().st_mtime < self._config_path.stat().st_mtime: + logger.warning( + "Config file has been modified since zarr was created. " + f"The old zarr archive (in {fp_ds}) will be used." + "To generate new zarr-archive, move the old one first." + ) + self._ds = xr.open_zarr(fp_ds, consolidated=True) + + if self._ds is None: + self._ds = mdp.create_dataset(config=self._config) + self._ds.to_zarr(fp_ds) + self._n_boundary_points = n_boundary_points + + print("The loaded datastore contains the following features:") + for category in ["state", "forcing", "static"]: + if len(self.get_vars_names(category)) > 0: + var_names = self.get_vars_names(category) + print(f" {category:<8s}: {' '.join(var_names)}") + + # check that all three train/val/test splits are available + required_splits = ["train", "val", "test"] + available_splits = list(self._ds.splits.split_name.values) + if not all(split in available_splits for split in required_splits): + raise ValueError( + f"Missing required splits: {required_splits} in available " + f"splits: {available_splits}" + ) + + print("With the following splits (over time):") + for split in required_splits: + da_split = self._ds.splits.sel(split_name=split) + da_split_start = da_split.sel(split_part="start").load().item() + da_split_end = da_split.sel(split_part="end").load().item() + print(f" {split:<8s}: {da_split_start} to {da_split_end}") + + # find out the dimension order for the stacking to grid-index + dim_order = None + for input_dataset in self._config.inputs.values(): + dim_order_ = input_dataset.dim_mapping["grid_index"].dims + if dim_order is None: + dim_order = dim_order_ + else: + assert ( + dim_order == dim_order_ + ), "all inputs must have the same dimension order" + + self.CARTESIAN_COORDS = dim_order + + @property + def root_path(self) -> Path: + """The root path of the dataset. + + Returns + ------- + Path + The root path of the dataset. 
+ + """ + return self._root_path + + @property + def config(self) -> mdp.Config: + """The configuration of the dataset. + + Returns + ------- + mdp.Config + The configuration of the dataset. + + """ + return self._config + + @property + def step_length(self) -> int: + """The length of the time steps in hours. + + Returns + ------- + int + The length of the time steps in hours. + + """ + da_dt = self._ds["time"].diff("time") + return (da_dt.dt.seconds[0] // 3600).item() + + def get_vars_units(self, category: str) -> List[str]: + """Return the units of the variables in the given category. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + List[str] + The units of the variables in the given category. + + """ + if category not in self._ds and category == "forcing": + warnings.warn("no forcing data found in datastore") + return [] + return self._ds[f"{category}_feature_units"].values.tolist() + + def get_vars_names(self, category: str) -> List[str]: + """Return the names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + List[str] + The names of the variables in the given category. + + """ + if category not in self._ds and category == "forcing": + warnings.warn("no forcing data found in datastore") + return [] + return self._ds[f"{category}_feature"].values.tolist() + + def get_vars_long_names(self, category: str) -> List[str]: + """ + Return the long names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + List[str] + The long names of the variables in the given category. + + """ + if category not in self._ds and category == "forcing": + warnings.warn("no forcing data found in datastore") + return [] + return self._ds[f"{category}_feature_long_name"].values.tolist() + + def get_num_data_vars(self, category: str) -> int: + """Return the number of variables in the given category. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + int + The number of variables in the given category. + + """ + return len(self.get_vars_names(category)) + + def get_dataarray(self, category: str, split: str) -> xr.DataArray: + """ + Return the processed data (as a single `xr.DataArray`) for the given + category of data and test/train/val-split that covers all the data (in + space and time) of a given category (state/forcin g/static). "state" is + the only required category, for other categories, the method will + return `None` if the category is not found in the datastore. + + The returned dataarray will at minimum have dimensions of `(grid_index, + {category}_feature)` so that any spatial dimensions have been stacked + into a single dimension and all variables and levels have been stacked + into a single feature dimension named by the `category` of data being + loaded. + + For categories of data that have a time dimension (i.e. not static + data), the dataarray will additionally have `(analysis_time, + elapsed_forecast_duration)` dimensions if `is_forecast` is True, or + `(time)` if `is_forecast` is False. + + If the data is ensemble data, the dataarray will have an additional + `ensemble_member` dimension. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). 
+ split : str + The time split to filter the dataset (train/val/test). + + Returns + ------- + xr.DataArray or None + The xarray DataArray object with processed dataset. + + """ + if category not in self._ds and category == "forcing": + warnings.warn("no forcing data found in datastore") + return None + + da_category = self._ds[category] + + # set units on x y coordinates if missing + for coord in ["x", "y"]: + if "units" not in da_category[coord].attrs: + da_category[coord].attrs["units"] = "m" + + # set multi-index for grid-index + da_category = da_category.set_index(grid_index=self.CARTESIAN_COORDS) + + if "time" in da_category.dims: + t_start = ( + self._ds.splits.sel(split_name=split) + .sel(split_part="start") + .load() + .item() + ) + t_end = ( + self._ds.splits.sel(split_name=split) + .sel(split_part="end") + .load() + .item() + ) + da_category = da_category.sel(time=slice(t_start, t_end)) + + dim_order = self.expected_dim_order(category=category) + return da_category.transpose(*dim_order) + + def get_standardization_dataarray(self, category: str) -> xr.Dataset: + """ + Return the standardization dataarray for the given category. This + should contain a `{category}_mean` and `{category}_std` variable for + each variable in the category. For `category=="state"`, the dataarray + should also contain a `state_diff_mean` and `state_diff_std` variable + for the one- step differences of the state variables. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + xr.Dataset + The standardization dataarray for the given category, with + variables for the mean and standard deviation of the variables (and + differences for state variables). + + """ + ops = ["mean", "std"] + split = "train" + stats_variables = { + f"{category}__{split}__{op}": f"{category}_{op}" for op in ops + } + if category == "state": + stats_variables.update( + {f"state__{split}__diff_{op}": f"state_diff_{op}" for op in ops} + ) + + ds_stats = self._ds[stats_variables.keys()].rename(stats_variables) + return ds_stats + + @cached_property + def boundary_mask(self) -> xr.DataArray: + """ + Produce a 0/1 mask for the boundary points of the dataset, these will + sit at the edges of the domain (in x/y extent) and will be used to mask + out the boundary points from the loss function and to overwrite the + boundary points from the prediction. For now this is created when the + mask is requested, but in the future this could be saved to the zarr + file. + + Returns + ------- + xr.DataArray + A 0/1 mask for the boundary points of the dataset, where 1 is a + boundary point and 0 is not. + + """ + ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) + da_state_variable = ( + ds_unstacked["state"].isel(time=0).isel(state_feature=0) + ) + da_domain_allzero = xr.zeros_like(da_state_variable) + ds_unstacked["boundary_mask"] = da_domain_allzero.isel( + x=slice(self._n_boundary_points, -self._n_boundary_points), + y=slice(self._n_boundary_points, -self._n_boundary_points), + ) + ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( + 1 + ).astype(int) + return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) + + @property + def coords_projection(self) -> ccrs.Projection: + """ + Return the projection of the coordinates. 
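The note below explains that the projection is read from the `extra` section of the config. A sketch of how such an entry maps onto a `cartopy` projection, mirroring the parsing code further down (class name and kwargs are illustrative values, not taken from any real config):

    import cartopy.crs as ccrs

    # illustrative in-memory equivalent of an `extra.projection` config entry
    projection_info = {
        "class_name": "LambertConformal",
        "kwargs": {
            "central_longitude": 15.0,
            "central_latitude": 63.0,
            "globe": {"semimajor_axis": 6367470.0, "semiminor_axis": 6367470.0},
        },
    }
    kwargs = dict(projection_info["kwargs"])
    globe_kwargs = kwargs.pop("globe", {})
    if globe_kwargs:
        kwargs["globe"] = ccrs.Globe(**globe_kwargs)
    proj = getattr(ccrs, projection_info["class_name"])(**kwargs)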
+ + NOTE: currently this expects the projection information to be in the + `extra` section of the configuration file, with a `projection` key + containing a `class_name` and `kwargs` for constructing the + `cartopy.crs.Projection` object. This is a temporary solution until + the projection information can be parsed in the produced dataset + itself. `mllam-data-prep` ignores the contents of the `extra` section + of the config file which is why we need to check that the necessary + parts are there. + + Returns + ------- + ccrs.Projection + The projection of the coordinates. + + """ + if "projection" not in self._config.extra: + raise ValueError( + "projection information not found in the configuration file " + f"({self._config_path}). Please add the projection information" + "to the `extra` section of the config, by adding a " + "`projection` key with the class name and kwargs of the " + "projection." + ) + + projection_info = self._config.extra["projection"] + if "class_name" not in projection_info: + raise ValueError( + "class_name not found in the projection information. Please " + "add the class name of the projection to the `projection` key " + "in the `extra` section of the config." + ) + if "kwargs" not in projection_info: + raise ValueError( + "kwargs not found in the projection information. Please add " + "the keyword arguments of the projection to the `projection` " + "key in the `extra` section of the config." + ) + + class_name = projection_info["class_name"] + ProjectionClass = getattr(ccrs, class_name) + kwargs = projection_info["kwargs"] + + globe_kwargs = kwargs.pop("globe", {}) + if len(globe_kwargs) > 0: + kwargs["globe"] = ccrs.Globe(**globe_kwargs) + + return ProjectionClass(**kwargs) + + @cached_property + def grid_shape_state(self): + """The shape of the cartesian grid for the state variables. + + Returns + ------- + CartesianGridShape + The shape of the cartesian grid for the state variables. + + """ + ds_state = self.unstack_grid_coords(self._ds["state"]) + da_x, da_y = ds_state.x, ds_state.y + assert da_x.ndim == da_y.ndim == 1 + return CartesianGridShape(x=da_x.size, y=da_y.size) + + def get_xy(self, category: str, stacked: bool) -> ndarray: + """Return the x, y coordinates of the dataset. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + stacked : bool + Whether to stack the x, y coordinates. + + Returns + ------- + np.ndarray + The x, y coordinates of the dataset, returned differently based on + the value of `stacked`: + - `stacked==True`: shape `(n_grid_points, 2)` where + n_grid_points=N_x*N_y. + - `stacked==False`: shape `(N_x, N_y, 2)` + + """ + # assume variables are stored in dimensions [grid_index, ...] 
+ ds_category = self.unstack_grid_coords(da_or_ds=self._ds[category]) + + da_xs = ds_category.x + da_ys = ds_category.y + + assert da_xs.ndim == da_ys.ndim == 1, "x and y coordinates must be 1D" + + da_x, da_y = xr.broadcast(da_xs, da_ys) + da_xy = xr.concat([da_x, da_y], dim="grid_coord") + + if stacked: + da_xy = da_xy.stack(grid_index=self.CARTESIAN_COORDS).transpose( + "grid_index", + "grid_coord", + ) + else: + dims = [ + "x", + "y", + "grid_coord", + ] + da_xy = da_xy.transpose(*dims) + + return da_xy.values diff --git a/neural_lam/datastore/npyfilesmeps/__init__.py b/neural_lam/datastore/npyfilesmeps/__init__.py new file mode 100644 index 00000000..397a5075 --- /dev/null +++ b/neural_lam/datastore/npyfilesmeps/__init__.py @@ -0,0 +1,2 @@ +# Local +from .store import NpyFilesDatastoreMEPS # noqa diff --git a/neural_lam/create_parameter_weights.py b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py similarity index 71% rename from neural_lam/create_parameter_weights.py rename to neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py index 4867e609..f2c80e8a 100644 --- a/neural_lam/create_parameter_weights.py +++ b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py @@ -2,16 +2,17 @@ import os import subprocess from argparse import ArgumentParser +from pathlib import Path # Third-party -import numpy as np import torch import torch.distributed as dist from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm -# Local -from . import WeatherDataset, config +# First-party +from neural_lam import WeatherDataset +from neural_lam.datastore import init_datastore class PaddedWeatherDataset(torch.utils.data.Dataset): @@ -101,6 +102,10 @@ def save_stats( mean = torch.mean(means, dim=0) # (d_features,) second_moment = torch.mean(squares, dim=0) # (d_features,) std = torch.sqrt(second_moment - mean**2) # (d_features,) + print( + f"Saving {filename_prefix} mean and std.-dev. to " + f"{filename_prefix}_mean.pt and {filename_prefix}_std.pt" + ) torch.save( mean.cpu(), os.path.join(static_dir_path, f"{filename_prefix}_mean.pt") ) @@ -119,100 +124,65 @@ def save_stats( flux_mean = torch.mean(flux_means) # (,) flux_second_moment = torch.mean(flux_squares) # (,) flux_std = torch.sqrt(flux_second_moment - flux_mean**2) # (,) + print("Saving flux mean and std.-dev. 
to flux_stats.pt") torch.save( torch.stack((flux_mean, flux_std)).cpu(), os.path.join(static_dir_path, "flux_stats.pt"), ) -def main(): +def main( + datastore_config_path, batch_size, step_length, n_workers, distributed +): """ Pre-compute parameter weights to be used in loss function + + Arguments + --------- + datastore_config_path : str + Path to datastore config file + batch_size : int + Batch size when iterating over the dataset + step_length : int + Step length in hours to consider single time step + n_workers : int + Number of workers in data loader + distributed : bool + Run the script in distributed """ - parser = ArgumentParser(description="Training arguments") - parser.add_argument( - "--data_config", - type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file (default: neural_lam/data_config.yaml)", - ) - parser.add_argument( - "--batch_size", - type=int, - default=32, - help="Batch size when iterating over the dataset", - ) - parser.add_argument( - "--step_length", - type=int, - default=3, - help="Step length in hours to consider single time step (default: 3)", - ) - parser.add_argument( - "--n_workers", - type=int, - default=4, - help="Number of workers in data loader (default: 4)", - ) - parser.add_argument( - "--distributed", - action="store_true", - help="Run the script in distributed mode (default: False)", - ) - args = parser.parse_args() - distributed = bool(args.distributed) rank = get_rank() world_size = get_world_size() - config_loader = config.Config.from_file(args.data_config) + datastore = init_datastore( + datastore_kind="npyfilesmeps", config_path=datastore_config_path + ) - if distributed: + static_dir_path = Path(datastore_config_path).parent / "static" + os.makedirs(static_dir_path, exist_ok=True) + if distributed: setup(rank, world_size) device = torch.device( f"cuda:{rank}" if torch.cuda.is_available() else "cpu" ) torch.cuda.set_device(device) if torch.cuda.is_available() else None - if rank == 0: - static_dir_path = os.path.join( - "data", config_loader.dataset.name, "static" - ) - # Create parameter weights based on height - # based on fig A.1 in graph cast paper - w_dict = { - "2": 1.0, - "0": 0.1, - "65": 0.065, - "1000": 0.1, - "850": 0.05, - "500": 0.03, - } - w_list = np.array( - [ - w_dict[par.split("_")[-2]] - for par in config_loader.dataset.var_longnames - ] - ) - print("Saving parameter weights...") - np.save( - os.path.join(static_dir_path, "parameter_weights.npy"), - w_list.astype("float32"), - ) - - # Load dataset without any subsampling + # Setting this to the original value of the Oskarsson et al. paper (2023) + # 65 forecast steps - 2 initial steps = 63 + ar_steps = 63 ds = WeatherDataset( - config_loader.dataset.name, + datastore=datastore, split="train", - subsample_step=1, - pred_length=63, + ar_steps=ar_steps, standardize=False, + num_past_forcing_steps=0, + num_future_forcing_steps=0, ) if distributed: ds = PaddedWeatherDataset( ds, world_size, - args.batch_size, + batch_size, ) sampler = DistributedSampler( ds, num_replicas=world_size, rank=rank, shuffle=False @@ -221,9 +191,9 @@ def main(): sampler = None loader = torch.utils.data.DataLoader( ds, - args.batch_size, + batch_size, shuffle=False, - num_workers=args.n_workers, + num_workers=n_workers, sampler=sampler, ) @@ -231,7 +201,7 @@ def main(): print("Computing mean and std.-dev. 
for parameters...") means, squares, flux_means, flux_squares = [], [], [], [] - for init_batch, target_batch, forcing_batch in tqdm(loader): + for init_batch, target_batch, forcing_batch, _ in tqdm(loader): if distributed: init_batch, target_batch, forcing_batch = ( init_batch.to(device), @@ -240,8 +210,8 @@ def main(): ) # (N_batch, N_t, N_grid, d_features) batch = torch.cat((init_batch, target_batch), dim=1) - # Flux at 1st windowed position is index 1 in forcing - flux_batch = forcing_batch[:, :, :, 1] + # Flux at 1st windowed position is index 0 in forcing + flux_batch = forcing_batch[:, :, :, 0] # (N_batch, d_features,) means.append(torch.mean(batch, dim=(1, 2)).cpu()) squares.append( @@ -254,29 +224,34 @@ def main(): means_gathered, squares_gathered = [None] * world_size, [ None ] * world_size - flux_means_gathered, flux_squares_gathered = [None] * world_size, [ - None - ] * world_size + flux_means_gathered, flux_squares_gathered = ( + [None] * world_size, + [None] * world_size, + ) dist.all_gather_object(means_gathered, torch.cat(means, dim=0)) dist.all_gather_object(squares_gathered, torch.cat(squares, dim=0)) dist.all_gather_object(flux_means_gathered, flux_means) dist.all_gather_object(flux_squares_gathered, flux_squares) if rank == 0: - means_gathered, squares_gathered = torch.cat( - means_gathered, dim=0 - ), torch.cat(squares_gathered, dim=0) - flux_means_gathered, flux_squares_gathered = torch.tensor( - flux_means_gathered - ), torch.tensor(flux_squares_gathered) + means_gathered, squares_gathered = ( + torch.cat(means_gathered, dim=0), + torch.cat(squares_gathered, dim=0), + ) + flux_means_gathered, flux_squares_gathered = ( + torch.tensor(flux_means_gathered), + torch.tensor(flux_squares_gathered), + ) original_indices = ds.get_original_indices() - means, squares = [means_gathered[i] for i in original_indices], [ - squares_gathered[i] for i in original_indices - ] - flux_means, flux_squares = [ - flux_means_gathered[i] for i in original_indices - ], [flux_squares_gathered[i] for i in original_indices] + means, squares = ( + [means_gathered[i] for i in original_indices], + [squares_gathered[i] for i in original_indices], + ) + flux_means, flux_squares = ( + [flux_means_gathered[i] for i in original_indices], + [flux_squares_gathered[i] for i in original_indices], + ) else: means = [torch.cat(means, dim=0)] # (N_batch, d_features,) squares = [torch.cat(squares, dim=0)] # (N_batch, d_features,) @@ -299,17 +274,18 @@ def main(): if rank == 0: print("Computing mean and std.-dev. 
for one-step differences...") ds_standard = WeatherDataset( - config_loader.dataset.name, + datastore=datastore, split="train", - subsample_step=1, - pred_length=63, + ar_steps=ar_steps, standardize=True, + num_past_forcing_steps=0, + num_future_forcing_steps=0, ) # Re-load with standardization if distributed: ds_standard = PaddedWeatherDataset( ds_standard, world_size, - args.batch_size, + batch_size, ) sampler_standard = DistributedSampler( ds_standard, num_replicas=world_size, rank=rank, shuffle=False @@ -318,16 +294,18 @@ def main(): sampler_standard = None loader_standard = torch.utils.data.DataLoader( ds_standard, - args.batch_size, + batch_size, shuffle=False, - num_workers=args.n_workers, + num_workers=n_workers, sampler=sampler_standard, ) - used_subsample_len = (65 // args.step_length) * args.step_length + used_subsample_len = (65 // step_length) * step_length diff_means, diff_squares = [], [] - for init_batch, target_batch, _ in tqdm(loader_standard, disable=rank != 0): + for init_batch, target_batch, _, _ in tqdm( + loader_standard, disable=rank != 0 + ): if distributed: init_batch, target_batch = init_batch.to(device), target_batch.to( device @@ -337,13 +315,13 @@ def main(): # Note: batch contains only 1h-steps stepped_batch = torch.cat( [ - batch[:, ss_i : used_subsample_len : args.step_length] - for ss_i in range(args.step_length) + batch[:, ss_i:used_subsample_len:step_length] + for ss_i in range(step_length) ], dim=0, ) # (N_batch', N_t, N_grid, d_features), - # N_batch' = args.step_length*N_batch + # N_batch' = step_length*N_batch batch_diffs = stepped_batch[:, 1:] - stepped_batch[:, :-1] # (N_batch', N_t-1, N_grid, d_features) diff_means.append(torch.mean(batch_diffs, dim=(1, 2)).cpu()) @@ -353,9 +331,10 @@ def main(): if distributed and world_size > 1: dist.barrier() - diff_means_gathered, diff_squares_gathered = [None] * world_size, [ - None - ] * world_size + diff_means_gathered, diff_squares_gathered = ( + [None] * world_size, + [None] * world_size, + ) dist.all_gather_object( diff_means_gathered, torch.cat(diff_means, dim=0) ) @@ -364,19 +343,21 @@ def main(): ) if rank == 0: - diff_means_gathered, diff_squares_gathered = torch.cat( - diff_means_gathered, dim=0 - ).view(-1, *diff_means[0].shape), torch.cat( - diff_squares_gathered, dim=0 - ).view( - -1, *diff_squares[0].shape + diff_means_gathered, diff_squares_gathered = ( + torch.cat(diff_means_gathered, dim=0).view( + -1, *diff_means[0].shape + ), + torch.cat(diff_squares_gathered, dim=0).view( + -1, *diff_squares[0].shape + ), ) original_indices = ds_standard.get_original_window_indices( - args.step_length + step_length + ) + diff_means, diff_squares = ( + [diff_means_gathered[i] for i in original_indices], + [diff_squares_gathered[i] for i in original_indices], ) - diff_means, diff_squares = [ - diff_means_gathered[i] for i in original_indices - ], [diff_squares_gathered[i] for i in original_indices] diff_means = [torch.cat(diff_means, dim=0)] # (N_batch', d_features,) diff_squares = [torch.cat(diff_squares, dim=0)] # (N_batch', d_features,) @@ -388,5 +369,47 @@ def main(): dist.destroy_process_group() +def cli(): + parser = ArgumentParser(description="Training arguments") + parser.add_argument( + "--datastore_config_path", + type=str, + help="Path to data config file", + ) + parser.add_argument( + "--batch_size", + type=int, + default=32, + help="Batch size when iterating over the dataset", + ) + parser.add_argument( + "--step_length", + type=int, + default=3, + help="Step length in hours to consider single 
time step (default: 3)", + ) + parser.add_argument( + "--n_workers", + type=int, + default=4, + help="Number of workers in data loader (default: 4)", + ) + parser.add_argument( + "--distributed", + action="store_true", + help="Run the script in distributed mode (default: False)", + ) + args = parser.parse_args() + distributed = bool(args.distributed) + + main( + datastore_config_path=args.datastore_config_path, + batch_size=args.batch_size, + step_length=args.step_length, + n_workers=args.n_workers, + distributed=distributed, + ) + + if __name__ == "__main__": - main() + cli() diff --git a/neural_lam/datastore/npyfilesmeps/config.py b/neural_lam/datastore/npyfilesmeps/config.py new file mode 100644 index 00000000..1a9d7295 --- /dev/null +++ b/neural_lam/datastore/npyfilesmeps/config.py @@ -0,0 +1,66 @@ +# Standard library +from dataclasses import dataclass, field +from typing import Any, Dict, List + +# Third-party +import dataclass_wizard + + +@dataclass +class Projection: + """Represents the projection information for a dataset, including the type + of projection and its parameters. Capable of creating a cartopy.crs + projection object. + + Attributes: + class_name: The class name of the projection, this should be a valid + cartopy.crs class. + kwargs: A dictionary of keyword arguments specific to the projection + type. + + """ + + class_name: str + kwargs: Dict[str, Any] + + +@dataclass +class Dataset: + """Contains information about the dataset, including variable names, units, + and descriptions. + + Attributes: + name: The name of the dataset. + var_names: A list of variable names in the dataset. + var_units: A list of units for each variable. + var_longnames: A list of long, descriptive names for each variable. + num_forcing_features: The number of forcing features in the dataset. + + """ + + name: str + var_names: List[str] + var_units: List[str] + var_longnames: List[str] + num_forcing_features: int + num_timesteps: int + step_length: int + num_ensemble_members: int + remove_state_features_with_index: List[int] = field(default_factory=list) + + +@dataclass +class NpyDatastoreConfig(dataclass_wizard.YAMLWizard): + """Configuration for loading and processing a dataset, including dataset + details, grid shape, and projection information. + + Attributes: + dataset: An instance of Dataset containing details about the dataset. + grid_shape_state: A list representing the shape of the grid state. + projection: An instance of Projection containing projection details. + + """ + + dataset: Dataset + grid_shape_state: List[int] + projection: Projection diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py new file mode 100644 index 00000000..42e80706 --- /dev/null +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -0,0 +1,788 @@ +""" +Numpy-files based datastore to support the MEPS example dataset introduced in +neural-lam v0.1.0. 
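As noted in the constructor docstring below, the store relies on `dask.delayed` so that numpy files are only read when their values are actually needed. A minimal sketch of that pattern (file name and array shape are illustrative, loosely matching the MEPS dimensions listed further down):

    import dask
    import dask.array
    import numpy as np

    delayed_load = dask.delayed(np.load)("nwp_2022090100_mbr000.npy")
    # shape and dtype must be declared up front, since nothing has been read yet
    arr = dask.array.from_delayed(
        delayed_load, shape=(65, 268, 238, 18), dtype=np.float32
    )
    values = arr.compute()  # the file is only opened here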
+""" + +# Standard library +import functools +import re +import warnings +from functools import cached_property +from pathlib import Path +from typing import List + +# Third-party +import cartopy.crs as ccrs +import dask +import dask.array +import dask.delayed +import numpy as np +import parse +import torch +import xarray as xr +from xarray.core.dataarray import DataArray + +# Local +from ..base import BaseRegularGridDatastore, CartesianGridShape +from .config import NpyDatastoreConfig + +STATE_FILENAME_FORMAT = "nwp_{analysis_time:%Y%m%d%H}_mbr{member_id:03d}.npy" +TOA_SW_DOWN_FLUX_FILENAME_FORMAT = ( + "nwp_toa_downwelling_shortwave_flux_{analysis_time:%Y%m%d%H}.npy" +) +OPEN_WATER_FILENAME_FORMAT = "wtr_{analysis_time:%Y%m%d%H}.npy" + + +def _load_np(fp, add_feature_dim, feature_dim_mask=None): + arr = np.load(fp) + if add_feature_dim: + arr = arr[..., np.newaxis] + if feature_dim_mask is not None: + arr = arr[..., feature_dim_mask] + return arr + + +class NpyFilesDatastoreMEPS(BaseRegularGridDatastore): + __doc__ = f""" + Represents a dataset stored as numpy files on disk. The dataset is assumed + to be stored in a directory structure where each sample is stored in a + separate file. The file-name format is assumed to be + '{STATE_FILENAME_FORMAT}' + + The MEPS dataset is organised into three splits: train, val, and test. Each + split has a set of files which are: + + - `{STATE_FILENAME_FORMAT}`: + The state variables for a forecast started at `analysis_time` with + member id `member_id`. The dimensions of the array are + `[forecast_timestep, y, x, feature]`. + + - `{TOA_SW_DOWN_FLUX_FILENAME_FORMAT}`: + The top-of-atmosphere downwelling shortwave flux at `time`. The + dimensions of the array are `[forecast_timestep, y, x]`. + + - `{OPEN_WATER_FILENAME_FORMAT}`: + The open water fraction at `time`. The dimensions of the array are + `[y, x]`. + + + Folder structure: + + meps_example_reduced + ├── data_config.yaml + ├── samples + │ ├── test + │ │ ├── nwp_2022090100_mbr000.npy + │ │ ├── nwp_2022090100_mbr001.npy + │ │ ├── nwp_2022090112_mbr000.npy + │ │ ├── nwp_2022090112_mbr001.npy + │ │ ├── ... + │ │ ├── nwp_toa_downwelling_shortwave_flux_2022090100.npy + │ │ ├── nwp_toa_downwelling_shortwave_flux_2022090112.npy + │ │ ├── ... + │ │ ├── wtr_2022090100.npy + │ │ ├── wtr_2022090112.npy + │ │ └── ... + │ ├── train + │ │ ├── nwp_2022040100_mbr000.npy + │ │ ├── nwp_2022040100_mbr001.npy + │ │ ├── ... + │ │ ├── nwp_2022040112_mbr000.npy + │ │ ├── nwp_2022040112_mbr001.npy + │ │ ├── ... + │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040100.npy + │ │ ├── nwp_toa_downwelling_shortwave_flux_2022040112.npy + │ │ ├── ... + │ │ ├── wtr_2022040100.npy + │ │ ├── wtr_2022040112.npy + │ │ └── ... + │ └── val + │ ├── nwp_2022060500_mbr000.npy + │ ├── nwp_2022060500_mbr001.npy + │ ├── ... + │ ├── nwp_2022060512_mbr000.npy + │ ├── nwp_2022060512_mbr001.npy + │ ├── ... + │ ├── nwp_toa_downwelling_shortwave_flux_2022060500.npy + │ ├── nwp_toa_downwelling_shortwave_flux_2022060512.npy + │ ├── ... + │ ├── wtr_2022060500.npy + │ ├── wtr_2022060512.npy + │ └── ... 
+ └── static + ├── border_mask.npy + ├── diff_mean.pt + ├── diff_std.pt + ├── flux_stats.pt + ├── grid_features.pt + ├── nwp_xy.npy + ├── parameter_mean.pt + ├── parameter_std.pt + ├── parameter_weights.npy + └── surface_geopotential.npy + + For the MEPS dataset: + N_t' = 65 + N_t = 65//subsample_step (= 21 for 3h steps) + dim_y = 268 + dim_x = 238 + N_grid = 268x238 = 63784 + d_features = 17 (d_features' = 18) + d_forcing = 5 + + For the MEPS reduced dataset: + N_t' = 65 + N_t = 65//subsample_step (= 21 for 3h steps) + dim_y = 134 + dim_x = 119 + N_grid = 134x119 = 15946 + d_features = 8 + d_forcing = 1 + """ + SHORT_NAME = "npyfilesmeps" + + is_ensemble = True + is_forecast = True + + def __init__( + self, + config_path, + ): + """ + Create a new NpyFilesDatastore using the configuration file at the + given path. The config file should be a YAML file and will be loaded + into an instance of the `NpyDatastoreConfig` dataclass. + + Internally, the datastore uses dask.delayed to load the data from the + numpy files, so that the data isn't actually loaded until it's needed. + + Parameters + ---------- + config_path : str + The path to the configuration file for the datastore. + + """ + self._config_path = Path(config_path) + self._root_path = self._config_path.parent + self._config = NpyDatastoreConfig.from_yaml_file(self._config_path) + + self._num_ensemble_members = self.config.dataset.num_ensemble_members + self._num_timesteps = self.config.dataset.num_timesteps + self._step_length = self.config.dataset.step_length + self._remove_state_features_with_index = ( + self.config.dataset.remove_state_features_with_index + ) + + @property + def root_path(self) -> Path: + """ + The root path of the datastore on disk. This is the directory relative + to which graphs and other files can be stored. + + Returns + ------- + Path + The root path of the datastore + + """ + return self._root_path + + @property + def config(self) -> NpyDatastoreConfig: + """The configuration for the datastore. + + Returns + ------- + NpyDatastoreConfig + The configuration for the datastore. + + """ + return self._config + + def get_dataarray(self, category: str, split: str) -> DataArray: + """ + Get the data array for the given category and split of data. If the + category is 'state', the data array will be a concatenation of the data + arrays for all ensemble members. The data will be loaded as a dask + array, so that the data isn't actually loaded until it's needed. + + Parameters + ---------- + category : str + The category of the data to load. One of 'state', 'forcing', or + 'static'. + split : str + The dataset split to load the data for. One of 'train', 'val', or + 'test'. 
+ + Returns + ------- + xr.DataArray + The data array for the given category and split, with dimensions + per category: + state: `[elapsed_forecast_duration, analysis_time, grid_index, + feature, ensemble_member]` + forcing: `[elapsed_forecast_duration, analysis_time, grid_index, + feature]` + static: `[grid_index, feature]` + + """ + if category == "state": + das = [] + # for the state category, we need to load all ensemble members + for member in range(self._num_ensemble_members): + da_member = self._get_single_timeseries_dataarray( + features=self.get_vars_names(category="state"), + split=split, + member=member, + ) + das.append(da_member) + da = xr.concat(das, dim="ensemble_member") + + elif category == "forcing": + # the forcing features are in separate files, so we need to load + # them separately + features = ["toa_downwelling_shortwave_flux", "open_water_fraction"] + das = [ + self._get_single_timeseries_dataarray( + features=[feature], split=split + ) + for feature in features + ] + da = xr.concat(das, dim="feature") + + # add datetime forcing as a feature + # to do this we create a forecast time variable which has the + # dimensions of (analysis_time, elapsed_forecast_duration) with + # values that are the actual forecast time of each time step. By + # calling .chunk({"elapsed_forecast_duration": 1}) this time + # variable is turned into a dask array and so execution of the + # calculation is delayed until the feature values are actually + # used. + da_forecast_time = ( + da.analysis_time + da.elapsed_forecast_duration + ).chunk({"elapsed_forecast_duration": 1}) + da_datetime_forcing_features = self._calc_datetime_forcing_features( + da_time=da_forecast_time + ) + da = xr.concat([da, da_datetime_forcing_features], dim="feature") + + elif category == "static": + # the static features are collected in three files: + # - surface_geopotential + # - border_mask + # - x, y + das = [] + for features in [ + ["surface_geopotential"], + ["border_mask"], + ["x", "y"], + ]: + da = self._get_single_timeseries_dataarray( + features=features, split=split + ) + das.append(da) + da = xr.concat(das, dim="feature") + + else: + raise NotImplementedError(category) + + da = da.rename(dict(feature=f"{category}_feature")) + + # stack the [x, y] dimensions into a `grid_index` dimension + da = self.stack_grid_coords(da) + + # check that we have the right features + actual_features = da[f"{category}_feature"].values.tolist() + expected_features = self.get_vars_names(category=category) + if actual_features != expected_features: + raise ValueError( + f"Expected features {expected_features}, got {actual_features}" + ) + + dim_order = self.expected_dim_order(category=category) + da = da.transpose(*dim_order) + + return da + + def _get_single_timeseries_dataarray( + self, features: List[str], split: str, member: int = None + ) -> DataArray: + """ + Get the data array spanning the complete time series for a given set of + features and split of data. For state features the `member` argument + should be specified to select the ensemble member to load. The data + will be loaded using dask.delayed, so that the data isn't actually + loaded until it's needed. + + Parameters + ---------- + features : List[str] + The list of features to load the data for. For the 'state' + category, this should be the result of + `self.get_vars_names(category="state")`, for the 'forcing' category + this should be the list of forcing features to load, and for the + 'static' category this should be the list of static features to + load. 
+ split : str + The dataset split to load the data for. One of 'train', 'val', or + 'test'. + member : int, optional + The ensemble member to load. Only applicable for the 'state' + category. + + Returns + ------- + xr.DataArray + The data array for the given category and split, with dimensions + `[elapsed_forecast_duration, analysis_time, grid_index, feature]` + for all categories of data + + """ + if ( + set(features).difference(self.get_vars_names(category="static")) + == set() + ): + assert split in ( + "train", + "val", + "test", + None, + ), "Unknown dataset split" + else: + assert split in ( + "train", + "val", + "test", + ), f"Unknown dataset split {split} for features {features}" + + if member is not None and features != self.get_vars_names( + category="state" + ): + raise ValueError( + "Member can only be specified for the 'state' category" + ) + + concat_axis = 0 + + file_params = {} + add_feature_dim = False + features_vary_with_analysis_time = True + feature_dim_mask = None + if features == self.get_vars_names(category="state"): + filename_format = STATE_FILENAME_FORMAT + file_dims = ["elapsed_forecast_duration", "y", "x", "feature"] + # only select one member for now + file_params["member_id"] = member + fp_samples = self.root_path / "samples" / split + if self._remove_state_features_with_index: + n_to_drop = len(self._remove_state_features_with_index) + feature_dim_mask = np.ones( + len(features) + n_to_drop, dtype=bool + ) + feature_dim_mask[self._remove_state_features_with_index] = False + elif features == ["toa_downwelling_shortwave_flux"]: + filename_format = TOA_SW_DOWN_FLUX_FILENAME_FORMAT + file_dims = ["elapsed_forecast_duration", "y", "x", "feature"] + add_feature_dim = True + fp_samples = self.root_path / "samples" / split + elif features == ["open_water_fraction"]: + filename_format = OPEN_WATER_FILENAME_FORMAT + file_dims = ["y", "x", "feature"] + add_feature_dim = True + fp_samples = self.root_path / "samples" / split + elif features == ["surface_geopotential"]: + filename_format = "surface_geopotential.npy" + file_dims = ["y", "x", "feature"] + add_feature_dim = True + features_vary_with_analysis_time = False + # XXX: surface_geopotential is the same for all splits, and so + # saved in static/ + fp_samples = self.root_path / "static" + elif features == ["border_mask"]: + filename_format = "border_mask.npy" + file_dims = ["y", "x", "feature"] + add_feature_dim = True + features_vary_with_analysis_time = False + # XXX: border_mask is the same for all splits, and so saved in + # static/ + fp_samples = self.root_path / "static" + elif features == ["x", "y"]: + filename_format = "nwp_xy.npy" + # NB: for x, y the feature dimension is the first one + file_dims = ["feature", "y", "x"] + features_vary_with_analysis_time = False + # XXX: x, y are the same for all splits, and so saved in static/ + fp_samples = self.root_path / "static" + else: + raise NotImplementedError( + f"Reading of variables set `{features}` not supported" + ) + + if features_vary_with_analysis_time: + dims = ["analysis_time"] + file_dims + else: + dims = file_dims + + coords = {} + arr_shape = [] + + xy = self.get_xy(category="state", stacked=False) + xs = xy[:, :, 0] + ys = xy[:, :, 1] + # Check if x-coordinates are constant along columns + assert np.allclose(xs, xs[:, [0]]), "x-coordinates are not constant" + # Check if y-coordinates are constant along rows + assert np.allclose(ys, ys[[0], :]), "y-coordinates are not constant" + # Extract unique x and y coordinates + x = xs[:, 0] # Unique 
x-coordinates (changes along the first axis) + y = ys[0, :] # Unique y-coordinates (changes along the second axis) + for d in dims: + if d == "elapsed_forecast_duration": + coord_values = ( + self.step_length + * np.arange(self._num_timesteps) + * np.timedelta64(1, "h") + ) + elif d == "analysis_time": + coord_values = self._get_analysis_times(split=split) + elif d == "y": + coord_values = y + elif d == "x": + coord_values = x + elif d == "feature": + coord_values = features + else: + raise NotImplementedError(f"Dimension {d} not supported") + + coords[d] = coord_values + if d != "analysis_time": + # analysis_time varies across the different files, but not + # within a single file + arr_shape.append(len(coord_values)) + + if features_vary_with_analysis_time: + filepaths = [ + fp_samples + / filename_format.format( + analysis_time=analysis_time, **file_params + ) + for analysis_time in coords["analysis_time"] + ] + else: + filepaths = [fp_samples / filename_format.format(**file_params)] + + # use dask.delayed to load the numpy files, so that loading isn't + # done until the data is actually needed + arrays = [ + dask.array.from_delayed( + dask.delayed(_load_np)( + fp=fp, + add_feature_dim=add_feature_dim, + feature_dim_mask=feature_dim_mask, + ), + shape=arr_shape, + dtype=np.float32, + ) + for fp in filepaths + ] + + # read a single timestep and check the shape + arr0 = arrays[0].compute() + if not list(arr0.shape) == arr_shape: + raise Exception( + f"Expected shape {arr_shape} for a single file, got " + f"{list(arr0.shape)}. Maybe the number of features given " + f"in the datastore config ({features}) is incorrect?" + ) + + if features_vary_with_analysis_time: + arr_all = dask.array.stack(arrays, axis=concat_axis) + else: + arr_all = arrays[0] + + da = xr.DataArray(arr_all, dims=dims, coords=coords) + + return da + + def _get_analysis_times(self, split) -> List[np.datetime64]: + """Get the analysis times for the given split by parsing the filenames + of all the files found for the given split. + + Parameters + ---------- + split : str + The dataset split to get the analysis times for. + + Returns + ------- + List[dt.datetime] + The analysis times for the given split. 
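The implementation below turns `STATE_FILENAME_FORMAT` into a glob pattern and then recovers the analysis time from each matching file name with the `parse` library. A small sketch of that round trip (the analysis time is an example value taken from the folder listing above):

    import datetime
    import re

    import parse

    STATE_FILENAME_FORMAT = "nwp_{analysis_time:%Y%m%d%H}_mbr{member_id:03d}.npy"

    # build a concrete file name from an analysis time and member id
    fname = STATE_FILENAME_FORMAT.format(
        analysis_time=datetime.datetime(2022, 9, 1, 0), member_id=0
    )  # -> "nwp_2022090100_mbr000.npy"

    # derive a glob pattern, as _get_analysis_times does
    pattern = re.sub(r"{analysis_time:[^}]*}", "*", STATE_FILENAME_FORMAT)
    pattern = re.sub(r"{member_id:[^}]*}", "*", pattern)  # -> "nwp_*_mbr*.npy"

    # recover the analysis time from the file name
    name_parts = parse.parse(STATE_FILENAME_FORMAT, fname)
    analysis_time = name_parts["analysis_time"]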
+ + """ + pattern = re.sub(r"{analysis_time:[^}]*}", "*", STATE_FILENAME_FORMAT) + pattern = re.sub(r"{member_id:[^}]*}", "*", pattern) + + sample_dir = self.root_path / "samples" / split + sample_files = sample_dir.glob(pattern) + times = [] + for fp in sample_files: + name_parts = parse.parse(STATE_FILENAME_FORMAT, fp.name) + times.append(name_parts["analysis_time"]) + + if len(times) == 0: + raise ValueError( + f"No files found in {sample_dir} with pattern {pattern}" + ) + + return times + + def _calc_datetime_forcing_features(self, da_time: xr.DataArray): + da_hour_angle = da_time.dt.hour / 12 * np.pi + da_year_angle = da_time.dt.dayofyear / 365 * 2 * np.pi + + da_datetime_forcing = xr.concat( + ( + np.sin(da_hour_angle), + np.cos(da_hour_angle), + np.sin(da_year_angle), + np.cos(da_year_angle), + ), + dim="feature", + ) + da_datetime_forcing = (da_datetime_forcing + 1) / 2 # Rescale to [0,1] + da_datetime_forcing["feature"] = [ + "sin_hour", + "cos_hour", + "sin_year", + "cos_year", + ] + + return da_datetime_forcing + + def get_vars_units(self, category: str) -> List[str]: + if category == "state": + return self.config.dataset.var_units + elif category == "forcing": + return [ + "W/m^2", + "1", + "1", + "1", + "1", + "1", + ] + elif category == "static": + return ["m^2/s^2", "1", "m", "m"] + else: + raise NotImplementedError(f"Category {category} not supported") + + def get_vars_names(self, category: str) -> List[str]: + if category == "state": + return self.config.dataset.var_names + elif category == "forcing": + # XXX: this really shouldn't be hard-coded here, this should be in + # the config + return [ + "toa_downwelling_shortwave_flux", + "open_water_fraction", + "sin_hour", + "cos_hour", + "sin_year", + "cos_year", + ] + elif category == "static": + return ["surface_geopotential", "border_mask", "x", "y"] + else: + raise NotImplementedError(f"Category {category} not supported") + + def get_vars_long_names(self, category: str) -> List[str]: + if category == "state": + return self.config.dataset.var_longnames + else: + # TODO: should we add these? + return self.get_vars_names(category=category) + + def get_num_data_vars(self, category: str) -> int: + return len(self.get_vars_names(category=category)) + + def get_xy(self, category: str, stacked: bool) -> np.ndarray: + """Return the x, y coordinates of the dataset. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + stacked : bool + Whether to stack the x, y coordinates. + + Returns + ------- + np.ndarray + The x, y coordinates of the dataset (with x first then y second), + returned differently based on the value of `stacked`: + - `stacked==True`: shape `(n_grid_points, 2)` where + n_grid_points=N_x*N_y. + - `stacked==False`: shape `(N_x, N_y, 2)` + + """ + + # the array on disk has shape [2, N_y, N_x], where dimension 0 + # contains the [x,y] coordinate pairs for each grid point + arr = np.load(self.root_path / "static" / "nwp_xy.npy") + arr_shape = arr.shape + + assert arr_shape[0] == 2, "Expected 2D array" + grid_shape = self.grid_shape_state + assert arr_shape[1:] == (grid_shape.y, grid_shape.x), "Unexpected shape" + + arr = arr.transpose(2, 1, 0) + + if stacked: + return arr.reshape(-1, 2) + else: + return arr + + @property + def step_length(self) -> int: + """The length of each time step in hours. + + Returns + ------- + int + The length of each time step in hours. 
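The `_calc_datetime_forcing_features` helper above encodes time of day and day of year as sine/cosine pairs rescaled to [0, 1]. A small worked example with illustrative values:

    import numpy as np

    hour, dayofyear = 6, 180  # illustrative: hour 6 of day-of-year 180
    hour_angle = hour / 12 * np.pi
    year_angle = dayofyear / 365 * 2 * np.pi
    datetime_forcing = (
        np.array(
            [np.sin(hour_angle), np.cos(hour_angle),
             np.sin(year_angle), np.cos(year_angle)]
        )
        + 1
    ) / 2  # rescaled from [-1, 1] to [0, 1]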
+ + """ + return self._step_length + + @cached_property + def grid_shape_state(self) -> CartesianGridShape: + """The shape of the cartesian grid for the state variables. + + Returns + ------- + CartesianGridShape + The shape of the cartesian grid for the state variables. + + """ + ny, nx = self.config.grid_shape_state + return CartesianGridShape(x=nx, y=ny) + + @cached_property + def boundary_mask(self) -> xr.DataArray: + """The boundary mask for the dataset. This is a binary mask that is 1 + where the grid cell is on the boundary of the domain, and 0 otherwise. + + Returns + ------- + xr.DataArray + The boundary mask for the dataset, with dimensions `[grid_index]`. + + """ + xy = self.get_xy(category="state", stacked=False) + xs = xy[:, :, 0] + ys = xy[:, :, 1] + # Check if x-coordinates are constant along columns + assert np.allclose(xs, xs[:, [0]]), "x-coordinates are not constant" + # Check if y-coordinates are constant along rows + assert np.allclose(ys, ys[[0], :]), "y-coordinates are not constant" + # Extract unique x and y coordinates + x = xs[:, 0] # Unique x-coordinates (changes along the first axis) + y = ys[0, :] # Unique y-coordinates (changes along the second axis) + values = np.load(self.root_path / "static" / "border_mask.npy") + da_mask = xr.DataArray( + values, dims=["y", "x"], coords=dict(x=x, y=y), name="boundary_mask" + ) + da_mask_stacked_xy = self.stack_grid_coords(da_mask).astype(int) + return da_mask_stacked_xy + + def get_standardization_dataarray(self, category: str) -> xr.Dataset: + """Return the standardization dataarray for the given category. This + should contain a `{category}_mean` and `{category}_std` variable for + each variable in the category. For `category=="state"`, the dataarray + should also contain a `state_diff_mean` and `state_diff_std` variable + for the one- step differences of the state variables. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + xr.Dataset + The standardization dataarray for the given category, with + variables for the mean and standard deviation of the variables (and + differences for state variables). 
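A sketch of how the returned statistics are typically applied, mirroring the usage in `plot_example.py` further down (assumes `datastore` is an already-constructed datastore instance):

    category = "state"
    da = datastore.get_dataarray(category=category, split="train")
    da_stats = datastore.get_standardization_dataarray(category=category)
    da_standardized = (da - da_stats[f"{category}_mean"]) / da_stats[f"{category}_std"]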
+ + """ + + def load_pickled_tensor(fn): + return torch.load( + self.root_path / "static" / fn, weights_only=True + ).numpy() + + mean_diff_values = None + std_diff_values = None + if category == "state": + mean_values = load_pickled_tensor("parameter_mean.pt") + std_values = load_pickled_tensor("parameter_std.pt") + try: + mean_diff_values = load_pickled_tensor("diff_mean.pt") + std_diff_values = load_pickled_tensor("diff_std.pt") + except FileNotFoundError: + warnings.warn(f"Could not load diff mean/std for {category}") + # XXX: this is a hack, but when running + # compute_standardization_stats the diff mean/std files are + # created, but require the std and mean files + mean_diff_values = np.empty_like(mean_values) + std_diff_values = np.empty_like(std_values) + + elif category == "forcing": + flux_stats = load_pickled_tensor("flux_stats.pt") # (2,) + flux_mean, flux_std = flux_stats + # manually add hour sin/cos and day-of-year sin/cos stats for now + # the mean/std for open_water_fraction is hardcoded for now + mean_values = np.array([flux_mean, 0.0, 0.0, 0.0, 0.0, 0.0]) + std_values = np.array([flux_std, 1.0, 1.0, 1.0, 1.0, 1.0]) + + elif category == "static": + da_static = self.get_dataarray(category="static", split="train") + da_static_mean = da_static.mean(dim=["grid_index"]).compute() + da_static_std = da_static.std(dim=["grid_index"]).compute() + mean_values = da_static_mean.values + std_values = da_static_std.values + else: + raise NotImplementedError(f"Category {category} not supported") + + feature_dim_name = f"{category}_feature" + variables = { + f"{category}_mean": (feature_dim_name, mean_values), + f"{category}_std": (feature_dim_name, std_values), + } + + if mean_diff_values is not None and std_diff_values is not None: + variables["state_diff_mean"] = (feature_dim_name, mean_diff_values) + variables["state_diff_std"] = (feature_dim_name, std_diff_values) + + ds_norm = xr.Dataset( + variables, + coords={feature_dim_name: self.get_vars_names(category=category)}, + ) + + return ds_norm + + @functools.cached_property + def coords_projection(self) -> ccrs.Projection: + """The projection of the spatial coordinates. + + Returns + ------- + ccrs.Projection + The projection of the spatial coordinates. + + """ + proj_class_name = self.config.projection.class_name + ProjectionClass = getattr(ccrs, proj_class_name) + proj_params = self.config.projection.kwargs + return ProjectionClass(**proj_params) diff --git a/neural_lam/datastore/plot_example.py b/neural_lam/datastore/plot_example.py new file mode 100644 index 00000000..2d477271 --- /dev/null +++ b/neural_lam/datastore/plot_example.py @@ -0,0 +1,189 @@ +# Third-party +import matplotlib.pyplot as plt + +# Local +from . import DATASTORES, init_datastore + + +def plot_example_from_datastore( + category, + datastore, + col_dim, + split="train", + standardize=True, + selection={}, + index_selection={}, +): + """ + Create a plot of the data from the datastore. + + Parameters + ---------- + category : str + Category of data to plot, one of "state", "forcing", or "static". + datastore : Datastore + Datastore to retrieve data from. + col_dim : str + Dimension to use for plot facetting into columns. This can be a + template string that can be formatted with the category name. + split : str, optional + Split of data to plot, by default "train". + standardize : bool, optional + Whether to standardize the data before plotting, by default True. 
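A usage sketch of this function (assumes `datastore` has already been constructed, e.g. via `init_datastore`; the selected timestep is an illustrative value for a time-indexed datastore):

    import matplotlib.pyplot as plt

    fig = plot_example_from_datastore(
        category="state",
        datastore=datastore,
        col_dim="{category}_feature",
        split="train",
        selection={"time": "1990-09-03T00:00"},  # illustrative timestep
    )
    plt.show()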
+ selection : dict, optional + Selections to apply to the dataarray, for example + `time="1990-09-03T0:00" would select this single timestep, by default + {}. + index_selection: dict, optional + Index-based selection to apply to the dataarray, for example + `time=0` would select the first item along the `time` dimension, by + default {}. + + Returns + ------- + Figure + Matplotlib figure object. + """ + da = datastore.get_dataarray(category=category, split=split) + if standardize: + da_stats = datastore.get_standardization_dataarray(category=category) + da = (da - da_stats[f"{category}_mean"]) / da_stats[f"{category}_std"] + da = datastore.unstack_grid_coords(da) + + if len(selection) > 0: + da = da.sel(**selection) + if len(index_selection) > 0: + da = da.isel(**index_selection) + + col = col_dim.format(category=category) + + # check that the column dimension exists and that the resulting shape is 2D + if col not in da.dims: + raise ValueError(f"Column dimension {col} not found in dataarray.") + da_col_item = da.isel({col: 0}).squeeze() + if not len(da_col_item.shape) == 2: + raise ValueError( + f"Column dimension {col} and selection {selection} does not " + "result in a 2D dataarray. Please adjust the column dimension " + "and/or selection. Instead the resulting dataarray is:\n" + f"{da_col_item}" + ) + + crs = datastore.coords_projection + col_wrap = min(4, int(da[col].count())) + g = da.plot( + x="x", + y="y", + col=col, + col_wrap=col_wrap, + subplot_kws={"projection": crs}, + transform=crs, + size=4, + ) + for ax in g.axes.flat: + ax.coastlines() + ax.gridlines(draw_labels=["left", "bottom"]) + ax.set_extent(datastore.get_xy_extent(category=category), crs=crs) + + return g.fig + + +if __name__ == "__main__": + # Standard library + import argparse + + def _parse_dict(arg_str): + key, value = arg_str.split("=") + for op in [int, float]: + try: + value = op(value) + break + except ValueError: + pass + return key, value + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--datastore_kind", + type=str, + choices=DATASTORES.keys(), + default="mdp", + help="Kind of datastore to use", + ) + parser.add_argument( + "--datastore_config_path", + type=str, + default=None, + help="Path for the datastore config", + ) + parser.add_argument( + "--category", + default="state", + help="Category of data to plot", + choices=["state", "forcing", "static"], + ) + parser.add_argument( + "--split", default="train", help="Split of data to plot" + ) + parser.add_argument( + "--col-dim", + default="{category}_feature", + help="Dimension to use for plot facetting into columns", + ) + parser.add_argument( + "--disable-standardize", + dest="standardize", + action="store_false", + help="Disable standardization of data", + ) + # add the ability to create dictionary of kwargs + parser.add_argument( + "--selection", + nargs="+", + default=[], + type=_parse_dict, + help="Selections to apply to the dataarray, for example " + "`time='1990-09-03T0:00' would select this single timestep", + ) + parser.add_argument( + "--index-selection", + nargs="+", + default=[], + type=_parse_dict, + help="Index-based selection to apply to the dataarray, for example " + "`time=0` would select the first item along the `time` dimension", + ) + args = parser.parse_args() + + assert ( + args.datastore_config_path is not None + ), "Specify your datastore config with --datastore_config_path" + + selection = dict(args.selection) + index_selection = 
dict(args.index_selection) + + # check that column dimension is not in the selection + if args.col_dim.format(category=args.category) in selection: + raise ValueError( + f"Column dimension {args.col_dim.format(category=args.category)} " + f"cannot be in the selection ({selection}). Please adjust the " + "column dimension and/or selection." + ) + + datastore = init_datastore( + datastore_kind=args.datastore_kind, + config_path=args.datastore_config_path, + ) + + plot_example_from_datastore( + args.category, + datastore, + split=args.split, + col_dim=args.col_dim, + standardize=args.standardize, + selection=selection, + index_selection=index_selection, + ) + plt.show() diff --git a/neural_lam/loss_weighting.py b/neural_lam/loss_weighting.py new file mode 100644 index 00000000..c842b202 --- /dev/null +++ b/neural_lam/loss_weighting.py @@ -0,0 +1,106 @@ +# Local +from .config import ( + ManualStateFeatureWeighting, + NeuralLAMConfig, + UniformFeatureWeighting, +) +from .datastore.base import BaseDatastore + + +def get_manual_state_feature_weights( + weighting_config: ManualStateFeatureWeighting, datastore: BaseDatastore +) -> list[float]: + """ + Return the state feature weights as a list of floats in the order of the + state features in the datastore. + + Parameters + ---------- + weighting_config : ManualStateFeatureWeighting + Configuration object containing the manual state feature weights. + datastore : BaseDatastore + Datastore object containing the state features. + + Returns + ------- + list[float] + List of floats containing the state feature weights. + """ + state_feature_names = datastore.get_vars_names(category="state") + feature_weight_names = weighting_config.weights.keys() + + # Check that the state_feature_weights dictionary has a weight for each + # state feature in the datastore. + if set(feature_weight_names) != set(state_feature_names): + additional_features = set(feature_weight_names) - set( + state_feature_names + ) + missing_features = set(state_feature_names) - set(feature_weight_names) + raise ValueError( + f"State feature weights must be provided for each state feature" + f"in the datastore ({state_feature_names}). {missing_features}" + " are missing and weights are defined for the features " + f"{additional_features} which are not in the datastore." + ) + + state_feature_weights = [ + weighting_config.weights[feature] for feature in state_feature_names + ] + return state_feature_weights + + +def get_uniform_state_feature_weights(datastore: BaseDatastore) -> list[float]: + """ + Return the state feature weights as a list of floats in the order of the + state features in the datastore. + + The weights are uniform, i.e. 1.0/n_features for each feature. + + Parameters + ---------- + datastore : BaseDatastore + Datastore object containing the state features. + + Returns + ------- + list[float] + List of floats containing the state feature weights. + """ + state_feature_names = datastore.get_vars_names(category="state") + n_features = len(state_feature_names) + return [1.0 / n_features] * n_features + + +def get_state_feature_weighting( + config: NeuralLAMConfig, datastore: BaseDatastore +) -> list[float]: + """ + Return the state feature weights as a list of floats in the order of the + state features in the datastore. The weights are determined based on the + configuration in the NeuralLAMConfig object. + + Parameters + ---------- + config : NeuralLAMConfig + Configuration object for neural-lam. 
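To make the ordering contract of the manual weighting concrete (feature names and weights below are hypothetical, not taken from any real datastore):

    # order comes from datastore.get_vars_names("state"), not from the config
    state_feature_names = ["t2m", "u10m", "v10m"]  # hypothetical
    manual_weights = {"u10m": 0.5, "t2m": 1.0, "v10m": 0.5}  # ManualStateFeatureWeighting.weights
    state_feature_weights = [manual_weights[name] for name in state_feature_names]
    # -> [1.0, 0.5, 0.5]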
+ datastore : BaseDatastore + Datastore object containing the state features. + + Returns + ------- + list[float] + List of floats containing the state feature weights. + """ + weighting_config = config.training.state_feature_weighting + + if isinstance(weighting_config, ManualStateFeatureWeighting): + weights = get_manual_state_feature_weights(weighting_config, datastore) + elif isinstance(weighting_config, UniformFeatureWeighting): + weights = get_uniform_state_feature_weights(datastore) + else: + raise NotImplementedError( + "Unsupported state feature weighting configuration: " + f"{weighting_config}" + ) + + return weights diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index e94de8c6..bc4c6719 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -9,7 +9,10 @@ import wandb # Local -from .. import config, metrics, utils, vis +from .. import metrics, vis +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore +from ..loss_weighting import get_state_feature_weighting class ARModel(pl.LightningModule): @@ -21,35 +24,78 @@ class ARModel(pl.LightningModule): # pylint: disable=arguments-differ # Disable to override args/kwargs from superclass - def __init__(self, args): + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + ): super().__init__() - self.save_hyperparameters() + self.save_hyperparameters(ignore=["datastore"]) self.args = args - self.config_loader = config.Config.from_file(args.data_config) + self._datastore = datastore + num_state_vars = datastore.get_num_data_vars(category="state") + num_forcing_vars = datastore.get_num_data_vars(category="forcing") + da_static_features = datastore.get_dataarray( + category="static", split=None + ) + da_state_stats = datastore.get_standardization_dataarray( + category="state" + ) + da_boundary_mask = datastore.boundary_mask + num_past_forcing_steps = args.num_past_forcing_steps + num_future_forcing_steps = args.num_future_forcing_steps + + # Load static features for grid/data, NB: self.predict_step assumes + # dimension order to be (grid_index, static_feature) + arr_static = da_static_features.transpose( + "grid_index", "static_feature" + ).values + self.register_buffer( + "grid_static_features", + torch.tensor(arr_static, dtype=torch.float32), + persistent=False, + ) + + state_stats = { + "state_mean": torch.tensor( + da_state_stats.state_mean.values, dtype=torch.float32 + ), + "state_std": torch.tensor( + da_state_stats.state_std.values, dtype=torch.float32 + ), + "diff_mean": torch.tensor( + da_state_stats.state_diff_mean.values, dtype=torch.float32 + ), + "diff_std": torch.tensor( + da_state_stats.state_diff_std.values, dtype=torch.float32 + ), + } + + for key, val in state_stats.items(): + self.register_buffer(key, val, persistent=False) - # Load static features for grid/data - static_data_dict = utils.load_static_data( - self.config_loader.dataset.name + state_feature_weights = get_state_feature_weighting( + config=config, datastore=datastore + ) + self.feature_weights = torch.tensor( + state_feature_weights, dtype=torch.float32 ) - for static_data_name, static_data_tensor in static_data_dict.items(): - self.register_buffer( - static_data_name, static_data_tensor, persistent=False - ) # Double grid output dim. to also output std.-dev. self.output_std = bool(args.output_std) if self.output_std: # Pred. dim. 
in grid cell - self.grid_output_dim = 2 * self.config_loader.num_data_vars() + self.grid_output_dim = 2 * num_state_vars else: # Pred. dim. in grid cell - self.grid_output_dim = self.config_loader.num_data_vars() + self.grid_output_dim = num_state_vars # Store constant per-variable std.-dev. weighting - # Note that this is the inverse of the multiplicative weighting + # NOTE that this is the inverse of the multiplicative weighting # in wMSE/wMAE self.register_buffer( "per_var_std", - self.step_diff_std / torch.sqrt(self.param_weights), + self.diff_std / torch.sqrt(self.feature_weights), persistent=False, ) @@ -58,21 +104,29 @@ def __init__(self, args): self.num_grid_nodes, grid_static_dim, ) = self.grid_static_features.shape + self.grid_dim = ( - 2 * self.config_loader.num_data_vars() + 2 * self.grid_output_dim + grid_static_dim - + self.config_loader.dataset.num_forcing_features + + num_forcing_vars + * (num_past_forcing_steps + num_future_forcing_steps + 1) ) # Instantiate loss function self.loss = metrics.get_metric(args.loss) + boundary_mask = torch.tensor( + da_boundary_mask.values, dtype=torch.float32 + ).unsqueeze( + 1 + ) # add feature dim + + self.register_buffer("boundary_mask", boundary_mask, persistent=False) # Pre-compute interior mask for use in loss function self.register_buffer( - "interior_mask", 1.0 - self.border_mask, persistent=False + "interior_mask", 1.0 - self.boundary_mask, persistent=False ) # (num_grid_nodes, 1), 1 for non-border - self.step_length = args.step_length # Number of hours per pred. step self.val_metrics = { "mse": [], } @@ -116,18 +170,18 @@ def expand_to_batch(x, batch_size): def predict_step(self, prev_state, prev_prev_state, forcing): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 - prev_state: (B, num_grid_nodes, feature_dim), X_t - prev_prev_state: (B, num_grid_nodes, feature_dim), X_{t-1} - forcing: (B, num_grid_nodes, forcing_dim) + prev_state: (B, num_grid_nodes, feature_dim), X_t prev_prev_state: (B, + num_grid_nodes, feature_dim), X_{t-1} forcing: (B, num_grid_nodes, + forcing_dim) """ raise NotImplementedError("No prediction step implemented") def unroll_prediction(self, init_states, forcing_features, true_states): """ Roll out prediction taking multiple autoregressive steps with model - init_states: (B, 2, num_grid_nodes, d_f) - forcing_features: (B, pred_steps, num_grid_nodes, d_static_f) - true_states: (B, pred_steps, num_grid_nodes, d_f) + init_states: (B, 2, num_grid_nodes, d_f) forcing_features: (B, + pred_steps, num_grid_nodes, d_static_f) true_states: (B, pred_steps, + num_grid_nodes, d_f) """ prev_prev_state = init_states[:, 0] prev_state = init_states[:, 1] @@ -142,12 +196,12 @@ def unroll_prediction(self, init_states, forcing_features, true_states): pred_state, pred_std = self.predict_step( prev_state, prev_prev_state, forcing ) - # state: (B, num_grid_nodes, d_f) - # pred_std: (B, num_grid_nodes, d_f) or None + # state: (B, num_grid_nodes, d_f) pred_std: (B, num_grid_nodes, + # d_f) or None # Overwrite border with true state new_state = ( - self.border_mask * border_state + self.boundary_mask * border_state + self.interior_mask * pred_state ) @@ -173,32 +227,27 @@ def unroll_prediction(self, init_states, forcing_features, true_states): def common_step(self, batch): """ - Predict on single batch - batch consists of: - init_states: (B, 2, num_grid_nodes, d_features) - target_states: (B, pred_steps, num_grid_nodes, d_features) - forcing_features: (B, pred_steps, num_grid_nodes, d_forcing), + Predict on 
single batch batch consists of: init_states: (B, 2, + num_grid_nodes, d_features) target_states: (B, pred_steps, + num_grid_nodes, d_features) forcing_features: (B, pred_steps, + num_grid_nodes, d_forcing), where index 0 corresponds to index 1 of init_states """ - ( - init_states, - target_states, - forcing_features, - ) = batch + (init_states, target_states, forcing_features, batch_times) = batch prediction, pred_std = self.unroll_prediction( init_states, forcing_features, target_states ) # (B, pred_steps, num_grid_nodes, d_f) - # prediction: (B, pred_steps, num_grid_nodes, d_f) - # pred_std: (B, pred_steps, num_grid_nodes, d_f) or (d_f,) + # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, + # pred_steps, num_grid_nodes, d_f) or (d_f,) - return prediction, target_states, pred_std + return prediction, target_states, pred_std, batch_times def training_step(self, batch): """ Train on single batch """ - prediction, target, pred_std = self.common_step(batch) + prediction, target, pred_std, _ = self.common_step(batch) # Compute loss batch_loss = torch.mean( @@ -209,14 +258,19 @@ def training_step(self, batch): log_dict = {"train_loss": batch_loss} self.log_dict( - log_dict, prog_bar=True, on_step=True, on_epoch=True, sync_dist=True + log_dict, + prog_bar=True, + on_step=True, + on_epoch=True, + sync_dist=True, + batch_size=batch[0].shape[0], ) return batch_loss def all_gather_cat(self, tensor_to_gather): """ - Gather tensors across all ranks, and concatenate across dim. 0 - (instead of stacking in new dim. 0) + Gather tensors across all ranks, and concatenate across dim. 0 (instead + of stacking in new dim. 0) tensor_to_gather: (d1, d2, ...), distributed over K ranks @@ -230,7 +284,7 @@ def validation_step(self, batch, batch_idx): """ Run validation on single batch """ - prediction, target, pred_std = self.common_step(batch) + prediction, target, pred_std, _ = self.common_step(batch) time_step_loss = torch.mean( self.loss( @@ -244,10 +298,15 @@ def validation_step(self, batch, batch_idx): val_log_dict = { f"val_loss_unroll{step}": time_step_loss[step - 1] for step in self.args.val_steps_to_log + if step <= len(time_step_loss) } val_log_dict["val_mean_loss"] = mean_loss self.log_dict( - val_log_dict, on_step=False, on_epoch=True, sync_dist=True + val_log_dict, + on_step=False, + on_epoch=True, + sync_dist=True, + batch_size=batch[0].shape[0], ) # Store MSEs @@ -276,9 +335,10 @@ def test_step(self, batch, batch_idx): """ Run test on single batch """ - prediction, target, pred_std = self.common_step(batch) - # prediction: (B, pred_steps, num_grid_nodes, d_f) - # pred_std: (B, pred_steps, num_grid_nodes, d_f) or (d_f,) + # TODO Here batch_times can be used for plotting routines + prediction, target, pred_std, batch_times = self.common_step(batch) + # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, + # pred_steps, num_grid_nodes, d_f) or (d_f,) time_step_loss = torch.mean( self.loss( @@ -296,13 +356,16 @@ def test_step(self, batch, batch_idx): test_log_dict["test_mean_loss"] = mean_loss self.log_dict( - test_log_dict, on_step=False, on_epoch=True, sync_dist=True + test_log_dict, + on_step=False, + on_epoch=True, + sync_dist=True, + batch_size=batch[0].shape[0], ) - # Compute all evaluation metrics for error maps - # Note: explicitly list metrics here, as test_metrics can contain - # additional ones, computed differently, but that should be aggregated - # on_test_epoch_end + # Compute all evaluation metrics for error maps Note: explicitly list + # metrics here, as 
test_metrics can contain additional ones, computed + # differently, but that should be aggregated on_test_epoch_end for metric_name in ("mse", "mae"): metric_func = metrics.get_metric(metric_name) batch_metric_vals = metric_func( @@ -338,7 +401,8 @@ def test_step(self, batch, batch_idx): ): # Need to plot more example predictions n_additional_examples = min( - prediction.shape[0], self.n_example_pred - self.plotted_examples + prediction.shape[0], + self.n_example_pred - self.plotted_examples, ) self.plot_examples( @@ -349,19 +413,19 @@ def plot_examples(self, batch, n_examples, prediction=None): """ Plot the first n_examples forecasts from batch - batch: batch with data to plot corresponding forecasts for - n_examples: number of forecasts to plot - prediction: (B, pred_steps, num_grid_nodes, d_f), existing prediction. + batch: batch with data to plot corresponding forecasts for n_examples: + number of forecasts to plot prediction: (B, pred_steps, num_grid_nodes, + d_f), existing prediction. Generate if None. """ if prediction is None: - prediction, target = self.common_step(batch) + prediction, target, _, _ = self.common_step(batch) target = batch[1] # Rescale to original data scale - prediction_rescaled = prediction * self.data_std + self.data_mean - target_rescaled = target * self.data_std + self.data_mean + prediction_rescaled = prediction * self.state_std + self.state_mean + target_rescaled = target * self.state_std + self.state_mean # Iterate over the examples for pred_slice, target_slice in zip( @@ -395,18 +459,17 @@ def plot_examples(self, batch, n_examples, prediction=None): # Create one figure per variable at this time step var_figs = [ vis.plot_prediction( - pred_t[:, var_i], - target_t[:, var_i], - self.interior_mask[:, 0], - self.config_loader, + pred=pred_t[:, var_i], + target=target_t[:, var_i], + datastore=self._datastore, title=f"{var_name} ({var_unit}), " - f"t={t_i} ({self.step_length * t_i} h)", + f"t={t_i} ({self._datastore.step_length * t_i} h)", vrange=var_vrange, ) for var_i, (var_name, var_unit, var_vrange) in enumerate( zip( - self.config_loader.dataset.var_names, - self.config_loader.dataset.var_units, + self._datastore.get_vars_names("state"), + self._datastore.get_vars_units("state"), var_vranges, ) ) @@ -417,7 +480,7 @@ def plot_examples(self, batch, n_examples, prediction=None): { f"{var_name}_example_{example_i}": wandb.Image(fig) for var_name, fig in zip( - self.config_loader.dataset.var_names, var_figs + self._datastore.get_vars_names("state"), var_figs ) } ) @@ -441,19 +504,19 @@ def plot_examples(self, batch, n_examples, prediction=None): def create_metric_log_dict(self, metric_tensor, prefix, metric_name): """ - Put together a dict with everything to log for one metric. - Also saves plots as pdf and csv if using test prefix. + Put together a dict with everything to log for one metric. Also saves + plots as pdf and csv if using test prefix. 
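A rough, standalone sketch of the per-variable, per-lead-time flattening this method performs for watched metrics (variable names and lead times invented; the real method also logs figures and saves pdf/csv artefacts):

```python
# Standalone sketch; "t2m"/"u10" and the lead-time list are illustrative only.
import torch


def flatten_metric_tensor(metric_tensor, prefix, metric_name, var_names, steps):
    """Turn a (pred_steps, d_f) tensor into {name: scalar} log entries."""
    log_dict = {}
    for step in steps:  # lead times, 1-indexed
        for var_i, var_name in enumerate(var_names):
            key = f"{prefix}_{metric_name}_{var_name}_step_{step}"
            log_dict[key] = metric_tensor[step - 1, var_i].item()
    return log_dict


errors = torch.rand(19, 2)  # dummy (pred_steps, d_f) metric values
print(flatten_metric_tensor(errors, "test", "rmse", ["t2m", "u10"], steps=[1, 6]))
```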
metric_tensor: (pred_steps, d_f), metric values per time and variable - prefix: string, prefix to use for logging - metric_name: string, name of the metric + prefix: string, prefix to use for logging metric_name: string, name of + the metric - Return: - log_dict: dict with everything to log for given metric + Return: log_dict: dict with everything to log for given metric """ log_dict = {} metric_fig = vis.plot_error_map( - metric_tensor, self.config_loader, step_length=self.step_length + errors=metric_tensor, + datastore=self._datastore, ) full_log_name = f"{prefix}_{metric_name}" log_dict[full_log_name] = wandb.Image(metric_fig) @@ -471,17 +534,13 @@ def create_metric_log_dict(self, metric_tensor, prefix, metric_name): ) # Check if metrics are watched, log exact values for specific vars + var_names = self._datastore.get_vars_names(category="state") if full_log_name in self.args.metrics_watch: for var_i, timesteps in self.args.var_leads_metrics_watch.items(): - var = self.config_loader.dataset.var_names[var_i] - log_dict.update( - { - f"{full_log_name}_{var}_step_{step}": metric_tensor[ - step - 1, var_i - ] # 1-indexed in data_config - for step in timesteps - } - ) + var_name = var_names[var_i] + for step in timesteps: + key = f"{full_log_name}_{var_name}_step_{step}" + log_dict[key] = metric_tensor[step - 1, var_i] return log_dict @@ -508,8 +567,8 @@ def aggregate_and_plot_metrics(self, metrics_dict, prefix): metric_tensor_averaged = torch.sqrt(metric_tensor_averaged) metric_name = metric_name.replace("mse", "rmse") - # Note: we here assume rescaling for all metrics is linear - metric_rescaled = metric_tensor_averaged * self.data_std + # NOTE: we here assume rescaling for all metrics is linear + metric_rescaled = metric_tensor_averaged * self.state_std # (pred_steps, d_f) log_dict.update( self.create_metric_log_dict( @@ -523,8 +582,8 @@ def aggregate_and_plot_metrics(self, metrics_dict, prefix): def on_test_epoch_end(self): """ - Compute test metrics and make plots at the end of test epoch. - Will gather stored tensors and perform plotting and logging on rank 0. + Compute test metrics and make plots at the end of test epoch. Will + gather stored tensors and perform plotting and logging on rank 0. """ # Create error maps for all test metrics self.aggregate_and_plot_metrics(self.test_metrics, prefix="test") @@ -540,10 +599,10 @@ def on_test_epoch_end(self): loss_map_figs = [ vis.plot_spatial_error( - loss_map, - self.interior_mask[:, 0], - self.config_loader, - title=f"Test loss, t={t_i} ({self.step_length * t_i} h)", + error=loss_map, + datastore=self._datastore, + title=f"Test loss, t={t_i} " + f"({self._datastore.step_length * t_i} h)", ) for t_i, loss_map in zip( self.args.val_steps_to_log, mean_spatial_loss @@ -557,7 +616,7 @@ def on_test_epoch_end(self): # also make without title and save as pdf pdf_loss_map_figs = [ vis.plot_spatial_error( - loss_map, self.interior_mask[:, 0], self.config_loader + error=loss_map, datastore=self._datastore ) for loss_map in mean_spatial_loss ] diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 99629073..6233b4d1 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -3,6 +3,8 @@ # Local from .. import utils +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore from ..interaction_net import InteractionNet from .ar_model import ARModel @@ -13,13 +15,16 @@ class BaseGraphModel(ARModel): the encode-process-decode idea. 
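A toy, non-graph illustration of the encode-process-decode control flow mentioned above (plain linear layers standing in for the actual grid/mesh embedders and InteractionNet processor; all dimensions invented):

```python
# Toy stand-in: linear layers instead of graph message passing; sizes invented.
import torch
from torch import nn


class ToyEncodeProcessDecode(nn.Module):
    def __init__(self, d_in, d_latent, d_out, n_process_steps=2):
        super().__init__()
        self.encoder = nn.Linear(d_in, d_latent)
        self.processor = nn.ModuleList(
            [nn.Linear(d_latent, d_latent) for _ in range(n_process_steps)]
        )
        self.decoder = nn.Linear(d_latent, d_out)

    def forward(self, x):
        h = torch.relu(self.encoder(x))  # encode to latent representation
        for layer in self.processor:
            h = h + torch.relu(layer(h))  # residual processing steps
        return self.decoder(h)  # decode back to output features


model = ToyEncodeProcessDecode(d_in=8, d_latent=16, d_out=4)
print(model(torch.randn(2, 10, 8)).shape)  # torch.Size([2, 10, 4])
```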
""" - def __init__(self, args): - super().__init__(args) + def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): + super().__init__(args, config=config, datastore=datastore) # Load graph with static features # NOTE: (IMPORTANT!) mesh nodes MUST have the first # num_mesh_nodes indices, - self.hierarchical, graph_ldict = utils.load_graph(args.graph) + graph_dir_path = datastore.root_path / "graph" / args.graph + self.hierarchical, graph_ldict = utils.load_graph( + graph_dir_path=graph_dir_path + ) for name, attr_value in graph_ldict.items(): # Make BufferLists module members and register tensors as buffers if isinstance(attr_value, torch.Tensor): @@ -157,7 +162,7 @@ def predict_step(self, prev_state, prev_prev_state, forcing): pred_delta_mean, pred_std_raw = net_output.chunk( 2, dim=-1 ) # both (B, num_grid_nodes, d_f) - # Note: The predicted std. is not scaled in any way here + # NOTE: The predicted std. is not scaled in any way here # linter for some reason does not think softplus is callable # pylint: disable-next=not-callable pred_std = torch.nn.functional.softplus(pred_std_raw) @@ -166,9 +171,7 @@ def predict_step(self, prev_state, prev_prev_state, forcing): pred_std = None # Rescale with one-step difference statistics - rescaled_delta_mean = ( - pred_delta_mean * self.step_diff_std + self.step_diff_mean - ) + rescaled_delta_mean = pred_delta_mean * self.diff_std + self.diff_mean # Residual connection for full state return prev_state + rescaled_delta_mean, pred_std diff --git a/neural_lam/models/base_hi_graph_model.py b/neural_lam/models/base_hi_graph_model.py index a2ebcc1b..8ec46b4f 100644 --- a/neural_lam/models/base_hi_graph_model.py +++ b/neural_lam/models/base_hi_graph_model.py @@ -3,6 +3,8 @@ # Local from .. import utils +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore from ..interaction_net import InteractionNet from .base_graph_model import BaseGraphModel @@ -12,8 +14,8 @@ class BaseHiGraphModel(BaseGraphModel): Base class for hierarchical graph models. """ - def __init__(self, args): - super().__init__(args) + def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): + super().__init__(args, config=config, datastore=datastore) # Track number of nodes, edges on each level # Flatten lists for efficient embedding diff --git a/neural_lam/models/graph_lam.py b/neural_lam/models/graph_lam.py index d73f7ad8..68b7d01e 100644 --- a/neural_lam/models/graph_lam.py +++ b/neural_lam/models/graph_lam.py @@ -3,6 +3,8 @@ # Local from .. import utils +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore from ..interaction_net import InteractionNet from .base_graph_model import BaseGraphModel @@ -15,8 +17,8 @@ class GraphLAM(BaseGraphModel): Oskarsson et al. (2023). """ - def __init__(self, args): - super().__init__(args) + def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): + super().__init__(args, config=config, datastore=datastore) assert ( not self.hierarchical diff --git a/neural_lam/models/hi_lam.py b/neural_lam/models/hi_lam.py index 4f3aec05..c340c95d 100644 --- a/neural_lam/models/hi_lam.py +++ b/neural_lam/models/hi_lam.py @@ -2,6 +2,8 @@ from torch import nn # Local +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore from ..interaction_net import InteractionNet from .base_hi_graph_model import BaseHiGraphModel @@ -13,8 +15,8 @@ class HiLAM(BaseHiGraphModel): The Hi-LAM model from Oskarsson et al. 
(2023) """ - def __init__(self, args): - super().__init__(args) + def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): + super().__init__(args, config=config, datastore=datastore) # Make down GNNs, both for down edges and same level self.mesh_down_gnns = nn.ModuleList( @@ -200,5 +202,6 @@ def hi_processor_step( up_same_gnns, ) - # Note: We return all, even though only down edges really are used later + # NOTE: We return all, even though only down edges really are used + # later return mesh_rep_levels, mesh_same_rep, mesh_up_rep, mesh_down_rep diff --git a/neural_lam/models/hi_lam_parallel.py b/neural_lam/models/hi_lam_parallel.py index b40a9424..a0a84d29 100644 --- a/neural_lam/models/hi_lam_parallel.py +++ b/neural_lam/models/hi_lam_parallel.py @@ -3,6 +3,8 @@ import torch_geometric as pyg # Local +from ..config import NeuralLAMConfig +from ..datastore import BaseDatastore from ..interaction_net import InteractionNet from .base_hi_graph_model import BaseHiGraphModel @@ -16,8 +18,8 @@ class HiLAMParallel(BaseHiGraphModel): of Hi-LAM. """ - def __init__(self, args): - super().__init__(args) + def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): + super().__init__(args, config=config, datastore=datastore) # Processor GNNs # Create the complete edge_index combining all edges for processing @@ -92,5 +94,6 @@ def hi_processor_step( self.num_levels + (self.num_levels - 1) : ] # Last are down edges - # Note: We return all, even though only down edges really are used later + # TODO: We return all, even though only down edges really are used + # later return mesh_rep_levels, mesh_same_rep, mesh_up_rep, mesh_down_rep diff --git a/plot_graph.py b/neural_lam/plot_graph.py similarity index 88% rename from plot_graph.py rename to neural_lam/plot_graph.py index e47e62c0..999c8e53 100644 --- a/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -1,4 +1,5 @@ # Standard library +import os from argparse import ArgumentParser # Third-party @@ -6,8 +7,9 @@ import plotly.graph_objects as go import torch_geometric as pyg -# First-party -from neural_lam import config, utils +# Local +from . 
import utils +from .config import load_config_and_datastore MESH_HEIGHT = 0.1 MESH_LEVEL_DIST = 0.2 @@ -15,15 +17,13 @@ def main(): - """ - Plot graph structure in 3D using plotly - """ + """Plot graph structure in 3D using plotly.""" parser = ArgumentParser(description="Plot graph") parser.add_argument( - "--data_config", + "--datastore_config_path", type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file (default: neural_lam/data_config.yaml)", + default="tests/datastore_examples/mdp/config.yaml", + help="Path for the datastore config", ) parser.add_argument( "--graph", @@ -43,10 +43,17 @@ def main(): ) args = parser.parse_args() - config_loader = config.Config.from_file(args.data_config) + _, datastore = load_config_and_datastore( + config_path=args.datastore_config_path + ) + + xy = datastore.get_xy("state", stacked=True) # (N_grid, 2) + pos_max = np.max(np.abs(xy)) + grid_pos = xy / pos_max # Divide by maximum coordinate # Load graph data - hierarchical, graph_ldict = utils.load_graph(args.graph) + graph_dir_path = os.path.join(datastore.root_path, "graph", args.graph) + hierarchical, graph_ldict = utils.load_graph(graph_dir_path=graph_dir_path) (g2m_edge_index, m2g_edge_index, m2m_edge_index,) = ( graph_ldict["g2m_edge_index"], graph_ldict["m2g_edge_index"], @@ -58,12 +65,6 @@ def main(): ) mesh_static_features = graph_ldict["mesh_static_features"] - grid_static_features = utils.load_static_data(config_loader.dataset.name)[ - "grid_static_features" - ] - - # Extract values needed, turn to numpy - grid_pos = grid_static_features[:, :2].numpy() # Add in z-dimension z_grid = GRID_HEIGHT * np.ones((grid_pos.shape[0],)) grid_pos = np.concatenate( diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index c1a6cb89..74146c89 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -8,10 +8,13 @@ import pytorch_lightning as pl import torch from lightning_fabric.utilities import seed +from loguru import logger # Local -from . import WeatherDataset, config, utils +from . 
import utils +from .config import load_config_and_datastore from .models import GraphLAM, HiLAM, HiLAMParallel +from .weather_dataset import WeatherDataModule MODELS = { "graph_lam": GraphLAM, @@ -20,18 +23,16 @@ } +@logger.catch def main(input_args=None): - """ - Main function for training and evaluating models - """ + """Main function for training and evaluating models.""" parser = ArgumentParser( description="Train or evaluate NeurWP models for LAM" ) parser.add_argument( - "--data_config", + "--config_path", type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file (default: neural_lam/data_config.yaml)", + help="Path to the configuration for neural-lam", ) parser.add_argument( "--model", @@ -39,17 +40,11 @@ def main(input_args=None): default="graph_lam", help="Model architecture to train/evaluate (default: graph_lam)", ) - parser.add_argument( - "--subset_ds", - action="store_true", - help="Use only a small subset of the dataset, for debugging" - "(default: false)", - ) parser.add_argument( "--seed", type=int, default=42, help="random seed (default: 42)" ) parser.add_argument( - "--n_workers", + "--num_workers", type=int, default=4, help="Number of workers in data loader (default: 4)", @@ -124,31 +119,18 @@ def main(input_args=None): # Training options parser.add_argument( - "--ar_steps", + "--ar_steps_train", type=int, default=1, - help="Number of steps to unroll prediction for in loss (1-19) " + help="Number of steps to unroll prediction for in loss function " "(default: 1)", ) - parser.add_argument( - "--control_only", - action="store_true", - help="Train only on control member of ensemble data " - "(default: False)", - ) parser.add_argument( "--loss", type=str, default="wmse", help="Loss function to use, see metric.py (default: wmse)", ) - parser.add_argument( - "--step_length", - type=int, - default=3, - help="Step length in hours to consider single time step 1-3 " - "(default: 3)", - ) parser.add_argument( "--lr", type=float, default=1e-3, help="learning rate (default: 0.001)" ) @@ -167,6 +149,13 @@ def main(input_args=None): help="Eval model on given data split (val/test) " "(default: None (train model))", ) + parser.add_argument( + "--ar_steps_eval", + type=int, + default=10, + help="Number of steps to unroll prediction for during evaluation " + "(default: 10)", + ) parser.add_argument( "--n_example_pred", type=int, @@ -184,9 +173,10 @@ def main(input_args=None): ) parser.add_argument( "--val_steps_to_log", - type=list, + nargs="+", + type=int, default=[1, 2, 3, 5, 10, 15, 19], - help="Steps to log val loss for (default: [1, 2, 3, 5, 10, 15, 19])", + help="Steps to log val loss for (default: 1 2 3 5 10 15 19)", ) parser.add_argument( "--metrics_watch", @@ -201,15 +191,28 @@ def main(input_args=None): help="""JSON string with variable-IDs and lead times to log watched metrics (e.g. 
'{"1": [1, 2], "3": [3, 4]}')""", ) + parser.add_argument( + "--num_past_forcing_steps", + type=int, + default=1, + help="Number of past time steps to use as input for forcing data", + ) + parser.add_argument( + "--num_future_forcing_steps", + type=int, + default=1, + help="Number of future time steps to use as input for forcing data", + ) args = parser.parse_args(input_args) args.var_leads_metrics_watch = { int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items() } - config_loader = config.Config.from_file(args.data_config) # Asserts for arguments + assert ( + args.config_path is not None + ), "Specify your config with --config_path" assert args.model in MODELS, f"Unknown model: {args.model}" - assert args.step_length <= 3, "Too high step length" assert args.eval in ( None, "val", @@ -222,33 +225,19 @@ def main(input_args=None): # Set seed seed.seed_everything(args.seed) - # Load data - train_loader = torch.utils.data.DataLoader( - WeatherDataset( - config_loader.dataset.name, - pred_length=args.ar_steps, - split="train", - subsample_step=args.step_length, - subset=args.subset_ds, - control_only=args.control_only, - ), - args.batch_size, - shuffle=True, - num_workers=args.n_workers, - ) - max_pred_length = (65 // args.step_length) - 2 # 19 - val_loader = torch.utils.data.DataLoader( - WeatherDataset( - config_loader.dataset.name, - pred_length=max_pred_length, - split="val", - subsample_step=args.step_length, - subset=args.subset_ds, - control_only=args.control_only, - ), - args.batch_size, - shuffle=False, - num_workers=args.n_workers, + # Load neural-lam configuration and datastore to use + config, datastore = load_config_and_datastore(config_path=args.config_path) + + # Create datamodule + data_module = WeatherDataModule( + datastore=datastore, + ar_steps_train=args.ar_steps_train, + ar_steps_eval=args.ar_steps_eval, + standardize=True, + num_past_forcing_steps=args.num_past_forcing_steps, + num_future_forcing_steps=args.num_future_forcing_steps, + batch_size=args.batch_size, + num_workers=args.num_workers, ) # Instantiate model + trainer @@ -261,12 +250,13 @@ def main(input_args=None): device_name = "cpu" # Load model parameters Use new args for model - model_class = MODELS[args.model] - model = model_class(args) + ModelClass = MODELS[args.model] + model = ModelClass(args, config=config, datastore=datastore) - prefix = "subset-" if args.subset_ds else "" if args.eval: - prefix = prefix + f"eval-{args.eval}-" + prefix = f"eval-{args.eval}-" + else: + prefix = "train-" run_name = ( f"{prefix}{args.model}-{args.processor_layers}x{args.hidden_dim}-" f"{time.strftime('%m_%d_%H')}-{random_run_id:04d}" @@ -279,7 +269,9 @@ def main(input_args=None): save_last=True, ) logger = pl.loggers.WandbLogger( - project=args.wandb_project, name=run_name, config=args + project=args.wandb_project, + name=run_name, + config=dict(training=vars(args), datastore=datastore._config), ) trainer = pl.Trainer( max_epochs=args.epochs, @@ -296,36 +288,12 @@ def main(input_args=None): # Only init once, on rank 0 only if trainer.global_rank == 0: utils.init_wandb_metrics( - logger, args.val_steps_to_log + logger, val_steps=args.val_steps_to_log ) # Do after wandb.init - if args.eval: - if args.eval == "val": - eval_loader = val_loader - else: # Test - eval_loader = torch.utils.data.DataLoader( - WeatherDataset( - config_loader.dataset.name, - pred_length=max_pred_length, - split="test", - subsample_step=args.step_length, - subset=args.subset_ds, - ), - args.batch_size, - shuffle=False, - 
num_workers=args.n_workers, - ) - - print(f"Running evaluation on {args.eval}") - trainer.test(model=model, dataloaders=eval_loader, ckpt_path=args.load) + trainer.test(model=model, datamodule=data_module, ckpt_path=args.load) else: - # Train model - trainer.fit( - model=model, - train_dataloaders=train_loader, - val_dataloaders=val_loader, - ckpt_path=args.load, - ) + trainer.fit(model=model, datamodule=data_module, ckpt_path=args.load) if __name__ == "__main__": diff --git a/neural_lam/utils.py b/neural_lam/utils.py index c47c44ff..4a0752e4 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -3,90 +3,11 @@ import shutil # Third-party -import numpy as np import torch from torch import nn from tueplots import bundles, figsizes -def load_dataset_stats(dataset_name, device="cpu"): - """ - Load arrays with stored dataset statistics from pre-processing - """ - static_dir_path = os.path.join("data", dataset_name, "static") - - def loads_file(fn): - return torch.load( - os.path.join(static_dir_path, fn), - map_location=device, - weights_only=True, - ) - - data_mean = loads_file("parameter_mean.pt") # (d_features,) - data_std = loads_file("parameter_std.pt") # (d_features,) - - flux_stats = loads_file("flux_stats.pt") # (2,) - flux_mean, flux_std = flux_stats - - return { - "data_mean": data_mean, - "data_std": data_std, - "flux_mean": flux_mean, - "flux_std": flux_std, - } - - -def load_static_data(dataset_name, device="cpu"): - """ - Load static files related to dataset - """ - static_dir_path = os.path.join("data", dataset_name, "static") - - def loads_file(fn): - return torch.load( - os.path.join(static_dir_path, fn), - map_location=device, - weights_only=True, - ) - - # Load border mask, 1. if node is part of border, else 0. - border_mask_np = np.load(os.path.join(static_dir_path, "border_mask.npy")) - border_mask = ( - torch.tensor(border_mask_np, dtype=torch.float32, device=device) - .flatten(0, 1) - .unsqueeze(1) - ) # (N_grid, 1) - - grid_static_features = loads_file( - "grid_features.pt" - ) # (N_grid, d_grid_static) - - # Load step diff stats - step_diff_mean = loads_file("diff_mean.pt") # (d_f,) - step_diff_std = loads_file("diff_std.pt") # (d_f,) - - # Load parameter std for computing validation errors in original data scale - data_mean = loads_file("parameter_mean.pt") # (d_features,) - data_std = loads_file("parameter_std.pt") # (d_features,) - - # Load loss weighting vectors - param_weights = torch.tensor( - np.load(os.path.join(static_dir_path, "parameter_weights.npy")), - dtype=torch.float32, - device=device, - ) # (d_f,) - - return { - "border_mask": border_mask, - "grid_static_features": grid_static_features, - "step_diff_mean": step_diff_mean, - "step_diff_std": step_diff_std, - "data_mean": data_mean, - "data_std": data_std, - "param_weights": param_weights, - } - - class BufferList(nn.Module): """ A list of torch buffer tensors that sit together as a Module with no @@ -112,12 +33,50 @@ def __iter__(self): return (self[i] for i in range(len(self))) -def load_graph(graph_name, device="cpu"): - """ - Load all tensors representing the graph +def load_graph(graph_dir_path, device="cpu"): + """Load all tensors representing the graph from `graph_dir_path`. 
+ + Needs the following files for all graphs: + - m2m_edge_index.pt + - g2m_edge_index.pt + - m2g_edge_index.pt + - m2m_features.pt + - g2m_features.pt + - m2g_features.pt + - mesh_features.pt + + And in addition for hierarchical graphs: + - mesh_up_edge_index.pt + - mesh_down_edge_index.pt + - mesh_up_features.pt + - mesh_down_features.pt + + Parameters + ---------- + graph_dir_path : str + Path to directory containing the graph files. + device : str + Device to load tensors to. + + Returns + ------- + hierarchical : bool + Whether the graph is hierarchical. + graph : dict + Dictionary containing the graph tensors, with keys as follows: + - g2m_edge_index + - m2g_edge_index + - m2m_edge_index + - mesh_up_edge_index + - mesh_down_edge_index + - g2m_features + - m2g_features + - m2m_features + - mesh_up_features + - mesh_down_features + - mesh_static_features + """ - # Define helper lambda function - graph_dir_path = os.path.join("graphs", graph_name) def loads_file(fn): return torch.load( @@ -137,7 +96,8 @@ def loads_file(fn): hierarchical = n_levels > 1 # Nor just single level mesh graph # Load static edge features - m2m_features = loads_file("m2m_features.pt") # List of (M_m2m[l], d_edge_f) + # List of (M_m2m[l], d_edge_f) + m2m_features = loads_file("m2m_features.pt") g2m_features = loads_file("g2m_features.pt") # (M_g2m, d_edge_f) m2g_features = loads_file("m2g_features.pt") # (M_m2g, d_edge_f) @@ -259,9 +219,9 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but with figure width as a fraction of the page width. """ - # If latex is not available, some visualizations might not render correctly, - # but will at least not raise an error. - # Alternatively, use unicode raised numbers. + # If latex is not available, some visualizations might not render + # correctly, but will at least not raise an error. Alternatively, use + # unicode raised numbers. usetex = True if shutil.which("latex") else False bundle = bundles.neurips2023(usetex=usetex, family="serif") bundle.update(figsizes.neurips2023()) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 2f22bef1..b9d18b39 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -5,10 +5,11 @@ # Local from . 
import utils +from .datastore.base import BaseRegularGridDatastore @matplotlib.rc_context(utils.fractional_plot_bundle(1)) -def plot_error_map(errors, data_config, title=None, step_length=3): +def plot_error_map(errors, datastore: BaseRegularGridDatastore, title=None): """ Plot a heatmap of errors of different variables at different predictions horizons @@ -16,6 +17,7 @@ def plot_error_map(errors, data_config, title=None, step_length=3): """ errors_np = errors.T.cpu().numpy() # (d_f, pred_steps) d_f, pred_steps = errors_np.shape + step_length = datastore.step_length # Normalize all errors to [0,1] for color map max_errors = errors_np.max(axis=1) # d_f @@ -48,11 +50,10 @@ def plot_error_map(errors, data_config, title=None, step_length=3): ax.set_xlabel("Lead time (h)", size=label_size) ax.set_yticks(np.arange(d_f)) + var_names = datastore.get_vars_names(category="state") + var_units = datastore.get_vars_units(category="state") y_ticklabels = [ - f"{name} ({unit})" - for name, unit in zip( - data_config.dataset.var_names, data_config.dataset.var_units - ) + f"{name} ({unit})" for name, unit in zip(var_names, var_units) ] ax.set_yticklabels(y_ticklabels, rotation=30, size=label_size) @@ -64,11 +65,17 @@ def plot_error_map(errors, data_config, title=None, step_length=3): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) def plot_prediction( - pred, target, obs_mask, data_config, title=None, vrange=None + pred, + target, + datastore: BaseRegularGridDatastore, + title=None, + vrange=None, ): """ Plot example prediction and grond truth. + Each has shape (N_grid,) + """ # Get common scale for values if vrange is None: @@ -77,8 +84,11 @@ def plot_prediction( else: vmin, vmax = vrange + extent = datastore.get_xy_extent("state") + # Set up masking of border region - mask_reshaped = obs_mask.reshape(*data_config.grid_shape_state) + da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) + mask_reshaped = da_mask.values pixel_alpha = ( mask_reshaped.clamp(0.7, 1).cpu().numpy() ) # Faded border region @@ -87,16 +97,21 @@ def plot_prediction( 1, 2, figsize=(13, 7), - subplot_kw={"projection": data_config.coords_projection}, + subplot_kw={"projection": datastore.coords_projection}, ) # Plot pred and target for ax, data in zip(axes, (target, pred)): ax.coastlines() # Add coastline outlines - data_grid = data.reshape(*data_config.grid_shape_state).cpu().numpy() + data_grid = ( + data.reshape(list(datastore.grid_shape_state.values.values())) + .cpu() + .numpy() + ) im = ax.imshow( data_grid, origin="lower", + extent=extent, alpha=pixel_alpha, vmin=vmin, vmax=vmax, @@ -116,7 +131,9 @@ def plot_prediction( @matplotlib.rc_context(utils.fractional_plot_bundle(1)) -def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None): +def plot_spatial_error( + error, datastore: BaseRegularGridDatastore, title=None, vrange=None +): """ Plot errors over spatial map Error and obs_mask has shape (N_grid,) @@ -128,23 +145,31 @@ def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None): else: vmin, vmax = vrange + extent = datastore.get_xy_extent("state") + # Set up masking of border region - mask_reshaped = obs_mask.reshape(*data_config.grid_shape_state) + da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) + mask_reshaped = da_mask.values pixel_alpha = ( mask_reshaped.clamp(0.7, 1).cpu().numpy() ) # Faded border region fig, ax = plt.subplots( figsize=(5, 4.8), - subplot_kw={"projection": data_config.coords_projection}, + subplot_kw={"projection": 
datastore.coords_projection}, ) ax.coastlines() # Add coastline outlines - error_grid = error.reshape(*data_config.grid_shape_state).cpu().numpy() + error_grid = ( + error.reshape(list(datastore.grid_shape_state.values.values())) + .cpu() + .numpy() + ) im = ax.imshow( error_grid, origin="lower", + extent=extent, alpha=pixel_alpha, vmin=vmin, vmax=vmax, diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 29977789..532e3c90 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -1,262 +1,695 @@ # Standard library -import datetime as dt -import glob -import os +import datetime +import warnings +from typing import Union # Third-party import numpy as np +import pytorch_lightning as pl import torch +import xarray as xr -# Local -from . import utils +# First-party +from neural_lam.datastore.base import BaseDatastore class WeatherDataset(torch.utils.data.Dataset): - """ - For our dataset: - N_t' = 65 - N_t = 65//subsample_step (= 21 for 3h steps) - dim_y = 268 - dim_x = 238 - N_grid = 268x238 = 63784 - d_features = 17 (d_features' = 18) - d_forcing = 5 + """Dataset class for weather data. + + This class loads and processes weather data from a given datastore. + + Parameters + ---------- + datastore : BaseDatastore + The datastore to load the data from (e.g. mdp). + split : str, optional + The data split to use ("train", "val" or "test"). Default is "train". + ar_steps : int, optional + The number of autoregressive steps. Default is 3. + num_past_forcing_steps: int, optional + Number of past time steps to include in forcing input. If set to i, + forcing from times t-i, t-i+1, ..., t-1, t (and potentially beyond, + given num_future_forcing_steps) are included as forcing inputs at time t + Default is 1. + num_future_forcing_steps: int, optional + Number of future time steps to include in forcing input. If set to j, + forcing from times t, t+1, ..., t+j-1, t+j (and potentially times before + t, given num_past_forcing_steps) are included as forcing inputs at time + t. Default is 1. + standardize : bool, optional + Whether to standardize the data. Default is True. 
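As a plain illustration of the forcing window defined by the two parameters above (time-step indices only, no data involved): with `num_past_forcing_steps=p` and `num_future_forcing_steps=f`, the forcing input at target step `t` is drawn from steps `t-p, ..., t, ..., t+f`.

```python
# Toy illustration of the forcing window; these are time-step indices only.
def forcing_window_indices(t, num_past_forcing_steps, num_future_forcing_steps):
    """Time indices whose forcing contributes at target time index t."""
    return list(range(t - num_past_forcing_steps, t + num_future_forcing_steps + 1))


# With 1 past and 1 future step, target index 5 uses forcing from steps 4, 5 and 6
print(forcing_window_indices(5, num_past_forcing_steps=1, num_future_forcing_steps=1))
# [4, 5, 6]
```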
""" def __init__( self, - dataset_name, - pred_length=19, + datastore: BaseDatastore, split="train", - subsample_step=3, + ar_steps=3, + num_past_forcing_steps=1, + num_future_forcing_steps=1, standardize=True, - subset=False, - control_only=False, ): super().__init__() - assert split in ("train", "val", "test"), "Unknown dataset split" - self.sample_dir_path = os.path.join( - "data", dataset_name, "samples", split - ) + self.split = split + self.ar_steps = ar_steps + self.datastore = datastore + self.num_past_forcing_steps = num_past_forcing_steps + self.num_future_forcing_steps = num_future_forcing_steps - member_file_regexp = ( - "nwp*mbr000.npy" if control_only else "nwp*mbr*.npy" + self.da_state = self.datastore.get_dataarray( + category="state", split=self.split ) - sample_paths = glob.glob( - os.path.join(self.sample_dir_path, member_file_regexp) + self.da_forcing = self.datastore.get_dataarray( + category="forcing", split=self.split ) - self.sample_names = [path.split("/")[-1][4:-4] for path in sample_paths] - # Now on form "yyymmddhh_mbrXXX" - if subset: - self.sample_names = self.sample_names[:50] # Limit to 50 samples + # check that with the provided data-arrays and ar_steps that we have a + # non-zero amount of samples + if self.__len__() <= 0: + raise ValueError( + "The provided datastore only provides " + f"{len(self.da_state.time)} total time steps, which is too few " + "to create a single sample for the WeatherDataset " + f"configuration used in the `{split}` split. You could try " + "either reducing the number of autoregressive steps " + "(`ar_steps`) and/or the forcing window size " + "(`num_past_forcing_steps` and `num_future_forcing_steps`)" + ) + + # Check the dimensions and their ordering + parts = dict(state=self.da_state) + if self.da_forcing is not None: + parts["forcing"] = self.da_forcing - self.sample_length = pred_length + 2 # 2 init states - self.subsample_step = subsample_step - self.original_sample_length = ( - 65 // self.subsample_step - ) # 21 for 3h steps - assert ( - self.sample_length <= self.original_sample_length - ), "Requesting too long time series samples" + for part, da in parts.items(): + expected_dim_order = self.datastore.expected_dim_order( + category=part + ) + if da.dims != expected_dim_order: + raise ValueError( + f"The dimension order of the `{part}` data ({da.dims}) " + f"does not match the expected dimension order " + f"({expected_dim_order}). Maybe you forgot to transpose " + "the data in `BaseDatastore.get_dataarray`?" + ) # Set up for standardization + # TODO: This will become part of ar_model.py soon! 
self.standardize = standardize if standardize: - ds_stats = utils.load_dataset_stats(dataset_name, "cpu") - self.data_mean, self.data_std, self.flux_mean, self.flux_std = ( - ds_stats["data_mean"], - ds_stats["data_std"], - ds_stats["flux_mean"], - ds_stats["flux_std"], + self.ds_state_stats = self.datastore.get_standardization_dataarray( + category="state" ) - # If subsample index should be sampled (only duing training) - self.random_subsample = split == "train" + self.da_state_mean = self.ds_state_stats.state_mean + self.da_state_std = self.ds_state_stats.state_std + + if self.da_forcing is not None: + self.ds_forcing_stats = ( + self.datastore.get_standardization_dataarray( + category="forcing" + ) + ) + self.da_forcing_mean = self.ds_forcing_stats.forcing_mean + self.da_forcing_std = self.ds_forcing_stats.forcing_std def __len__(self): - return len(self.sample_names) + if self.datastore.is_forecast: + # for now we simply create a single sample for each analysis time + # and then take the first (2 + ar_steps) forecast times. In + # addition we only use the first ensemble member (if ensemble data + # has been provided). + # This means that for each analysis time we get a single sample - def __getitem__(self, idx): - # === Sample === - sample_name = self.sample_names[idx] - sample_path = os.path.join( - self.sample_dir_path, f"nwp_{sample_name}.npy" + if self.datastore.is_ensemble: + warnings.warn( + "only using first ensemble member, so dataset size is " + " effectively reduced by the number of ensemble members " + f"({self.da_state.ensemble_member.size})", + UserWarning, + ) + + # check that there are enough forecast steps available to create + # samples given the number of autoregressive steps requested + n_forecast_steps = self.da_state.elapsed_forecast_duration.size + if n_forecast_steps < 2 + self.ar_steps: + raise ValueError( + "The number of forecast steps available " + f"({n_forecast_steps}) is less than the required " + f"2+ar_steps (2+{self.ar_steps}={2 + self.ar_steps}) for " + "creating a sample with initial and target states." + ) + + return self.da_state.analysis_time.size + else: + # Calculate the number of samples in the dataset n_samples = total + # time steps - (autoregressive steps + past forcing + future + # forcing) + #: + # Where: + # - total time steps: len(self.da_state.time) + # - autoregressive steps: self.ar_steps + # - past forcing: max(2, self.num_past_forcing_steps) (at least 2 + # time steps are required for the initial state) + # - future forcing: self.num_future_forcing_steps + return ( + len(self.da_state.time) + - self.ar_steps + - max(2, self.num_past_forcing_steps) + - self.num_future_forcing_steps + ) + + def _slice_state_time(self, da_state, idx, n_steps: int): + """ + Produce a time slice of the given dataarray `da_state` (state) starting + at `idx` and with `n_steps` steps. An `offset`is calculated based on the + `num_past_forcing_steps` class attribute. `Offset` is used to offset the + start of the sample, to assert that enough previous time steps are + available for the 2 initial states and any corresponding forcings + (calculated in `_slice_forcing_time`). + + Parameters + ---------- + da_state : xr.DataArray + The dataarray to slice. This is expected to have a `time` dimension + if the datastore is providing analysis only data, and a + `analysis_time` and `elapsed_forecast_duration` dimensions if the + datastore is providing forecast data. + idx : int + The index of the time step to start the sample from. 
+ n_steps : int + The number of time steps to include in the sample. + + Returns + ------- + da_sliced : xr.DataArray + The sliced dataarray with dims ('time', 'grid_index', + 'state_feature'). + """ + # The current implementation requires at least 2 time steps for the + # initial state (see GraphCast). + init_steps = 2 + # slice the dataarray to include the required number of time steps + if self.datastore.is_forecast: + start_idx = max(0, self.num_past_forcing_steps - init_steps) + end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps + # this implies that the data will have both `analysis_time` and + # `elapsed_forecast_duration` dimensions for forecasts. We for now + # simply select a analysis time and the first `n_steps` forecast + # times (given no offset). Note that this means that we get one + # sample per forecast, always starting at forecast time 2. + da_sliced = da_state.isel( + analysis_time=idx, + elapsed_forecast_duration=slice(start_idx, end_idx), + ) + # create a new time dimension so that the produced sample has a + # `time` dimension, similarly to the analysis only data + da_sliced["time"] = ( + da_sliced.analysis_time + da_sliced.elapsed_forecast_duration + ) + da_sliced = da_sliced.swap_dims( + {"elapsed_forecast_duration": "time"} + ) + else: + # For analysis data we slice the time dimension directly. The offset + # is only relevant for the very first (and last) samples in the + # dataset. + start_idx = idx + max(0, self.num_past_forcing_steps - init_steps) + end_idx = ( + idx + max(init_steps, self.num_past_forcing_steps) + n_steps + ) + da_sliced = da_state.isel(time=slice(start_idx, end_idx)) + return da_sliced + + def _slice_forcing_time(self, da_forcing, idx, n_steps: int): + """ + Produce a time slice of the given dataarray `da_forcing` (forcing) + starting at `idx` and with `n_steps` steps. An `offset` is calculated + based on the `num_past_forcing_steps` class attribute. It is used to + offset the start of the sample, to ensure that enough previous time + steps are available for the forcing data. The forcing data is windowed + around the current autoregressive time step to include the past and + future forcings. + + Parameters + ---------- + da_forcing : xr.DataArray + The forcing dataarray to slice. This is expected to have a `time` + dimension if the datastore is providing analysis only data, and a + `analysis_time` and `elapsed_forecast_duration` dimensions if the + datastore is providing forecast data. + idx : int + The index of the time step to start the sample from. + n_steps : int + The number of time steps to include in the sample. + + Returns + ------- + da_concat : xr.DataArray + The sliced dataarray with dims ('time', 'grid_index', + 'window', 'forcing_feature'). + """ + # The current implementation requires at least 2 time steps for the + # initial state (see GraphCast). The forcing data is windowed around the + # current autregressive time step. The two `init_steps` can also be used + # as past forcings. + init_steps = 2 + da_list = [] + + if self.datastore.is_forecast: + # This implies that the data will have both `analysis_time` and + # `elapsed_forecast_duration` dimensions for forecasts. We for now + # simply select an analysis time and the first `n_steps` forecast + # times (given no offset). Note that this means that we get one + # sample per forecast. 
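The analysis-data branch of `_slice_state_time` above reduces to the following index arithmetic, reproduced standalone with arbitrary numbers:

```python
# Same index arithmetic as the analysis branch of _slice_state_time, in isolation.
def state_slice_bounds(idx, n_steps, num_past_forcing_steps, init_steps=2):
    start_idx = idx + max(0, num_past_forcing_steps - init_steps)
    end_idx = idx + max(init_steps, num_past_forcing_steps) + n_steps
    return start_idx, end_idx  # passed to da_state.isel(time=slice(start, end))


# With the default of 1 past forcing step, sample idx=0 with ar_steps=3 selects
# time indices 0..4: two initial states followed by three target states.
print(state_slice_bounds(idx=0, n_steps=3, num_past_forcing_steps=1))  # (0, 5)
```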
+ # Add a 'time' dimension using the actual forecast times + offset = max(init_steps, self.num_past_forcing_steps) + for step in range(n_steps): + start_idx = offset + step - self.num_past_forcing_steps + end_idx = offset + step + self.num_future_forcing_steps + + current_time = ( + da_forcing.analysis_time[idx] + + da_forcing.elapsed_forecast_duration[offset + step] + ) + + da_sliced = da_forcing.isel( + analysis_time=idx, + elapsed_forecast_duration=slice(start_idx, end_idx + 1), + ) + + da_sliced = da_sliced.rename( + {"elapsed_forecast_duration": "window"} + ) + + # Assign the 'window' coordinate to be relative positions + da_sliced = da_sliced.assign_coords( + window=np.arange(len(da_sliced.window)) + ) + + da_sliced = da_sliced.expand_dims( + dim={"time": [current_time.values]} + ) + + da_list.append(da_sliced) + + # Concatenate the list of DataArrays along the 'time' dimension + da_concat = xr.concat(da_list, dim="time") + + else: + # For analysis data, we slice the time dimension directly. The + # offset is only relevant for the very first (and last) samples in + # the dataset. + offset = idx + max(init_steps, self.num_past_forcing_steps) + for step in range(n_steps): + start_idx = offset + step - self.num_past_forcing_steps + end_idx = offset + step + self.num_future_forcing_steps + + # Slice the data over the desired time window + da_sliced = da_forcing.isel(time=slice(start_idx, end_idx + 1)) + + da_sliced = da_sliced.rename({"time": "window"}) + + # Assign the 'window' coordinate to be relative positions + da_sliced = da_sliced.assign_coords( + window=np.arange(len(da_sliced.window)) + ) + + # Add a 'time' dimension to keep track of steps using actual + # time coordinates + current_time = da_forcing.time[offset + step] + da_sliced = da_sliced.expand_dims( + dim={"time": [current_time.values]} + ) + + da_list.append(da_sliced) + + # Concatenate the list of DataArrays along the 'time' dimension + da_concat = xr.concat(da_list, dim="time") + + return da_concat + + def _build_item_dataarrays(self, idx): + """ + Create the dataarrays for the initial states, target states and forcing + data for the sample at index `idx`. + + Parameters + ---------- + idx : int + The index of the sample to create the dataarrays for. + + Returns + ------- + da_init_states : xr.DataArray + The dataarray for the initial states. + da_target_states : xr.DataArray + The dataarray for the target states. + da_forcing_windowed : xr.DataArray + The dataarray for the forcing data, windowed for the sample. + da_target_times : xr.DataArray + The dataarray for the target times. 
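The per-step windowing in `_slice_forcing_time` can be reproduced on synthetic data with the same isel/rename/assign_coords/expand_dims/concat pattern (a single invented forcing feature on a tiny grid):

```python
# Minimal reproduction of the analysis-data windowing pattern on synthetic data.
import numpy as np
import xarray as xr

da_forcing = xr.DataArray(
    np.arange(10 * 4 * 1).reshape(10, 4, 1).astype(float),
    dims=("time", "grid_index", "forcing_feature"),
    coords={"time": np.arange(10)},
)

num_past, num_future, offset, n_steps = 1, 1, 2, 3
windows = []
for step in range(n_steps):
    start_idx = offset + step - num_past
    end_idx = offset + step + num_future
    da_sliced = da_forcing.isel(time=slice(start_idx, end_idx + 1))
    da_sliced = da_sliced.rename({"time": "window"})
    # relabel the window as relative positions and tag the slice with its target time
    da_sliced = da_sliced.assign_coords(window=np.arange(len(da_sliced.window)))
    target_time = da_forcing.time[offset + step].values
    da_sliced = da_sliced.expand_dims(dim={"time": [target_time]})
    windows.append(da_sliced)

da_windowed = xr.concat(windows, dim="time")
print(da_windowed.dims, da_windowed.sizes["window"])  # window size = past + future + 1
```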
+ """ + # handling ensemble data + if self.datastore.is_ensemble: + # for the now the strategy is to only include the first ensemble + # member + # XXX: this could be changed to include all ensemble members by + # splitting `idx` into two parts, one for the analysis time and one + # for the ensemble member and then increasing self.__len__ to + # include all ensemble members + warnings.warn( + "only use of ensemble member 0 (the first member) is " + "implemented for ensemble data" + ) + i_ensemble = 0 + da_state = self.da_state.isel(ensemble_member=i_ensemble) + else: + da_state = self.da_state + + if self.da_forcing is not None: + if "ensemble_member" in self.da_forcing.dims: + raise NotImplementedError( + "Ensemble member not yet supported for forcing data" + ) + da_forcing = self.da_forcing + else: + da_forcing = None + + # handle time sampling in a way that is compatible with both analysis + # and forecast data + da_state = self._slice_state_time( + da_state=da_state, idx=idx, n_steps=self.ar_steps ) - try: - full_sample = torch.tensor( - np.load(sample_path), dtype=torch.float32 - ) # (N_t', dim_y, dim_x, d_features') - except ValueError: - print(f"Failed to load {sample_path}") - - # Only use every ss_step:th time step, sample which of ss_step - # possible such time series - if self.random_subsample: - subsample_index = torch.randint(0, self.subsample_step, ()).item() + if da_forcing is not None: + da_forcing_windowed = self._slice_forcing_time( + da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps + ) + + # load the data into memory + da_state.load() + if da_forcing is not None: + da_forcing_windowed.load() + + da_init_states = da_state.isel(time=slice(0, 2)) + da_target_states = da_state.isel(time=slice(2, None)) + da_target_times = da_target_states.time + + if self.standardize: + da_init_states = ( + da_init_states - self.da_state_mean + ) / self.da_state_std + da_target_states = ( + da_target_states - self.da_state_mean + ) / self.da_state_std + + if da_forcing is not None: + # XXX: Here we implicitly assume that the last dimension of the + # forcing data is the forcing feature dimension. To standardize + # on `.device` we need a different implementation. (e.g. a + # tensor with repeated means and stds for each "windowed" time.) 
+ da_forcing_windowed = ( + da_forcing_windowed - self.da_forcing_mean + ) / self.da_forcing_std + + if da_forcing is not None: + # stack the `forcing_feature` and `window_sample` dimensions into a + # single `forcing_feature` dimension + da_forcing_windowed = da_forcing_windowed.stack( + forcing_feature_windowed=("forcing_feature", "window") + ) else: - subsample_index = 0 - subsample_end_index = self.original_sample_length * self.subsample_step - sample = full_sample[ - subsample_index : subsample_end_index : self.subsample_step - ] - # (N_t, dim_y, dim_x, d_features') - - # Remove feature 15, "z_height_above_ground" - sample = torch.cat( - (sample[:, :, :, :15], sample[:, :, :, 16:]), dim=3 - ) # (N_t, dim_y, dim_x, d_features) - - # Accumulate solar radiation instead of just subsampling - rad_features = full_sample[:, :, :, 2:4] # (N_t', dim_y, dim_x, 2) - # Accumulate for first time step - init_accum_rad = torch.sum( - rad_features[: (subsample_index + 1)], dim=0, keepdim=True - ) # (1, dim_y, dim_x, 2) - # Accumulate for rest of subsampled sequence - in_subsample_len = ( - subsample_end_index - self.subsample_step + subsample_index + 1 + # create an empty forcing tensor with the right shape + da_forcing_windowed = xr.DataArray( + data=np.empty( + (self.ar_steps, da_state.grid_index.size, 0), + ), + dims=("time", "grid_index", "forcing_feature"), + coords={ + "time": da_target_times, + "grid_index": da_state.grid_index, + "forcing_feature": [], + }, + ) + + return ( + da_init_states, + da_target_states, + da_forcing_windowed, + da_target_times, ) - rad_features_in_subsample = rad_features[ - (subsample_index + 1) : in_subsample_len - ] # (N_t*, dim_y, dim_x, 2), N_t* = (N_t-1)*ss_step - _, dim_y, dim_x, _ = sample.shape - rest_accum_rad = torch.sum( - rad_features_in_subsample.view( - self.original_sample_length - 1, - self.subsample_step, - dim_y, - dim_x, - 2, - ), - dim=1, - ) # (N_t-1, dim_y, dim_x, 2) - accum_rad = torch.cat( - (init_accum_rad, rest_accum_rad), dim=0 - ) # (N_t, dim_y, dim_x, 2) - # Replace in sample - sample[:, :, :, 2:4] = accum_rad - - # Flatten spatial dim - sample = sample.flatten(1, 2) # (N_t, N_grid, d_features) - - # Uniformly sample time id to start sample from - init_id = torch.randint( - 0, 1 + self.original_sample_length - self.sample_length, () + + def __getitem__(self, idx): + """ + Return a single training sample, which consists of the initial states, + target states, forcing and batch times. + + The implementation currently uses xarray.DataArray objects for the + standardization (scaling to mean 0.0 and standard deviation of 1.0) so + that we can make us of xarray's broadcasting capabilities. This makes + it possible to standardization with both global means, but also for + example where a grid-point mean has been computed. This code will have + to be replace if standardization is to be done on the GPU to handle + different shapes of the standardization. + + Parameters + ---------- + idx : int + The index of the sample to return, this will refer to the time of + the initial state. + + Returns + ------- + init_states : TrainingSample + A training sample object containing the initial states, target + states, forcing and batch times. The batch times are the times of + the target steps. 
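The `stack` call above folds the window dimension into the forcing feature dimension, so each target step carries `n_features * window_size` forcing values per grid point; a synthetic check of that reshaping:

```python
# Synthetic check of stacking the window into the forcing feature dimension.
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.zeros((3, 4, 2, 3)),  # (time, grid_index, forcing_feature, window)
    dims=("time", "grid_index", "forcing_feature", "window"),
)
da_stacked = da.stack(forcing_feature_windowed=("forcing_feature", "window"))
print(da_stacked.dims)  # ('time', 'grid_index', 'forcing_feature_windowed')
print(da_stacked.sizes["forcing_feature_windowed"])  # 2 * 3 = 6
```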
+ + """ + ( + da_init_states, + da_target_states, + da_forcing_windowed, + da_target_times, + ) = self._build_item_dataarrays(idx=idx) + + tensor_dtype = torch.float32 + + init_states = torch.tensor(da_init_states.values, dtype=tensor_dtype) + target_states = torch.tensor( + da_target_states.values, dtype=tensor_dtype ) - sample = sample[init_id : (init_id + self.sample_length)] - # (sample_length, N_grid, d_features) - if self.standardize: - # Standardize sample - sample = (sample - self.data_mean) / self.data_std - - # Split up sample in init. states and target states - init_states = sample[:2] # (2, N_grid, d_features) - target_states = sample[2:] # (sample_length-2, N_grid, d_features) - - # === Forcing features === - # Now batch-static features are just part of forcing, - # repeated over temporal dimension - # Load water coverage - sample_datetime = sample_name[:10] - water_path = os.path.join( - self.sample_dir_path, f"wtr_{sample_datetime}.npy" + target_times = torch.tensor( + da_target_times.astype("datetime64[ns]").astype("int64").values, + dtype=torch.int64, ) - water_cover_features = torch.tensor( - np.load(water_path), dtype=torch.float32 - ).unsqueeze( - -1 - ) # (dim_y, dim_x, 1) - # Flatten - water_cover_features = water_cover_features.flatten(0, 1) # (N_grid, 1) - # Expand over temporal dimension - water_cover_expanded = water_cover_features.unsqueeze(0).expand( - self.sample_length - 2, -1, -1 # -2 as added on after windowing - ) # (sample_len, N_grid, 1) - - # TOA flux - flux_path = os.path.join( - self.sample_dir_path, - f"nwp_toa_downwelling_shortwave_flux_{sample_datetime}.npy", + + forcing = torch.tensor(da_forcing_windowed.values, dtype=tensor_dtype) + + # init_states: (2, N_grid, d_features) + # target_states: (ar_steps, N_grid, d_features) + # forcing: (ar_steps, N_grid, d_windowed_forcing) + # target_times: (ar_steps,) + + return init_states, target_states, forcing, target_times + + def __iter__(self): + """ + Convenience method to iterate over the dataset. + + This isn't used by pytorch DataLoader which itself implements an + iterator that uses Dataset.__getitem__ and Dataset.__len__. + + """ + for i in range(len(self)): + yield self[i] + + def create_dataarray_from_tensor( + self, + tensor: torch.Tensor, + time: Union[datetime.datetime, list[datetime.datetime]], + category: str, + ): + """ + Construct a xarray.DataArray from a `pytorch.Tensor` with coordinates + for `grid_index`, `time` and `{category}_feature` matching the shape + and number of times provided and add the x/y coordinates from the + datastore. + + The number if times provided is expected to match the shape of the + tensor. For a 2D tensor, the dimensions are assumed to be (grid_index, + {category}_feature) and only a single time should be provided. For a 3D + tensor, the dimensions are assumed to be (time, grid_index, + {category}_feature) and a list of times should be provided. + + Parameters + ---------- + tensor : torch.Tensor + The tensor to construct the DataArray from, this assumed to have + the same dimension ordering as returned by the __getitem__ method + (i.e. time, grid_index, {category}_feature). + time : datetime.datetime or list[datetime.datetime] + The time or times of the tensor. + category : str + The category of the tensor, either "state", "forcing" or "static". + + Returns + ------- + da : xr.DataArray + The constructed DataArray. 
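A usage sketch of `create_dataarray_from_tensor` following the 2D/3D convention described above. It assumes a constructed `WeatherDataset` (here built on the `DummyDatastore` added later in this patch) and mirrors how the tests below call the method:

```python
import numpy as np

from neural_lam.weather_dataset import WeatherDataset
from tests.dummy_datastore import DummyDatastore

# assumes the test DummyDatastore added later in this patch; any datastore works
datastore = DummyDatastore(n_timesteps=10)
dataset = WeatherDataset(datastore=datastore, split="train", ar_steps=3)

init_states, target_states, forcing, target_times = dataset[0]
times = np.array(target_times, dtype="datetime64[ns]")

# 3D tensor (time, grid_index, state_feature): pass the full list of times
da_target = dataset.create_dataarray_from_tensor(
    tensor=target_states, time=times, category="state"
)

# 2D tensor (grid_index, state_feature): pass a single time
da_single = dataset.create_dataarray_from_tensor(
    tensor=target_states[0], time=times[0], category="state"
)
```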
+ """ + + def _is_listlike(obj): + # match list, tuple, numpy array + return hasattr(obj, "__iter__") and not isinstance(obj, str) + + add_time_as_dim = False + if len(tensor.shape) == 2: + dims = ["grid_index", f"{category}_feature"] + if _is_listlike(time): + raise ValueError( + "Expected a single time for a 2D tensor with assumed " + "dimensions (grid_index, {category}_feature), but got " + f"{len(time)} times" + ) + elif len(tensor.shape) == 3: + add_time_as_dim = True + dims = ["time", "grid_index", f"{category}_feature"] + if not _is_listlike(time): + raise ValueError( + "Expected a list of times for a 3D tensor with assumed " + "dimensions (time, grid_index, {category}_feature), but " + "got a single time" + ) + else: + raise ValueError( + "Expected tensor to have 2 or 3 dimensions, but got " + f"{len(tensor.shape)}" + ) + + da_datastore_state = getattr(self, f"da_{category}") + da_grid_index = da_datastore_state.grid_index + da_state_feature = da_datastore_state.state_feature + + coords = { + f"{category}_feature": da_state_feature, + "grid_index": da_grid_index, + } + if add_time_as_dim: + coords["time"] = time + + da = xr.DataArray( + tensor.numpy(), + dims=dims, + coords=coords, ) - flux = torch.tensor(np.load(flux_path), dtype=torch.float32).unsqueeze( - -1 - ) # (N_t', dim_y, dim_x, 1) - if self.standardize: - flux = (flux - self.flux_mean) / self.flux_std - - # Flatten and subsample flux forcing - flux = flux.flatten(1, 2) # (N_t, N_grid, 1) - flux = flux[subsample_index :: self.subsample_step] # (N_t, N_grid, 1) - flux = flux[ - init_id : (init_id + self.sample_length) - ] # (sample_len, N_grid, 1) - - # Time of day and year - dt_obj = dt.datetime.strptime(sample_datetime, "%Y%m%d%H") - dt_obj = dt_obj + dt.timedelta( - hours=2 + subsample_index - ) # Offset for first index - # Extract for initial step - init_hour_in_day = dt_obj.hour - start_of_year = dt.datetime(dt_obj.year, 1, 1) - init_seconds_into_year = (dt_obj - start_of_year).total_seconds() - - # Add increments for all steps - hour_inc = ( - torch.arange(self.sample_length) * self.subsample_step - ) # (sample_len,) - hour_of_day = ( - init_hour_in_day + hour_inc - ) # (sample_len,), Can be > 24 but ok - second_into_year = ( - init_seconds_into_year + hour_inc * 3600 - ) # (sample_len,) - # can roll over to next year, ok because periodicity - - # Encode as sin/cos - # ! 
Make this more flexible in a separate create_forcings.py script - seconds_in_year = 365 * 24 * 3600 - hour_angle = (hour_of_day / 12) * torch.pi # (sample_len,) - year_angle = ( - (second_into_year / seconds_in_year) * 2 * torch.pi - ) # (sample_len,) - datetime_forcing = torch.stack( - ( - torch.sin(hour_angle), - torch.cos(hour_angle), - torch.sin(year_angle), - torch.cos(year_angle), - ), - dim=1, - ) # (N_t, 4) - datetime_forcing = (datetime_forcing + 1) / 2 # Rescale to [0,1] - datetime_forcing = datetime_forcing.unsqueeze(1).expand( - -1, flux.shape[1], -1 - ) # (sample_len, N_grid, 4) - - # Put forcing features together - forcing_features = torch.cat( - (flux, datetime_forcing), dim=-1 - ) # (sample_len, N_grid, d_forcing) - - # Combine forcing over each window of 3 time steps - forcing_windowed = torch.cat( - ( - forcing_features[:-2], - forcing_features[1:-1], - forcing_features[2:], - ), - dim=2, - ) # (sample_len-2, N_grid, 3*d_forcing) - # Now index 0 of ^ corresponds to forcing at index 0-2 of sample - - # batch-static water cover is added after windowing, - # as it is static over time - forcing = torch.cat((water_cover_expanded, forcing_windowed), dim=2) - # (sample_len-2, N_grid, forcing_dim) - - return init_states, target_states, forcing + for grid_coord in ["x", "y"]: + if ( + grid_coord in da_datastore_state.coords + and grid_coord not in da.coords + ): + da.coords[grid_coord] = da_datastore_state[grid_coord] + + if not add_time_as_dim: + da.coords["time"] = time + + return da + + +class WeatherDataModule(pl.LightningDataModule): + """DataModule for weather data.""" + + def __init__( + self, + datastore: BaseDatastore, + ar_steps_train=3, + ar_steps_eval=25, + standardize=True, + num_past_forcing_steps=1, + num_future_forcing_steps=1, + batch_size=4, + num_workers=16, + ): + super().__init__() + self._datastore = datastore + self.num_past_forcing_steps = num_past_forcing_steps + self.num_future_forcing_steps = num_future_forcing_steps + self.ar_steps_train = ar_steps_train + self.ar_steps_eval = ar_steps_eval + self.standardize = standardize + self.batch_size = batch_size + self.num_workers = num_workers + self.train_dataset = None + self.val_dataset = None + self.test_dataset = None + if num_workers > 0: + # default to spawn for now, as the default on linux "fork" hangs + # when using dask (which the npyfilesmeps datastore uses) + self.multiprocessing_context = "spawn" + else: + self.multiprocessing_context = None + + def setup(self, stage=None): + if stage == "fit" or stage is None: + self.train_dataset = WeatherDataset( + datastore=self._datastore, + split="train", + ar_steps=self.ar_steps_train, + standardize=self.standardize, + num_past_forcing_steps=self.num_past_forcing_steps, + num_future_forcing_steps=self.num_future_forcing_steps, + ) + self.val_dataset = WeatherDataset( + datastore=self._datastore, + split="val", + ar_steps=self.ar_steps_eval, + standardize=self.standardize, + num_past_forcing_steps=self.num_past_forcing_steps, + num_future_forcing_steps=self.num_future_forcing_steps, + ) + + if stage == "test" or stage is None: + self.test_dataset = WeatherDataset( + datastore=self._datastore, + split="test", + ar_steps=self.ar_steps_eval, + standardize=self.standardize, + num_past_forcing_steps=self.num_past_forcing_steps, + num_future_forcing_steps=self.num_future_forcing_steps, + ) + + def train_dataloader(self): + """Load train dataset.""" + return torch.utils.data.DataLoader( + self.train_dataset, + batch_size=self.batch_size, + 
num_workers=self.num_workers, + shuffle=True, + multiprocessing_context=self.multiprocessing_context, + persistent_workers=True, + ) + + def val_dataloader(self): + """Load validation dataset.""" + return torch.utils.data.DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + multiprocessing_context=self.multiprocessing_context, + persistent_workers=True, + ) + + def test_dataloader(self): + """Load test dataset.""" + return torch.utils.data.DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + multiprocessing_context=self.multiprocessing_context, + persistent_workers=True, + ) diff --git a/pyproject.toml b/pyproject.toml index 14b7e69a..f0bc0851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,9 +3,9 @@ name = "neural-lam" version = "0.2.0" description = "LAM-based data-driven forecasting" authors = [ - {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"}, - {name = "Simon Adamov", email = "Simon.Adamov@meteoswiss.ch"}, - {name = "Leif Denby", email = "lcd@dmi.dk"}, + { name = "Joel Oskarsson", email = "joel.oskarsson@liu.se" }, + { name = "Simon Adamov", email = "Simon.Adamov@meteoswiss.ch" }, + { name = "Leif Denby", email = "lcd@dmi.dk" }, ] # PEP 621 project metadata @@ -24,15 +24,15 @@ dependencies = [ "plotly>=5.15.0", "torch>=2.3.0", "torch-geometric==2.3.1", + "parse>=1.20.2", + "dataclass-wizard>=0.22.3", + "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" [project.optional-dependencies] -dev = [ - "pre-commit>=3.8.0", - "pytest>=8.3.2", - "pooch>=1.8.2", -] +dev = ["pre-commit>=3.8.0", "pytest>=8.3.2", "pooch>=1.8.2"] + [tool.setuptools] py-modules = ["neural_lam"] @@ -59,6 +59,7 @@ known_first_party = [ # Add first-party modules that may be misclassified by isort "neural_lam", ] +line_length = 80 [tool.flake8] max-line-length = 80 @@ -80,12 +81,9 @@ ignore = [ "create_mesh.py", # Disable linting for now, as major rework is planned/expected ] # Temporary fix for import neural_lam statements until set up as proper package -init-hook='import sys; sys.path.append(".")' +init-hook = 'import sys; sys.path.append(".")' [tool.pylint.TYPECHECK] -generated-members = [ - "numpy.*", - "torch.*", -] +generated-members = ["numpy.*", "torch.*"] [tool.pylint.'MESSAGES CONTROL'] disable = [ "C0114", # 'missing-module-docstring', Do not require module docstrings @@ -96,11 +94,11 @@ disable = [ "W0223", # 'abstract-method', Subclasses do not have to override all abstract methods ] [tool.pylint.DESIGN] -max-statements=100 # Allow for some more involved functions +max-statements = 100 # Allow for some more involved functions [tool.pylint.IMPORTS] -allow-any-import-level="neural_lam" +allow-any-import-level = "neural_lam" [tool.pylint.SIMILARITIES] -min-similarity-lines=10 +min-similarity-lines = 10 [tool.pdm] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..6f579621 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,106 @@ +# Standard library +import os +from pathlib import Path + +# Third-party +import pooch +import yaml + +# First-party +from neural_lam.datastore import DATASTORES, init_datastore +from neural_lam.datastore.npyfilesmeps import ( + compute_standardization_stats as compute_standardization_stats_meps, +) + +# Local +from .dummy_datastore import DummyDatastore + +# Disable weights and biases to avoid unnecessary logging 
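`WeatherDataModule` above is a standard Lightning `DataModule` around `WeatherDataset`. A short usage sketch, assuming the `DummyDatastore` added later in this patch (any datastore instance would do):

```python
from neural_lam.weather_dataset import WeatherDataModule
from tests.dummy_datastore import DummyDatastore

# assumed test datastore; replace with a real one in actual training
datastore = DummyDatastore(n_timesteps=20)

datamodule = WeatherDataModule(
    datastore=datastore,
    ar_steps_train=3,
    ar_steps_eval=5,
    batch_size=2,
    num_workers=2,  # > 0 selects the "spawn" start method set in __init__
)
datamodule.setup(stage="fit")

# the datasets are plain WeatherDataset instances and can be inspected
# directly, without going through the dataloaders
print(len(datamodule.train_dataset), len(datamodule.val_dataset))
```

Note that the dataloaders pass `persistent_workers=True` unconditionally, so they require `num_workers > 0`; with `num_workers=0` PyTorch's `DataLoader` raises a `ValueError`, even though the `spawn` context is then not used.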
+# and to avoid having to deal with authentication +os.environ["WANDB_DISABLED"] = "true" + +DATASTORE_EXAMPLES_ROOT_PATH = Path("tests/datastore_examples") + +# Initializing variables for the s3 client +S3_BUCKET_NAME = "mllam-testdata" +S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" +S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.2.0.zip" +S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) +TEST_DATA_KNOWN_HASH = ( + "7ff2e07e04cfcd77631115f800c9d49188bb2a7c2a2777da3cea219f926d0c86" +) + + +def download_meps_example_reduced_dataset(): + # Download and unzip test data into data/meps_example_reduced + root_path = DATASTORE_EXAMPLES_ROOT_PATH / "npyfilesmeps" + dataset_path = root_path / "meps_example_reduced" + + pooch.retrieve( + url=S3_FULL_PATH, + known_hash=TEST_DATA_KNOWN_HASH, + processor=pooch.Unzip(extract_dir=""), + path=root_path, + fname="meps_example_reduced.zip", + ) + + config_path = dataset_path / "meps_example_reduced.datastore.yaml" + + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + if "class" in config["projection"]: + # XXX: should update the dataset stored on S3 with the change below + # + # rename the `projection.class` key to `projection.class_name` in the + # config this is because the `class` key is reserved for the class + # attribute of the object and so we can't use it to define a python + # dataclass + config["projection"]["class_name"] = config["projection"].pop("class") + + with open(config_path, "w") as f: + yaml.dump(config, f) + + # create parameters, only run if the files we expect are not present + expected_parameter_files = [ + "parameter_mean.pt", + "parameter_std.pt", + "diff_mean.pt", + "diff_std.pt", + ] + expected_parameter_filepaths = [ + dataset_path / "static" / fn for fn in expected_parameter_files + ] + if any(not p.exists() for p in expected_parameter_filepaths): + compute_standardization_stats_meps.main( + datastore_config_path=config_path, + batch_size=8, + step_length=3, + n_workers=0, + distributed=False, + ) + + return config_path + + +DATASTORES_EXAMPLES = dict( + mdp=( + DATASTORE_EXAMPLES_ROOT_PATH + / "mdp" + / "danra_100m_winds" + / "danra.datastore.yaml" + ), + npyfilesmeps=download_meps_example_reduced_dataset(), + dummydata=None, +) + +DATASTORES[DummyDatastore.SHORT_NAME] = DummyDatastore + + +def init_datastore_example(datastore_kind): + datastore = init_datastore( + datastore_kind=datastore_kind, + config_path=DATASTORES_EXAMPLES[datastore_kind], + ) + + return datastore diff --git a/tests/datastore_examples/.gitignore b/tests/datastore_examples/.gitignore new file mode 100644 index 00000000..e84e6493 --- /dev/null +++ b/tests/datastore_examples/.gitignore @@ -0,0 +1,2 @@ +npyfilesmeps/*.zip +npyfilesmeps/meps_example_reduced/ diff --git a/tests/datastore_examples/mdp/danra_100m_winds/.gitignore b/tests/datastore_examples/mdp/danra_100m_winds/.gitignore new file mode 100644 index 00000000..f2828f46 --- /dev/null +++ b/tests/datastore_examples/mdp/danra_100m_winds/.gitignore @@ -0,0 +1,2 @@ +*.zarr/ +graph/ diff --git a/tests/datastore_examples/mdp/danra_100m_winds/config.yaml b/tests/datastore_examples/mdp/danra_100m_winds/config.yaml new file mode 100644 index 00000000..0bb5c5ec --- /dev/null +++ b/tests/datastore_examples/mdp/danra_100m_winds/config.yaml @@ -0,0 +1,9 @@ +datastore: + kind: mdp + config_path: danra.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + weights: + u100m: 1.0 + v100m: 1.0 diff --git 
a/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml b/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml new file mode 100644 index 00000000..3edf1267 --- /dev/null +++ b/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml @@ -0,0 +1,99 @@ +schema_version: v0.5.0 +dataset_version: v0.1.0 + +output: + variables: + static: [grid_index, static_feature] + state: [time, grid_index, state_feature] + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-03T00:00 + end: 1990-09-09T00:00 + step: PT3H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-03T00:00 + end: 1990-09-06T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-06T00:00 + end: 1990-09-07T00:00 + test: + start: 1990-09-07T00:00 + end: 1990-09-09T00:00 + +inputs: + danra_height_levels: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/height_levels.zarr + dims: [time, x, y, altitude] + variables: + u: + altitude: + values: [100,] + units: m + v: + altitude: + values: [100, ] + units: m + dim_mapping: + time: + method: rename + dim: time + state_feature: + method: stack_variables_by_var_name + dims: [altitude] + name_format: "{var_name}{altitude}m" + grid_index: + method: stack + dims: [x, y] + target_output_variable: state + + danra_surface: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr + dims: [time, x, y] + variables: + # use surface incoming shortwave radiation as forcing + - swavr0m + dim_mapping: + time: + method: rename + dim: time + grid_index: + method: stack + dims: [x, y] + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: forcing + + danra_lsm: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/lsm.zarr + dims: [x, y] + variables: + - lsm + dim_mapping: + grid_index: + method: stack + dims: [x, y] + static_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: static + +extra: + projection: + class_name: LambertConformal + kwargs: + central_longitude: 25.0 + central_latitude: 56.7 + standard_parallels: [56.7, 56.7] + globe: + semimajor_axis: 6367470.0 + semiminor_axis: 6367470.0 diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py new file mode 100644 index 00000000..9075d404 --- /dev/null +++ b/tests/dummy_datastore.py @@ -0,0 +1,449 @@ +# Standard library +import datetime +import tempfile +from functools import cached_property +from pathlib import Path +from typing import List, Union + +# Third-party +import isodate +import numpy as np +import xarray as xr +from cartopy import crs as ccrs +from numpy import ndarray + +# First-party +from neural_lam.datastore.base import ( + BaseRegularGridDatastore, + CartesianGridShape, +) + + +class DummyDatastore(BaseRegularGridDatastore): + """ + Datastore that creates some dummy data for testing purposes. The data + consists of state, forcing, and static variables, and is stored in a + regular grid (using Lambert Azimuthal Equal Area projection). The domain + is centered on Denmark and has a size of 500x500 km. 
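The `danra.datastore.yaml` above is an `mllam-data-prep` configuration; the feature names it produces follow the `name_format` entries (e.g. `u100m`/`v100m` from the height-level stacking, `swavr0m` for the forcing), which is what the `state_feature_weighting` weights in `config.yaml` refer to. A hedged usage sketch, assuming the file layout added in this patch and network access to the referenced zarr datasets:

```python
from neural_lam.datastore import init_datastore

config_path = (
    "tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml"
)
datastore = init_datastore(datastore_kind="mdp", config_path=config_path)

# expected to list names such as "u100m" and "v100m" (from `name_format`)
print(datastore.get_vars_names("state"))
# expected to include "swavr0m"
print(datastore.get_vars_names("forcing"))
```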
+ """ + + SHORT_NAME = "dummydata" + T0 = isodate.parse_datetime("2021-01-01T00:00:00") + N_FEATURES = dict(state=5, forcing=2, static=1) + CARTESIAN_COORDS = ["x", "y"] + + # center the domain on Denmark + latlon_center = [56, 10] # latitude, longitude + bbox_size_km = [500, 500] # km + + def __init__( + self, config_path=None, n_grid_points=10000, n_timesteps=10 + ) -> None: + """ + Create a dummy datastore with random data. + + Parameters + ---------- + config_path : None + No config file is needed for the dummy datastore. This argument is + only present to match the signature of the other datastores. + n_grid_points : int + The number of grid points in the dataset. Must be a perfect square. + n_timesteps : int + The number of timesteps in the dataset. + """ + assert ( + config_path is None + ), "No config file is needed for the dummy datastore" + + # Ensure n_grid_points is a perfect square + n_points_1d = int(np.sqrt(n_grid_points)) + assert ( + n_points_1d * n_points_1d == n_grid_points + ), "n_grid_points must be a perfect square" + + # create equal area grid + lx, ly = self.bbox_size_km + x = np.linspace(-lx / 2.0 * 1.0e3, lx / 2.0 * 1.0e3, n_points_1d) + y = np.linspace(-ly / 2.0 * 1.0e3, ly / 2.0 * 1.0e3, n_points_1d) + + xs, ys = np.meshgrid(x, y) + + # Create lat/lon coordinates using equal area projection + lon_mesh, lat_mesh = ( + ccrs.PlateCarree() + .transform_points( + src_crs=self.coords_projection, + x=xs.flatten(), + y=ys.flatten(), + )[:, :2] + .T + ) + + # Create base dataset with proper coordinates + self.ds = xr.Dataset( + coords={ + "x": ( + "x", + x, + {"units": "m"}, + ), # Use first column for x coordinates + "y": ( + "y", + y, + {"units": "m"}, + ), # Use first row for y coordinates + "longitude": ( + "grid_index", + lon_mesh.flatten(), + {"units": "degrees_east"}, + ), + "latitude": ( + "grid_index", + lat_mesh.flatten(), + {"units": "degrees_north"}, + ), + } + ) + # Create data variables with proper dimensions + for category, n in self.N_FEATURES.items(): + feature_names = [f"{category}_feat_{i}" for i in range(n)] + feature_units = ["-" for _ in range(n)] # Placeholder units + feature_long_names = [ + f"Long name for {name}" for name in feature_names + ] + + self.ds[f"{category}_feature"] = feature_names + self.ds[f"{category}_feature_units"] = ( + f"{category}_feature", + feature_units, + ) + self.ds[f"{category}_feature_long_name"] = ( + f"{category}_feature", + feature_long_names, + ) + + # Define dimensions and create random data + dims = ["grid_index", f"{category}_feature"] + if category != "static": + dims.append("time") + shape = (n_grid_points, n, n_timesteps) + else: + shape = (n_grid_points, n) + + # Create random data + data = np.random.randn(*shape) + + # Create DataArray with proper dimensions + self.ds[category] = xr.DataArray( + data, + dims=dims, + coords={ + f"{category}_feature": feature_names, + }, + ) + + if category != "static": + dt = datetime.timedelta(hours=self.step_length) + times = [self.T0 + dt * i for i in range(n_timesteps)] + self.ds.coords["time"] = times + + # Add boundary mask + self.ds["boundary_mask"] = xr.DataArray( + np.random.choice([0, 1], size=(n_points_1d, n_points_1d)), + dims=["x", "y"], + ) + + # Stack the spatial dimensions into grid_index + self.ds = self.ds.stack(grid_index=self.CARTESIAN_COORDS) + + # Create temporary directory for storing derived files + self._tempdir = tempfile.TemporaryDirectory() + self._root_path = Path(self._tempdir.name) + self._num_grid_points = n_grid_points + + @property + def 
root_path(self) -> Path: + """ + The root path to the datastore. It is relative to this that any derived + files (for example the graph components) are stored. + + Returns + ------- + pathlib.Path + The root path to the datastore. + + """ + return self._root_path + + @property + def config(self) -> dict: + """The configuration of the datastore. + + Returns + ------- + collections.abc.Mapping + The configuration of the datastore, any dict like object can be + returned. + + """ + return {} + + @property + def step_length(self) -> int: + """The step length of the dataset in hours. + + Returns: + int: The step length in hours. + + """ + return 1 + + def get_vars_names(self, category: str) -> list[str]: + """Get the names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The names of the variables. + + """ + return self.ds[f"{category}_feature"].values.tolist() + + def get_vars_units(self, category: str) -> list[str]: + """Get the units of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The units of the variables. + + """ + return self.ds[f"{category}_feature_units"].values.tolist() + + def get_vars_long_names(self, category: str) -> List[str]: + """Get the long names of the variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + List[str] + The long names of the variables. + + """ + return self.ds[f"{category}_feature_long_name"].values.tolist() + + def get_num_data_vars(self, category: str) -> int: + """Get the number of data variables in the given category. + + Parameters + ---------- + category : str + The category of the variables (state/forcing/static). + + Returns + ------- + int + The number of data variables. + + """ + return self.ds[f"{category}_feature"].size + + def get_standardization_dataarray(self, category: str) -> xr.Dataset: + """ + Return the standardization (i.e. scaling to mean of 0.0 and standard + deviation of 1.0) dataarray for the given category. This should contain + a `{category}_mean` and `{category}_std` variable for each variable in + the category. For `category=="state"`, the dataarray should also + contain a `state_diff_mean` and `state_diff_std` variable for the one- + step differences of the state variables. The returned dataarray should + at least have dimensions of `({category}_feature)`, but can also + include for example `grid_index` (if the standardization is done per + grid point for example). + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + xr.Dataset + The standardization dataarray for the given category, with variables + for the mean and standard deviation of the variables (and + differences for state variables). 
+ + """ + ds_standardization = xr.Dataset() + + ops = ["mean", "std"] + if category == "state": + ops += ["diff_mean", "diff_std"] + + for op in ops: + da_op = xr.ones_like(self.ds[f"{category}_feature"]).astype(float) + ds_standardization[f"{category}_{op}"] = da_op + + return ds_standardization + + def get_dataarray( + self, category: str, split: str + ) -> Union[xr.DataArray, None]: + """ + Return the processed data (as a single `xr.DataArray`) for the given + category of data and test/train/val-split that covers all the data (in + space and time) of a given category (state/forcing/static). A + datastore must be able to return for the "state" category, but + "forcing" and "static" are optional (in which case the method should + return `None`). For the "static" category the `split` is allowed to be + `None` because the static data is the same for all splits. + + The returned dataarray is expected to at minimum have dimensions of + `(grid_index, {category}_feature)` so that any spatial dimensions have + been stacked into a single dimension and all variables and levels have + been stacked into a single feature dimension named by the `category` of + data being loaded. + + For categories of data that have a time dimension (i.e. not static + data), the dataarray is expected additionally have `(analysis_time, + elapsed_forecast_duration)` dimensions if `is_forecast` is True, or + `(time)` if `is_forecast` is False. + + If the data is ensemble data, the dataarray is expected to have an + additional `ensemble_member` dimension. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + split : str + The time split to filter the dataset (train/val/test). + + Returns + ------- + xr.DataArray or None + The xarray DataArray object with processed dataset. + + """ + dim_order = self.expected_dim_order(category=category) + return self.ds[category].transpose(*dim_order) + + @cached_property + def boundary_mask(self) -> xr.DataArray: + """ + Return the boundary mask for the dataset, with spatial dimensions + stacked. Where the value is 1, the grid point is a boundary point, and + where the value is 0, the grid point is not a boundary point. + + Returns + ------- + xr.DataArray + The boundary mask for the dataset, with dimensions + `('grid_index',)`. + + """ + return self.ds["boundary_mask"] + + def get_xy(self, category: str, stacked: bool) -> ndarray: + """Return the x, y coordinates of the dataset. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + stacked : bool + Whether to stack the x, y coordinates. + + Returns + ------- + np.ndarray + The x, y coordinates of the dataset, returned differently based on + the value of `stacked`: + - `stacked==True`: shape `(n_grid_points, 2)` where + n_grid_points=N_x*N_y. + - `stacked==False`: shape `(N_x, N_y, 2)` + + """ + # assume variables are stored in dimensions [grid_index, ...] 
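For the regular dummy grid, `get_xy` below broadcasts the 1D `x`/`y` coordinates and either keeps the `(N_x, N_y, 2)` layout or stacks it to `(n_grid_points, 2)`. The same two layouts in plain numpy, as an illustration of the shapes only (the actual implementation goes through xarray broadcasting and stacking):

```python
import numpy as np

nx, ny = 4, 3  # synthetic grid sizes
x = np.linspace(0.0, 1.0, nx)
y = np.linspace(0.0, 1.0, ny)

xs, ys = np.meshgrid(x, y, indexing="ij")
xy_unstacked = np.stack([xs, ys], axis=-1)  # (nx, ny, 2)
xy_stacked = xy_unstacked.reshape(-1, 2)    # (nx * ny, 2)

assert xy_unstacked.shape == (nx, ny, 2)
assert xy_stacked.shape == (nx * ny, 2)
```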
+ ds_category = self.unstack_grid_coords(da_or_ds=self.ds[category]) + + da_xs = ds_category.x + da_ys = ds_category.y + + assert da_xs.ndim == da_ys.ndim == 1, "x and y coordinates must be 1D" + + da_x, da_y = xr.broadcast(da_xs, da_ys) + da_xy = xr.concat([da_x, da_y], dim="grid_coord") + + if stacked: + da_xy = da_xy.stack(grid_index=self.CARTESIAN_COORDS).transpose( + "grid_index", + "grid_coord", + ) + else: + dims = [ + "x", + "y", + "grid_coord", + ] + da_xy = da_xy.transpose(*dims) + + return da_xy.values + + @property + def coords_projection(self) -> ccrs.Projection: + """Return the projection object for the coordinates. + + The projection object is used to plot the coordinates on a map. + + Returns + ------- + cartopy.crs.Projection: + The projection object. + + """ + # make a projection centered on Denmark + lat_center, lon_center = self.latlon_center + return ccrs.LambertAzimuthalEqualArea( + central_latitude=lat_center, central_longitude=lon_center + ) + + @property + def num_grid_points(self) -> int: + """Return the number of grid points in the dataset. + + Returns + ------- + int + The number of grid points in the dataset. + + """ + return self._num_grid_points + + @cached_property + def grid_shape_state(self) -> CartesianGridShape: + """The shape of the grid for the state variables. + + Returns + ------- + CartesianGridShape: + The shape of the grid for the state variables, which has `x` and + `y` attributes. + """ + + n_points_1d = int(np.sqrt(self.num_grid_points)) + return CartesianGridShape(x=n_points_1d, y=n_points_1d) diff --git a/tests/test_cli.py b/tests/test_cli.py index e90daa04..0dbd04a1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,18 +1,12 @@ # First-party import neural_lam -import neural_lam.create_grid_features -import neural_lam.create_mesh -import neural_lam.create_parameter_weights +import neural_lam.create_graph import neural_lam.train_model def test_import(): - """ - This test just ensures that each cli entry-point can be imported for now, - eventually we should test their execution too - """ + """This test just ensures that each cli entry-point can be imported for now, + eventually we should test their execution too.""" assert neural_lam is not None - assert neural_lam.create_mesh is not None - assert neural_lam.create_grid_features is not None - assert neural_lam.create_parameter_weights is not None + assert neural_lam.create_graph is not None assert neural_lam.train_model is not None diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..1ff40bc6 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,72 @@ +# Third-party +import pytest + +# First-party +import neural_lam.config as nlconfig + + +@pytest.mark.parametrize( + "state_weighting_config", + [ + nlconfig.ManualStateFeatureWeighting( + weights=dict(u100m=1.0, v100m=0.5) + ), + nlconfig.UniformFeatureWeighting(), + ], +) +def test_config_serialization(state_weighting_config): + c = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection(kind="mdp", config_path=""), + training=nlconfig.TrainingConfig( + state_feature_weighting=state_weighting_config + ), + ) + + assert c == c.from_json(c.to_json()) + assert c == c.from_yaml(c.to_yaml()) + + +yaml_training_defaults = """ +datastore: + kind: mdp + config_path: "" +""" + +default_config = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection(kind="mdp", config_path=""), + training=nlconfig.TrainingConfig( + state_feature_weighting=nlconfig.UniformFeatureWeighting() + ), +) + 
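The `DummyDatastore` defined earlier in this patch can also be exercised on its own, which is convenient when writing new tests. A small sketch of what it exposes:

```python
from tests.dummy_datastore import DummyDatastore

datastore = DummyDatastore(n_grid_points=100, n_timesteps=10)

print(datastore.num_grid_points)          # 100
print(datastore.grid_shape_state)         # x=10, y=10
print(datastore.step_length)              # 1 (hour)
print(datastore.get_vars_names("state"))  # state_feat_0 ... state_feat_4

da_state = datastore.get_dataarray(category="state", split="train")
# dims are an ordering of (time, grid_index, state_feature)
print(da_state.dims, da_state.sizes["grid_index"])
```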
+yaml_training_manual_weights = """ +datastore: + kind: mdp + config_path: "" +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + weights: + u100m: 1.0 + v100m: 1.0 +""" + +manual_weights_config = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection(kind="mdp", config_path=""), + training=nlconfig.TrainingConfig( + state_feature_weighting=nlconfig.ManualStateFeatureWeighting( + weights=dict(u100m=1.0, v100m=1.0) + ) + ), +) + +yaml_samples = zip( + [yaml_training_defaults, yaml_training_manual_weights], + [default_config, manual_weights_config], +) + + +@pytest.mark.parametrize("yaml_str, config_expected", yaml_samples) +def test_config_load_from_yaml(yaml_str, config_expected): + c = nlconfig.NeuralLAMConfig.from_yaml(yaml_str) + assert c == config_expected diff --git a/tests/test_datasets.py b/tests/test_datasets.py new file mode 100644 index 00000000..419aece0 --- /dev/null +++ b/tests/test_datasets.py @@ -0,0 +1,261 @@ +# Standard library +from pathlib import Path + +# Third-party +import numpy as np +import pytest +import torch +from torch.utils.data import DataLoader + +# First-party +from neural_lam import config as nlconfig +from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.datastore import DATASTORES +from neural_lam.datastore.base import BaseRegularGridDatastore +from neural_lam.models.graph_lam import GraphLAM +from neural_lam.weather_dataset import WeatherDataset +from tests.conftest import init_datastore_example +from tests.dummy_datastore import DummyDatastore + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_dataset_item_shapes(datastore_name): + """Check that the `datastore.get_dataarray` method is implemented. + + Validate the shapes of the tensors match between the different + components of the training sample. 
+ + init_states: (2, N_grid, d_features) + target_states: (ar_steps, N_grid, d_features) + forcing: (ar_steps, N_grid, d_windowed_forcing) # batch_times: (ar_steps,) + + """ + datastore = init_datastore_example(datastore_name) + N_gridpoints = datastore.num_grid_points + + N_pred_steps = 4 + num_past_forcing_steps = 1 + num_future_forcing_steps = 1 + dataset = WeatherDataset( + datastore=datastore, + split="train", + ar_steps=N_pred_steps, + num_past_forcing_steps=num_past_forcing_steps, + num_future_forcing_steps=num_future_forcing_steps, + ) + + item = dataset[0] + + # unpack the item, this is the current return signature for + # WeatherDataset.__getitem__ + init_states, target_states, forcing, target_times = item + + # initial states + assert init_states.ndim == 3 + assert init_states.shape[0] == 2 # two time steps go into the input + assert init_states.shape[1] == N_gridpoints + assert init_states.shape[2] == datastore.get_num_data_vars("state") + + # output states + assert target_states.ndim == 3 + assert target_states.shape[0] == N_pred_steps + assert target_states.shape[1] == N_gridpoints + assert target_states.shape[2] == datastore.get_num_data_vars("state") + + # forcing + assert forcing.ndim == 3 + assert forcing.shape[0] == N_pred_steps + assert forcing.shape[1] == N_gridpoints + assert forcing.shape[2] == datastore.get_num_data_vars("forcing") * ( + num_past_forcing_steps + num_future_forcing_steps + 1 + ) + + # batch times + assert target_times.ndim == 1 + assert target_times.shape[0] == N_pred_steps + + # try to get the last item of the dataset to ensure slicing and stacking + # operations are working as expected and are consistent with the dataset + # length + dataset[len(dataset) - 1] + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_dataset_item_create_dataarray_from_tensor(datastore_name): + datastore = init_datastore_example(datastore_name) + + N_pred_steps = 4 + num_past_forcing_steps = 1 + num_future_forcing_steps = 1 + dataset = WeatherDataset( + datastore=datastore, + split="train", + ar_steps=N_pred_steps, + num_past_forcing_steps=num_past_forcing_steps, + num_future_forcing_steps=num_future_forcing_steps, + ) + + idx = 0 + + # unpack the item, this is the current return signature for + # WeatherDataset.__getitem__ + _, target_states, _, target_times_arr = dataset[idx] + _, da_target_true, _, da_target_times_true = dataset._build_item_dataarrays( + idx=idx + ) + + target_times = np.array(target_times_arr, dtype="datetime64[ns]") + np.testing.assert_equal(target_times, da_target_times_true.values) + + da_target = dataset.create_dataarray_from_tensor( + tensor=target_states, category="state", time=target_times + ) + + # conversion to torch.float32 may lead to loss of precision + np.testing.assert_allclose( + da_target.values, da_target_true.values, rtol=1e-6 + ) + assert da_target.dims == da_target_true.dims + for dim in da_target.dims: + np.testing.assert_equal( + da_target[dim].values, da_target_true[dim].values + ) + + if isinstance(datastore, BaseRegularGridDatastore): + # test unstacking the grid coordinates + da_target_unstacked = datastore.unstack_grid_coords(da_target) + assert all( + coord_name in da_target_unstacked.coords + for coord_name in ["x", "y"] + ) + + # check construction of a single time + da_target_single = dataset.create_dataarray_from_tensor( + tensor=target_states[0], category="state", time=target_times[0] + ) + + # check that the content is the same + # conversion to torch.float32 may lead to loss of precision + 
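The tolerance used just below exists because the dataset tensors are `float32` while the source DataArrays may be `float64`. A tiny standalone illustration of why exact equality would be too strict after such a round trip:

```python
import numpy as np

x64 = np.array([0.1, 1.0 / 3.0], dtype=np.float64)
x32 = x64.astype(np.float32).astype(np.float64)

print(x64 - x32)  # small, non-zero differences from float32 rounding
assert not np.array_equal(x64, x32)
np.testing.assert_allclose(x64, x32, rtol=1e-6)  # passes with a tolerance
```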
np.testing.assert_allclose( + da_target_single.values, da_target_true[0].values, rtol=1e-6 + ) + assert da_target_single.dims == da_target_true[0].dims + for dim in da_target_single.dims: + np.testing.assert_equal( + da_target_single[dim].values, da_target_true[0][dim].values + ) + + if isinstance(datastore, BaseRegularGridDatastore): + # test unstacking the grid coordinates + da_target_single_unstacked = datastore.unstack_grid_coords( + da_target_single + ) + assert all( + coord_name in da_target_single_unstacked.coords + for coord_name in ["x", "y"] + ) + + +@pytest.mark.parametrize("split", ["train", "val", "test"]) +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_single_batch(datastore_name, split): + """Check that the `datastore.get_dataarray` method is implemented. + + And that it returns an xarray DataArray with the correct dimensions. + + """ + datastore = init_datastore_example(datastore_name) + + device_name = ( + torch.device("cuda") if torch.cuda.is_available() else "cpu" + ) # noqa + + graph_name = "1level" + + class ModelArgs: + output_std = False + loss = "mse" + restore_opt = False + n_example_pred = 1 + graph = graph_name + hidden_dim = 4 + hidden_layers = 1 + processor_layers = 2 + mesh_aggr = "sum" + num_past_forcing_steps = 1 + num_future_forcing_steps = 1 + + args = ModelArgs() + + graph_dir_path = Path(datastore.root_path) / "graph" / graph_name + + def _create_graph(): + if not graph_dir_path.exists(): + create_graph_from_datastore( + datastore=datastore, + output_root_path=str(graph_dir_path), + n_max_levels=1, + ) + + if not isinstance(datastore, BaseRegularGridDatastore): + with pytest.raises(NotImplementedError): + _create_graph() + pytest.skip("Skipping on model-run on non-regular grid datastores") + + _create_graph() + + config = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection( + kind=datastore.SHORT_NAME, config_path=datastore.root_path + ) + ) + + dataset = WeatherDataset(datastore=datastore, split=split, ar_steps=2) + + model = GraphLAM(args=args, datastore=datastore, config=config) # noqa + + model_device = model.to(device_name) + data_loader = DataLoader(dataset, batch_size=2) + batch = next(iter(data_loader)) + batch_device = [part.to(device_name) for part in batch] + model_device.common_step(batch_device) + model_device.training_step(batch_device) + + +@pytest.mark.parametrize( + "dataset_config", + [ + {"past": 0, "future": 0, "ar_steps": 1, "exp_len_reduction": 3}, + {"past": 2, "future": 0, "ar_steps": 1, "exp_len_reduction": 3}, + {"past": 0, "future": 2, "ar_steps": 1, "exp_len_reduction": 5}, + {"past": 4, "future": 0, "ar_steps": 1, "exp_len_reduction": 5}, + {"past": 0, "future": 0, "ar_steps": 5, "exp_len_reduction": 7}, + {"past": 3, "future": 3, "ar_steps": 2, "exp_len_reduction": 8}, + ], +) +def test_dataset_length(dataset_config): + """Check that correct number of samples can be extracted from the dataset, + given a specific configuration of forcing windowing and ar_steps. 
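The `exp_len_reduction` values in the parametrization above are consistent with the pattern `reduction = max(2, num_past_forcing_steps) + ar_steps + num_future_forcing_steps`, i.e. two initial-state steps (or the past forcing window, if longer) plus the target and future forcing windows are unavailable as sample start times. A quick check of that pattern against the listed cases (an observation on the numbers, not code from this patch):

```python
# (num_past_forcing_steps, num_future_forcing_steps, ar_steps, exp_len_reduction)
cases = [
    (0, 0, 1, 3),
    (2, 0, 1, 3),
    (0, 2, 1, 5),
    (4, 0, 1, 5),
    (0, 0, 5, 7),
    (3, 3, 2, 8),
]
for past, future, ar_steps, expected in cases:
    assert max(2, past) + ar_steps + future == expected
```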
+ """ + # Use dummy datastore of length 10 here, only want to test slicing + # in dataset class + ds_len = 10 + datastore = DummyDatastore(n_timesteps=ds_len) + + dataset = WeatherDataset( + datastore=datastore, + split="train", + ar_steps=dataset_config["ar_steps"], + num_past_forcing_steps=dataset_config["past"], + num_future_forcing_steps=dataset_config["future"], + ) + + # We expect dataset to contain this many samples + expected_len = ds_len - dataset_config["exp_len_reduction"] + + # Check that datast has correct length + assert len(dataset) == expected_len + + # Check that we can actually get last and first sample + dataset[0] + dataset[expected_len - 1] diff --git a/tests/test_datastores.py b/tests/test_datastores.py new file mode 100644 index 00000000..4a4b1100 --- /dev/null +++ b/tests/test_datastores.py @@ -0,0 +1,384 @@ +"""List of methods and attributes that should be implemented in a subclass of +`` (these are all decorated with `@abc.abstractmethod`): + +- `root_path` (property): Root path of the datastore. +- `step_length` (property): Length of the time step in hours. +- `grid_shape_state` (property): Shape of the grid for the state variables. +- `get_xy` (method): Return the x, y coordinates of the dataset. +- `coords_projection` (property): Projection object for the coordinates. +- `get_vars_units` (method): Get the units of the variables in the given + category. +- `get_vars_names` (method): Get the names of the variables in the given + category. +- `get_vars_long_names` (method): Get the long names of the variables in + the given category. +- `get_num_data_vars` (method): Get the number of data variables in the + given category. +- `get_normalization_dataarray` (method): Return the normalization + dataarray for the given category. +- `get_dataarray` (method): Return the processed data (as a single + `xr.DataArray`) for the given category and test/train/val-split. +- `boundary_mask` (property): Return the boundary mask for the dataset, + with spatial dimensions stacked. +- `config` (property): Return the configuration of the datastore. + +In addition BaseRegularGridDatastore must have the following methods and +attributes: +- `get_xy_extent` (method): Return the extent of the x, y coordinates for a + given category of data. +- `get_xy` (method): Return the x, y coordinates of the dataset. +- `coords_projection` (property): Projection object for the coordinates. +- `grid_shape_state` (property): Shape of the grid for the state variables. 
+- `stack_grid_coords` (method): Stack the grid coordinates of the dataset + +""" + +# Standard library +import collections +import dataclasses +from pathlib import Path + +# Third-party +import cartopy.crs as ccrs +import numpy as np +import pytest +import torch +import xarray as xr + +# First-party +from neural_lam.datastore import DATASTORES +from neural_lam.datastore.base import BaseRegularGridDatastore +from neural_lam.datastore.plot_example import plot_example_from_datastore +from tests.conftest import init_datastore_example + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_root_path(datastore_name): + """Check that the `datastore.root_path` property is implemented.""" + datastore = init_datastore_example(datastore_name) + assert isinstance(datastore.root_path, Path) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_config(datastore_name): + """Check that the `datastore.config` property is implemented.""" + datastore = init_datastore_example(datastore_name) + # check the config is a mapping or a dataclass + config = datastore.config + assert isinstance( + config, collections.abc.Mapping + ) or dataclasses.is_dataclass(config) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_step_length(datastore_name): + """Check that the `datastore.step_length` property is implemented.""" + datastore = init_datastore_example(datastore_name) + step_length = datastore.step_length + assert isinstance(step_length, int) + assert step_length > 0 + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_datastore_grid_xy(datastore_name): + """Use the `datastore.get_xy` method to get the x, y coordinates of the + dataset and check that the shape is correct against the `da + tastore.grid_shape_state` property.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip( + "Skip grid_shape_state test for non-regular grid datastores" + ) + + # check the shapes of the xy grid + grid_shape = datastore.grid_shape_state + nx, ny = grid_shape.x, grid_shape.y + for stacked in [True, False]: + xy = datastore.get_xy("static", stacked=stacked) + if stacked: + assert xy.shape == (nx * ny, 2) + else: + assert xy.shape == (nx, ny, 2) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_vars(datastore_name): + """Check that results of. + + - `datastore.get_vars_units` + - `datastore.get_vars_names` + - `datastore.get_vars_long_names` + - `datastore.get_num_data_vars` + + are consistent (as in the number of variables are the same) and that the + return types of each are correct. 
+ + """ + datastore = init_datastore_example(datastore_name) + + for category in ["state", "forcing", "static"]: + units = datastore.get_vars_units(category) + names = datastore.get_vars_names(category) + long_names = datastore.get_vars_long_names(category) + num_vars = datastore.get_num_data_vars(category) + + assert len(units) == len(names) == num_vars + assert isinstance(units, list) + assert isinstance(names, list) + assert isinstance(long_names, list) + assert isinstance(num_vars, int) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_normalization_dataarray(datastore_name): + """Check that the `datastore.get_normalization_dataa rray` method is + implemented.""" + datastore = init_datastore_example(datastore_name) + + for category in ["state", "forcing", "static"]: + ds_stats = datastore.get_standardization_dataarray(category=category) + + # check that the returned object is an xarray DataArray + # and that it has the correct variables + assert isinstance(ds_stats, xr.Dataset) + + if category == "state": + ops = ["mean", "std", "diff_mean", "diff_std"] + elif category == "forcing": + ops = ["mean", "std"] + elif category == "static": + ops = [] + else: + raise NotImplementedError(category) + + for op in ops: + var_name = f"{category}_{op}" + assert var_name in ds_stats.data_vars + da_val = ds_stats[var_name] + assert set(da_val.dims) == {f"{category}_feature"} + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_dataarray(datastore_name): + """Check that the `datastore.get_dataarray` method is implemented. + + And that it returns an xarray DataArray with the correct dimensions. + + """ + + datastore = init_datastore_example(datastore_name) + + for category in ["state", "forcing", "static"]: + n_features = {} + if category in ["state", "forcing"]: + splits = ["train", "val", "test"] + elif category == "static": + # static data should be the same for all splits, so split + # should be allowed to be None + splits = ["train", "val", "test", None] + else: + raise NotImplementedError(category) + + for split in splits: + expected_dims = ["grid_index", f"{category}_feature"] + if category != "static": + if not datastore.is_forecast: + expected_dims.append("time") + else: + expected_dims += [ + "analysis_time", + "elapsed_forecast_duration", + ] + + if datastore.is_ensemble and category == "state": + # assume that only state variables change with ensemble members + expected_dims.append("ensemble_member") + + # XXX: for now we only have a single attribute to get the shape of + # the grid which uses the shape from the "state" category, maybe + # this should change? 
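For `test_get_normalization_dataarray` above, a minimal example of an `xr.Dataset` that would satisfy the checks for the "state" category (illustrative feature names and constant statistics, similar in spirit to what the dummy datastore returns):

```python
import numpy as np
import xarray as xr

features = ["u100m", "v100m"]  # hypothetical state feature names
ds_stats = xr.Dataset(
    {
        "state_mean": ("state_feature", np.zeros(len(features))),
        "state_std": ("state_feature", np.ones(len(features))),
        "state_diff_mean": ("state_feature", np.zeros(len(features))),
        "state_diff_std": ("state_feature", np.ones(len(features))),
    },
    coords={"state_feature": features},
)

# each statistics variable only has the `{category}_feature` dimension
for name in ds_stats.data_vars:
    assert set(ds_stats[name].dims) == {"state_feature"}
```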
+ + da = datastore.get_dataarray(category=category, split=split) + + assert isinstance(da, xr.DataArray) + assert set(da.dims) == set(expected_dims) + if isinstance(datastore, BaseRegularGridDatastore): + grid_shape = datastore.grid_shape_state + assert da.grid_index.size == grid_shape.x * grid_shape.y + + n_features[split] = da[category + "_feature"].size + + # check that the number of features is the same for all splits + assert n_features["train"] == n_features["val"] == n_features["test"] + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_boundary_mask(datastore_name): + """Check that the `datastore.boundary_mask` property is implemented and + that the returned object is an xarray DataArray with the correct shape.""" + datastore = init_datastore_example(datastore_name) + da_mask = datastore.boundary_mask + + assert isinstance(da_mask, xr.DataArray) + assert set(da_mask.dims) == {"grid_index"} + assert da_mask.dtype == "int" + assert set(da_mask.values) == {0, 1} + assert da_mask.sum() > 0 + assert da_mask.sum() < da_mask.size + + if isinstance(datastore, BaseRegularGridDatastore): + grid_shape = datastore.grid_shape_state + assert datastore.boundary_mask.size == grid_shape.x * grid_shape.y + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_xy_extent(datastore_name): + """Check that the `datastore.get_xy_extent` method is implemented and that + the returned object is a tuple of the correct length.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip("Datastore does not implement `BaseCartesianDatastore`") + + extents = {} + # get the extents for each category, and finally check they are all the same + for category in ["state", "forcing", "static"]: + extent = datastore.get_xy_extent(category) + assert isinstance(extent, list) + assert len(extent) == 4 + assert all(isinstance(e, (int, float)) for e in extent) + extents[category] = extent + + # check that the extents are the same for all categories + for category in ["forcing", "static"]: + assert extents["state"] == extents[category] + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_xy(datastore_name): + """Check that the `datastore.get_xy` method is implemented.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip("Datastore does not implement `BaseCartesianDatastore`") + + for category in ["state", "forcing", "static"]: + xy_stacked = datastore.get_xy(category=category, stacked=True) + xy_unstacked = datastore.get_xy(category=category, stacked=False) + + assert isinstance(xy_stacked, np.ndarray) + assert isinstance(xy_unstacked, np.ndarray) + + nx, ny = datastore.grid_shape_state.x, datastore.grid_shape_state.y + + # for stacked=True, the shape should be (n_grid_points, 2) + assert xy_stacked.ndim == 2 + assert xy_stacked.shape[0] == nx * ny + assert xy_stacked.shape[1] == 2 + + # for stacked=False, the shape should be (nx, ny, 2) + assert xy_unstacked.ndim == 3 + assert xy_unstacked.shape[0] == nx + assert xy_unstacked.shape[1] == ny + assert xy_unstacked.shape[2] == 2 + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_get_projection(datastore_name): + """Check that the `datastore.coords_projection` property is implemented.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip("Datastore does not 
implement `BaseCartesianDatastore`") + + assert isinstance(datastore.coords_projection, ccrs.Projection) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def get_grid_shape_state(datastore_name): + """Check that the `datastore.grid_shape_state` property is implemented.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip("Datastore does not implement `BaseCartesianDatastore`") + + grid_shape = datastore.grid_shape_state + assert isinstance(grid_shape, tuple) + assert len(grid_shape) == 2 + assert all(isinstance(e, int) for e in grid_shape) + assert all(e > 0 for e in grid_shape) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +@pytest.mark.parametrize("category", ["state", "forcing", "static"]) +def test_stacking_grid_coords(datastore_name, category): + """Check that the `datastore.stack_grid_coords` method is implemented.""" + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip("Datastore does not implement `BaseCartesianDatastore`") + + da_static = datastore.get_dataarray(category=category, split="train") + + da_static_unstacked = datastore.unstack_grid_coords(da_static).load() + da_static_test = datastore.stack_grid_coords(da_static_unstacked) + + assert da_static.dims == da_static_test.dims + xr.testing.assert_equal(da_static, da_static_test) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_dataarray_shapes(datastore_name): + datastore = init_datastore_example(datastore_name) + static_da = datastore.get_dataarray("static", split=None) + static_da = datastore.stack_grid_coords(static_da) + static_da = static_da.isel(static_feature=0) + + # Convert the unstacked grid coordinates and static data array to tensors + unstacked_tensor = torch.tensor( + datastore.unstack_grid_coords(static_da).to_numpy(), dtype=torch.float32 + ).squeeze() + + reshaped_tensor = ( + torch.tensor(static_da.to_numpy(), dtype=torch.float32) + .reshape(datastore.grid_shape_state.x, datastore.grid_shape_state.y) + .squeeze() + ) + + # Compute the difference + diff = unstacked_tensor - reshaped_tensor + + # Check the shapes + assert unstacked_tensor.shape == ( + datastore.grid_shape_state.x, + datastore.grid_shape_state.y, + ) + assert reshaped_tensor.shape == ( + datastore.grid_shape_state.x, + datastore.grid_shape_state.y, + ) + assert diff.shape == ( + datastore.grid_shape_state.x, + datastore.grid_shape_state.y, + ) + # assert diff == 0 with tolerance 1e-6 + assert torch.allclose(diff, torch.zeros_like(diff), atol=1e-6) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_plot_example_from_datastore(datastore_name): + """Check that the `plot_example_from_datastore` function is implemented.""" + datastore = init_datastore_example(datastore_name) + fig = plot_example_from_datastore( + category="static", + datastore=datastore, + col_dim="{category}_feature", + split="train", + standardize=True, + selection={}, + index_selection={}, + ) + + assert fig is not None + assert fig.get_axes() diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py new file mode 100644 index 00000000..93a7a55f --- /dev/null +++ b/tests/test_graph_creation.py @@ -0,0 +1,119 @@ +# Standard library +import tempfile +from pathlib import Path + +# Third-party +import pytest +import torch + +# First-party +from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.datastore 
import DATASTORES +from neural_lam.datastore.base import BaseRegularGridDatastore +from tests.conftest import init_datastore_example + + +@pytest.mark.parametrize("graph_name", ["1level", "multiscale", "hierarchical"]) +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_graph_creation(datastore_name, graph_name): + """Check that the `create_ graph_from_datastore` function is implemented. + + And that the graph is created in the correct location. + + """ + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip( + f"Skipping test for {datastore_name} as it is not a regular " + "grid datastore." + ) + + if graph_name == "hierarchical": + hierarchical = True + n_max_levels = 3 + elif graph_name == "multiscale": + hierarchical = False + n_max_levels = 3 + elif graph_name == "1level": + hierarchical = False + n_max_levels = 1 + else: + raise ValueError(f"Unknown graph_name: {graph_name}") + + required_graph_files = [ + "m2m_edge_index.pt", + "g2m_edge_index.pt", + "m2g_edge_index.pt", + "m2m_features.pt", + "g2m_features.pt", + "m2g_features.pt", + "mesh_features.pt", + ] + if hierarchical: + required_graph_files.extend( + [ + "mesh_up_edge_index.pt", + "mesh_down_edge_index.pt", + "mesh_up_features.pt", + "mesh_down_features.pt", + ] + ) + + # TODO: check that the number of edges is consistent over the files, for + # now we just check the number of features + d_features = 3 + d_mesh_static = 2 + + with tempfile.TemporaryDirectory() as tmpdir: + graph_dir_path = Path(tmpdir) / "graph" / graph_name + + create_graph_from_datastore( + datastore=datastore, + output_root_path=str(graph_dir_path), + hierarchical=hierarchical, + n_max_levels=n_max_levels, + ) + + assert graph_dir_path.exists() + + # check that all the required files are present + for file_name in required_graph_files: + assert (graph_dir_path / file_name).exists() + + # try to load each and ensure they have the right shape + for file_name in required_graph_files: + file_id = Path(file_name).stem # remove the extension + result = torch.load(graph_dir_path / file_name) + + if file_id.startswith("g2m") or file_id.startswith("m2g"): + assert isinstance(result, torch.Tensor) + + if file_id.endswith("_index"): + assert ( + result.shape[0] == 2 + ) # adjacency matrix uses two rows + elif file_id.endswith("_features"): + assert result.shape[1] == d_features + + elif file_id.startswith("m2m") or file_id.startswith("mesh"): + assert isinstance(result, list) + if not hierarchical: + assert len(result) == 1 + else: + if file_id.startswith("mesh_up") or file_id.startswith( + "mesh_down" + ): + assert len(result) == n_max_levels - 1 + else: + assert len(result) == n_max_levels + + for r in result: + assert isinstance(r, torch.Tensor) + + if file_id == "mesh_features": + assert r.shape[1] == d_mesh_static + elif file_id.endswith("_index"): + assert r.shape[0] == 2 # adjacency matrix uses two rows + elif file_id.endswith("_features"): + assert r.shape[1] == d_features diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py deleted file mode 100644 index 5c8b7aa1..00000000 --- a/tests/test_mllam_dataset.py +++ /dev/null @@ -1,142 +0,0 @@ -# Standard library -import os -from pathlib import Path - -# Third-party -import pooch -import pytest - -# First-party -from neural_lam.config import Config -from neural_lam.create_mesh import main as create_mesh -from neural_lam.train_model import main as train_model -from neural_lam.utils import load_static_data 
-from neural_lam.weather_dataset import WeatherDataset - -# Disable weights and biases to avoid unnecessary logging -# and to avoid having to deal with authentication -os.environ["WANDB_DISABLED"] = "true" - -# Initializing variables for the s3 client -S3_BUCKET_NAME = "mllam-testdata" -S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" -S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" -S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) -TEST_DATA_KNOWN_HASH = ( - "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" -) - - -@pytest.fixture(scope="module") -def meps_example_reduced_filepath(): - # Download and unzip test data into data/meps_example_reduced - pooch.retrieve( - url=S3_FULL_PATH, - known_hash=TEST_DATA_KNOWN_HASH, - processor=pooch.Unzip(extract_dir=""), - path="data", - fname="meps_example_reduced.zip", - ) - return Path("data/meps_example_reduced") - - -def test_load_reduced_meps_dataset(meps_example_reduced_filepath): - # The data_config.yaml file is downloaded and extracted in - # test_retrieve_data_ewc together with the dataset itself - data_config_file = meps_example_reduced_filepath / "data_config.yaml" - dataset_name = meps_example_reduced_filepath.name - - dataset = WeatherDataset(dataset_name=dataset_name) - config = Config.from_file(str(data_config_file)) - - var_names = config.values["dataset"]["var_names"] - var_units = config.values["dataset"]["var_units"] - var_longnames = config.values["dataset"]["var_longnames"] - - assert len(var_names) == len(var_longnames) - assert len(var_names) == len(var_units) - - # in future the number of grid static features - # will be provided by the Dataset class itself - n_grid_static_features = 4 - # Hardcoded in model - n_input_steps = 2 - - n_forcing_features = config.values["dataset"]["num_forcing_features"] - n_state_features = len(var_names) - n_prediction_timesteps = dataset.sample_length - n_input_steps - - nx, ny = config.values["grid_shape_state"] - n_grid = nx * ny - - # check that the dataset is not empty - assert len(dataset) > 0 - - # get the first item - init_states, target_states, forcing = dataset[0] - - # check that the shapes of the tensors are correct - assert init_states.shape == (n_input_steps, n_grid, n_state_features) - assert target_states.shape == ( - n_prediction_timesteps, - n_grid, - n_state_features, - ) - assert forcing.shape == ( - n_prediction_timesteps, - n_grid, - n_forcing_features, - ) - - static_data = load_static_data(dataset_name=dataset_name) - - required_props = { - "border_mask", - "grid_static_features", - "step_diff_mean", - "step_diff_std", - "data_mean", - "data_std", - "param_weights", - } - - # check the sizes of the props - assert static_data["border_mask"].shape == (n_grid, 1) - assert static_data["grid_static_features"].shape == ( - n_grid, - n_grid_static_features, - ) - assert static_data["step_diff_mean"].shape == (n_state_features,) - assert static_data["step_diff_std"].shape == (n_state_features,) - assert static_data["data_mean"].shape == (n_state_features,) - assert static_data["data_std"].shape == (n_state_features,) - assert static_data["param_weights"].shape == (n_state_features,) - - assert set(static_data.keys()) == required_props - - -def test_create_graph_reduced_meps_dataset(): - args = [ - "--graph=hierarchical", - "--hierarchical", - "--data_config=data/meps_example_reduced/data_config.yaml", - "--levels=2", - ] - create_mesh(args) - - -def test_train_model_reduced_meps_dataset(): - args = [ - 
"--model=hi_lam", - "--data_config=data/meps_example_reduced/data_config.yaml", - "--n_workers=4", - "--epochs=1", - "--graph=hierarchical", - "--hidden_dim=16", - "--hidden_layers=1", - "--processor_layers=1", - "--ar_steps=1", - "--eval=val", - "--n_example_pred=0", - ] - train_model(args) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py new file mode 100644 index 00000000..29161505 --- /dev/null +++ b/tests/test_time_slicing.py @@ -0,0 +1,146 @@ +# Third-party +import numpy as np +import pytest +import xarray as xr + +# First-party +from neural_lam.datastore.base import BaseDatastore +from neural_lam.weather_dataset import WeatherDataset + + +class SinglePointDummyDatastore(BaseDatastore): + step_length = 1 + config = None + coords_projection = None + num_grid_points = 1 + root_path = None + + def __init__(self, time_values, state_data, forcing_data, is_forecast): + self._time_values = np.array(time_values) + self._state_data = np.array(state_data) + self._forcing_data = np.array(forcing_data) + self.is_forecast = is_forecast + + if is_forecast: + assert self._state_data.ndim == 2 + else: + assert self._state_data.ndim == 1 + + def get_num_data_vars(self, category): + return 1 + + def get_dataarray(self, category, split): + if category == "state": + values = self._state_data + elif category == "forcing": + values = self._forcing_data + else: + raise NotImplementedError(category) + + if self.is_forecast: + raise NotImplementedError() + else: + da = xr.DataArray( + values, dims=["time"], coords={"time": self._time_values} + ) + # add `{category}_feature` and `grid_index` dimensions + + da = da.expand_dims("grid_index") + da = da.expand_dims(f"{category}_feature") + + dim_order = self.expected_dim_order(category=category) + return da.transpose(*dim_order) + + def get_standardization_dataarray(self, category): + raise NotImplementedError() + + def get_xy(self, category): + raise NotImplementedError() + + def get_vars_units(self, category): + raise NotImplementedError() + + def get_vars_names(self, category): + raise NotImplementedError() + + def get_vars_long_names(self, category): + raise NotImplementedError() + + +ANALYSIS_STATE_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +FORCING_VALUES = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + + +@pytest.mark.parametrize( + "ar_steps,num_past_forcing_steps,num_future_forcing_steps", + [[3, 0, 0], [3, 1, 0], [3, 2, 0], [3, 3, 0]], +) +def test_time_slicing_analysis( + ar_steps, num_past_forcing_steps, num_future_forcing_steps +): + # state and forcing variables have only on dimension, `time` + time_values = np.datetime64("2020-01-01") + np.arange( + len(ANALYSIS_STATE_VALUES) + ) + assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) + + datastore = SinglePointDummyDatastore( + state_data=ANALYSIS_STATE_VALUES, + forcing_data=FORCING_VALUES, + time_values=time_values, + is_forecast=False, + ) + + dataset = WeatherDataset( + datastore=datastore, + ar_steps=ar_steps, + num_future_forcing_steps=num_future_forcing_steps, + num_past_forcing_steps=num_past_forcing_steps, + standardize=False, + ) + + sample = dataset[0] + + init_states, target_states, forcing, _ = [ + tensor.numpy() for tensor in sample + ] + + expected_init_states = [0, 1] + if ar_steps == 3: + expected_target_states = [2, 3, 4] + else: + raise NotImplementedError() + + if num_past_forcing_steps == num_future_forcing_steps == 0: + expected_forcing_values = [[12], [13], [14]] + elif num_past_forcing_steps == 1 and num_future_forcing_steps == 0: + 
expected_forcing_values = [[11, 12], [12, 13], [13, 14]] + elif num_past_forcing_steps == 2 and num_future_forcing_steps == 0: + expected_forcing_values = [[10, 11, 12], [11, 12, 13], [12, 13, 14]] + elif num_past_forcing_steps == 3 and num_future_forcing_steps == 0: + expected_init_states = [1, 2] + expected_target_states = [3, 4, 5] + expected_forcing_values = [ + [10, 11, 12, 13], + [11, 12, 13, 14], + [12, 13, 14, 15], + ] + else: + raise NotImplementedError() + + # init_states: (2, N_grid, d_features) + # target_states: (ar_steps, N_grid, d_features) + # forcing: (ar_steps, N_grid, d_windowed_forcing) + # target_times: (ar_steps,) + assert init_states.shape == (2, 1, 1) + assert init_states[:, 0, 0].tolist() == expected_init_states + + assert target_states.shape == (3, 1, 1) + assert target_states[:, 0, 0].tolist() == expected_target_states + + assert forcing.shape == ( + 3, + 1, + 1 + num_past_forcing_steps + num_future_forcing_steps, + ) + np.testing.assert_equal(forcing[:, 0, :], np.array(expected_forcing_values)) diff --git a/tests/test_training.py b/tests/test_training.py new file mode 100644 index 00000000..1ed1847d --- /dev/null +++ b/tests/test_training.py @@ -0,0 +1,103 @@ +# Standard library +from pathlib import Path + +# Third-party +import pytest +import pytorch_lightning as pl +import torch +import wandb + +# First-party +from neural_lam import config as nlconfig +from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.datastore import DATASTORES +from neural_lam.datastore.base import BaseRegularGridDatastore +from neural_lam.models.graph_lam import GraphLAM +from neural_lam.weather_dataset import WeatherDataModule +from tests.conftest import init_datastore_example + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +def test_training(datastore_name): + datastore = init_datastore_example(datastore_name) + + if not isinstance(datastore, BaseRegularGridDatastore): + pytest.skip( + f"Skipping test for {datastore_name} as it is not a regular " + "grid datastore." + ) + + if torch.cuda.is_available(): + device_name = "cuda" + torch.set_float32_matmul_precision( + "high" + ) # Allows using Tensor Cores on A100s + else: + device_name = "cpu" + + trainer = pl.Trainer( + max_epochs=1, + deterministic=True, + accelerator=device_name, + # XXX: `devices` has to be set to 2 otherwise + # neural_lam.models.ar_model.ARModel.aggregate_and_plot_metrics fails + # because it expects to aggregate over multiple devices + devices=2, + log_every_n_steps=1, + ) + + graph_name = "1level" + + graph_dir_path = Path(datastore.root_path) / "graph" / graph_name + + if not graph_dir_path.exists(): + create_graph_from_datastore( + datastore=datastore, + output_root_path=str(graph_dir_path), + n_max_levels=1, + ) + + data_module = WeatherDataModule( + datastore=datastore, + ar_steps_train=3, + ar_steps_eval=5, + standardize=True, + batch_size=2, + num_workers=1, + num_past_forcing_steps=1, + num_future_forcing_steps=1, + ) + + class ModelArgs: + output_std = False + loss = "mse" + restore_opt = False + n_example_pred = 1 + # XXX: this should be superfluous when we have already defined the + # model object no? 
+ graph = graph_name + hidden_dim = 4 + hidden_layers = 1 + processor_layers = 2 + mesh_aggr = "sum" + lr = 1.0e-3 + val_steps_to_log = [1, 3] + metrics_watch = [] + num_past_forcing_steps = 1 + num_future_forcing_steps = 1 + + model_args = ModelArgs() + + config = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection( + kind=datastore.SHORT_NAME, config_path=datastore.root_path + ) + ) + + model = GraphLAM( # noqa + args=model_args, + datastore=datastore, + config=config, + ) + wandb.init() + trainer.fit(model=model, datamodule=data_module) From 5df1bff46f22f818a01389f2d8bf5148d822bde9 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 19:46:37 +0100 Subject: [PATCH 023/190] add datastore_boundary to neural_lam --- neural_lam/train_model.py | 22 ++++++++++++++++++++++ neural_lam/weather_dataset.py | 10 ++++++++++ 2 files changed, 32 insertions(+) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 74146c89..37bf6db7 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -34,6 +34,11 @@ def main(input_args=None): type=str, help="Path to the configuration for neural-lam", ) + parser.add_argument( + "--config_path_boundary", + type=str, + help="Path to the configuration for boundary conditions", + ) parser.add_argument( "--model", type=str, @@ -212,6 +217,9 @@ def main(input_args=None): assert ( args.config_path is not None ), "Specify your config with --config_path" + assert ( + args.config_path_boundary is not None + ), "Specify your config with --config_path_boundary" assert args.model in MODELS, f"Unknown model: {args.model}" assert args.eval in ( None, @@ -227,10 +235,24 @@ def main(input_args=None): # Load neural-lam configuration and datastore to use config, datastore = load_config_and_datastore(config_path=args.config_path) + config_boundary, datastore_boundary = load_config_and_datastore( + config_path=args.config_path_boundary + ) + + # TODO this should not be required, make more flexible + assert ( + datastore.num_past_forcing_steps + == datastore_boundary.num_past_forcing_steps + ), "Mismatch in num_past_forcing_steps" + assert ( + datastore.num_future_forcing_steps + == datastore_boundary.num_future_forcing_steps + ), "Mismatch in num_future_forcing_steps" # Create datamodule data_module = WeatherDataModule( datastore=datastore, + datastore_boundary=datastore_boundary, ar_steps_train=args.ar_steps_train, ar_steps_eval=args.ar_steps_eval, standardize=True, diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 532e3c90..51256e41 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -22,6 +22,8 @@ class WeatherDataset(torch.utils.data.Dataset): ---------- datastore : BaseDatastore The datastore to load the data from (e.g. mdp). + datastore_boundary : BaseDatastore + The boundary datastore to load the data from (e.g. mdp). split : str, optional The data split to use ("train", "val" or "test"). Default is "train". 
ar_steps : int, optional @@ -43,6 +45,7 @@ class WeatherDataset(torch.utils.data.Dataset): def __init__( self, datastore: BaseDatastore, + datastore_boundary: BaseDatastore, split="train", ar_steps=3, num_past_forcing_steps=1, @@ -54,6 +57,7 @@ def __init__( self.split = split self.ar_steps = ar_steps self.datastore = datastore + self.datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps @@ -605,6 +609,7 @@ class WeatherDataModule(pl.LightningDataModule): def __init__( self, datastore: BaseDatastore, + datastore_boundary: BaseDatastore, ar_steps_train=3, ar_steps_eval=25, standardize=True, @@ -615,6 +620,7 @@ def __init__( ): super().__init__() self._datastore = datastore + self._datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps self.ar_steps_train = ar_steps_train @@ -626,6 +632,7 @@ def __init__( self.val_dataset = None self.test_dataset = None if num_workers > 0: + # BUG: There also seem to be issues with "spawn", to be investigated # default to spawn for now, as the default on linux "fork" hangs # when using dask (which the npyfilesmeps datastore uses) self.multiprocessing_context = "spawn" @@ -636,6 +643,7 @@ def setup(self, stage=None): if stage == "fit" or stage is None: self.train_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="train", ar_steps=self.ar_steps_train, standardize=self.standardize, @@ -644,6 +652,7 @@ def setup(self, stage=None): ) self.val_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="val", ar_steps=self.ar_steps_eval, standardize=self.standardize, @@ -654,6 +663,7 @@ def setup(self, stage=None): if stage == "test" or stage is None: self.test_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="test", ar_steps=self.ar_steps_eval, standardize=self.standardize, From 46590efc277cb809d788ce5af44133f8b95eb279 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 20:15:41 +0100 Subject: [PATCH 024/190] complete integration of boundary in weatherDataset --- neural_lam/weather_dataset.py | 55 ++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 51256e41..10b74086 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -67,6 +67,9 @@ def __init__( self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) + self.da_boundary = self.datastore_boundary.get_dataarray( + category="boundary", split=self.split + ) # check that with the provided data-arrays and ar_steps that we have a # non-zero amount of samples @@ -118,6 +121,15 @@ def __init__( self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std + if self.da_boundary is not None: + self.ds_boundary_stats = ( + self.datastore_boundary.get_standardization_dataarray( + category="boundary" + ) + ) + self.da_boundary_mean = self.ds_boundary_stats.boundary_mean + self.da_boundary_std = self.ds_boundary_stats.boundary_std + def __len__(self): if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time @@ -352,6 +364,8 @@ def _build_item_dataarrays(self, idx): The dataarray for the target states. 
da_forcing_windowed : xr.DataArray The dataarray for the forcing data, windowed for the sample. + da_boundary_windowed : xr.DataArray + The dataarray for the boundary data, windowed for the sample. da_target_times : xr.DataArray The dataarray for the target times. """ @@ -381,6 +395,11 @@ def _build_item_dataarrays(self, idx): else: da_forcing = None + if self.da_boundary is not None: + da_boundary = self.da_boundary + else: + da_boundary = None + # handle time sampling in a way that is compatible with both analysis # and forecast data da_state = self._slice_state_time( @@ -390,11 +409,17 @@ def _build_item_dataarrays(self, idx): da_forcing_windowed = self._slice_forcing_time( da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps ) + if da_boundary is not None: + da_boundary_windowed = self._slice_forcing_time( + da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps + ) # load the data into memory da_state.load() if da_forcing is not None: da_forcing_windowed.load() + if da_boundary is not None: + da_boundary_windowed.load() da_init_states = da_state.isel(time=slice(0, 2)) da_target_states = da_state.isel(time=slice(2, None)) @@ -417,6 +442,11 @@ def _build_item_dataarrays(self, idx): da_forcing_windowed - self.da_forcing_mean ) / self.da_forcing_std + if da_boundary is not None: + da_boundary_windowed = ( + da_boundary_windowed - self.da_boundary_mean + ) / self.da_boundary_std + if da_forcing is not None: # stack the `forcing_feature` and `window_sample` dimensions into a # single `forcing_feature` dimension @@ -436,11 +466,31 @@ def _build_item_dataarrays(self, idx): "forcing_feature": [], }, ) + if da_boundary is not None: + # stack the `forcing_feature` and `window_sample` dimensions into a + # single `forcing_feature` dimension + da_boundary_windowed = da_boundary_windowed.stack( + boundary_feature_windowed=("boundary_feature", "window") + ) + else: + # create an empty forcing tensor with the right shape + da_boundary_windowed = xr.DataArray( + data=np.empty( + (self.ar_steps, da_state.grid_index.size, 0), + ), + dims=("time", "grid_index", "boundary_feature"), + coords={ + "time": da_target_times, + "grid_index": da_state.grid_index, + "boundary_feature": [], + }, + ) return ( da_init_states, da_target_states, da_forcing_windowed, + da_boundary_windowed, da_target_times, ) @@ -475,6 +525,7 @@ def __getitem__(self, idx): da_init_states, da_target_states, da_forcing_windowed, + da_boundary_windowed, da_target_times, ) = self._build_item_dataarrays(idx=idx) @@ -491,13 +542,15 @@ def __getitem__(self, idx): ) forcing = torch.tensor(da_forcing_windowed.values, dtype=tensor_dtype) + boundary = torch.tensor(da_boundary_windowed.values, dtype=tensor_dtype) # init_states: (2, N_grid, d_features) # target_states: (ar_steps, N_grid, d_features) # forcing: (ar_steps, N_grid, d_windowed_forcing) + # boundary: (ar_steps, N_grid, d_windowed_boundary) # target_times: (ar_steps,) - return init_states, target_states, forcing, target_times + return init_states, target_states, forcing, boundary, target_times def __iter__(self): """ From b990f4941bd7167160a2f265b1e9fe17026ed31e Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 20:15:55 +0100 Subject: [PATCH 025/190] Add test to check timestep length and spacing --- neural_lam/weather_dataset.py | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 10b74086..97d9f9c3 100644 --- a/neural_lam/weather_dataset.py +++ 
b/neural_lam/weather_dataset.py @@ -101,6 +101,82 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + # Check time coverage for forcing and boundary data + if self.da_forcing is not None or self.da_boundary is not None: + state_times = self.da_state.time + state_time_min = state_times.min().values + state_time_max = state_times.max().values + + def get_time_step(times): + """Calculate the time step from the data""" + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + + if self.da_forcing is not None: + forcing_times = self.da_forcing.time + forcing_time_step = get_time_step(forcing_times.values) + forcing_time_min = forcing_times.min().values + forcing_time_max = forcing_times.max().values + + # Calculate required bounds for forcing using its time step + forcing_required_time_min = ( + state_time_min + - self.num_past_forcing_steps * forcing_time_step + ) + forcing_required_time_max = ( + state_time_max + + self.num_future_forcing_steps * forcing_time_step + ) + + if forcing_time_min > forcing_required_time_min: + raise ValueError( + f"Forcing data starts too late." + f"Required start: {forcing_required_time_min}, " + f"but forcing starts at {forcing_time_min}." + ) + + if forcing_time_max < forcing_required_time_max: + raise ValueError( + f"Forcing data ends too early." + f"Required end: {forcing_required_time_max}," + f"but forcing ends at {forcing_time_max}." + ) + + if self.da_boundary is not None: + boundary_times = self.da_boundary.time + boundary_time_step = get_time_step(boundary_times.values) + boundary_time_min = boundary_times.min().values + boundary_time_max = boundary_times.max().values + + # Calculate required bounds for boundary using its time step + boundary_required_time_min = ( + state_time_min + - self.num_past_forcing_steps * boundary_time_step + ) + boundary_required_time_max = ( + state_time_max + + self.num_future_forcing_steps * boundary_time_step + ) + + if boundary_time_min > boundary_required_time_min: + raise ValueError( + f"Boundary data starts too late." + f"Required start: {boundary_required_time_min}, " + f"but boundary starts at {boundary_time_min}." + ) + + if boundary_time_max < boundary_required_time_max: + raise ValueError( + f"Boundary data ends too early." + f"Required end: {boundary_required_time_max}, " + f"but boundary ends at {boundary_time_max}." + ) + # Set up for standardization # TODO: This will become part of ar_model.py soon! self.standardize = standardize From 3fd1d6be82d0174b106922a7ff9c74255bac5a35 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 21:43:57 +0100 Subject: [PATCH 026/190] setting default mdp boundary to 0 gridcells --- neural_lam/datastore/mdp.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 10593a82..8c67fe58 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -26,7 +26,7 @@ class MDPDatastore(BaseRegularGridDatastore): SHORT_NAME = "mdp" - def __init__(self, config_path, n_boundary_points=30, reuse_existing=True): + def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): """ Construct a new MDPDatastore from the configuration file at `config_path`. 
A boundary mask is created with `n_boundary_points` @@ -335,19 +335,22 @@ def boundary_mask(self) -> xr.DataArray: boundary point and 0 is not. """ - ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) - da_state_variable = ( - ds_unstacked["state"].isel(time=0).isel(state_feature=0) - ) - da_domain_allzero = xr.zeros_like(da_state_variable) - ds_unstacked["boundary_mask"] = da_domain_allzero.isel( - x=slice(self._n_boundary_points, -self._n_boundary_points), - y=slice(self._n_boundary_points, -self._n_boundary_points), - ) - ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( - 1 - ).astype(int) - return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) + if self._n_boundary_points > 0: + ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) + da_state_variable = ( + ds_unstacked["state"].isel(time=0).isel(state_feature=0) + ) + da_domain_allzero = xr.zeros_like(da_state_variable) + ds_unstacked["boundary_mask"] = da_domain_allzero.isel( + x=slice(self._n_boundary_points, -self._n_boundary_points), + y=slice(self._n_boundary_points, -self._n_boundary_points), + ) + ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( + 1 + ).astype(int) + return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) + else: + return None @property def coords_projection(self) -> ccrs.Projection: From 1f2499c3b3fb8493b89d2be97ff301181c756f72 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 21:44:54 +0100 Subject: [PATCH 027/190] implement time-based slicing combine two slicing fcts into one --- neural_lam/weather_dataset.py | 300 ++++++++++++++++++---------------- 1 file changed, 161 insertions(+), 139 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 97d9f9c3..5d35a4b7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -67,8 +67,9 @@ def __init__( self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) + # XXX For now boundary data is always considered forcing data self.da_boundary = self.datastore_boundary.get_dataarray( - category="boundary", split=self.split + category="forcing", split=self.split ) # check that with the provided data-arrays and ar_steps that we have a @@ -200,7 +201,7 @@ def get_time_step(times): if self.da_boundary is not None: self.ds_boundary_stats = ( self.datastore_boundary.get_standardization_dataarray( - category="boundary" + category="forcing" ) ) self.da_boundary_mean = self.ds_boundary_stats.boundary_mean @@ -252,175 +253,156 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_state_time(self, da_state, idx, n_steps: int): + def _slice_time(self, da_state, da_forcing, idx, n_steps: int): """ - Produce a time slice of the given dataarray `da_state` (state) starting - at `idx` and with `n_steps` steps. An `offset`is calculated based on the - `num_past_forcing_steps` class attribute. `Offset` is used to offset the - start of the sample, to assert that enough previous time steps are - available for the 2 initial states and any corresponding forcings - (calculated in `_slice_forcing_time`). + Produce time slices of the given dataarrays `da_state` (state) and + `da_forcing` (forcing). For the state data, slicing is done as before + based on `idx`. For the forcing data, nearest neighbor matching is + performed based on the state times. Additionally, the time difference + between the matched forcing times and state times (in multiples of state + time steps) is added to the forcing dataarray. 
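# A minimal numpy-only sketch of the nearest-neighbour matching described
# above; `forcing_times`, `state_times` and `state_time_step` below are
# hypothetical stand-in values for the coordinates that `_slice_time`
# operates on, chosen only to illustrate the matching and the `time_diff`
# offsets.
import numpy as np

forcing_times = np.array(
    ["2020-01-01T00", "2020-01-01T03", "2020-01-01T06"], dtype="datetime64[h]"
)
state_times = np.array(
    ["2020-01-01T01", "2020-01-01T05"], dtype="datetime64[h]"
)
state_time_step = state_times[1] - state_times[0]

# Pairwise (forcing - state) differences; rows index forcing times,
# columns index state times.
time_deltas = forcing_times[:, np.newaxis] - state_times[np.newaxis, :]
# Nearest forcing sample for each state time.
idx_min = np.abs(time_deltas).argmin(axis=0)
# Signed offset of the matched forcing times to the state times, expressed
# in multiples of the state time step (what becomes the `time_diff`
# coordinate).
time_diff_steps = (
    time_deltas[idx_min, np.arange(len(state_times))] / state_time_step
)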
Parameters ---------- da_state : xr.DataArray - The dataarray to slice. This is expected to have a `time` dimension - if the datastore is providing analysis only data, and a - `analysis_time` and `elapsed_forecast_duration` dimensions if the - datastore is providing forecast data. + The state dataarray to slice. + da_forcing : xr.DataArray + The forcing dataarray to slice. idx : int - The index of the time step to start the sample from. + The index of the time step to start the sample from in the state + data. n_steps : int The number of time steps to include in the sample. Returns ------- - da_sliced : xr.DataArray - The sliced dataarray with dims ('time', 'grid_index', + da_state_sliced : xr.DataArray + The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). + da_forcing_matched : xr.DataArray + The forcing dataarray matched to state times with an added + coordinate 'time_diff', representing the time difference to state + times in multiples of state time steps. """ - # The current implementation requires at least 2 time steps for the - # initial state (see GraphCast). + # Number of initial steps required (e.g., for initializing models) init_steps = 2 - # slice the dataarray to include the required number of time steps + + # Slice the state data as before if self.datastore.is_forecast: + # Calculate start and end indices for slicing start_idx = max(0, self.num_past_forcing_steps - init_steps) end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps - # this implies that the data will have both `analysis_time` and - # `elapsed_forecast_duration` dimensions for forecasts. We for now - # simply select a analysis time and the first `n_steps` forecast - # times (given no offset). Note that this means that we get one - # sample per forecast, always starting at forecast time 2. - da_sliced = da_state.isel( + + # Slice the state data over the elapsed forecast duration + da_state_sliced = da_state.isel( analysis_time=idx, elapsed_forecast_duration=slice(start_idx, end_idx), ) - # create a new time dimension so that the produced sample has a - # `time` dimension, similarly to the analysis only data - da_sliced["time"] = ( - da_sliced.analysis_time + da_sliced.elapsed_forecast_duration + + # Create a new 'time' dimension + da_state_sliced["time"] = ( + da_state_sliced.analysis_time + + da_state_sliced.elapsed_forecast_duration ) - da_sliced = da_sliced.swap_dims( + da_state_sliced = da_state_sliced.swap_dims( {"elapsed_forecast_duration": "time"} ) + else: - # For analysis data we slice the time dimension directly. The offset - # is only relevant for the very first (and last) samples in the - # dataset. + # For analysis data, slice the time dimension directly start_idx = idx + max(0, self.num_past_forcing_steps - init_steps) end_idx = ( idx + max(init_steps, self.num_past_forcing_steps) + n_steps ) - da_sliced = da_state.isel(time=slice(start_idx, end_idx)) - return da_sliced + da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) - def _slice_forcing_time(self, da_forcing, idx, n_steps: int): - """ - Produce a time slice of the given dataarray `da_forcing` (forcing) - starting at `idx` and with `n_steps` steps. An `offset` is calculated - based on the `num_past_forcing_steps` class attribute. It is used to - offset the start of the sample, to ensure that enough previous time - steps are available for the forcing data. The forcing data is windowed - around the current autoregressive time step to include the past and - future forcings. 
- - Parameters - ---------- - da_forcing : xr.DataArray - The forcing dataarray to slice. This is expected to have a `time` - dimension if the datastore is providing analysis only data, and a - `analysis_time` and `elapsed_forecast_duration` dimensions if the - datastore is providing forecast data. - idx : int - The index of the time step to start the sample from. - n_steps : int - The number of time steps to include in the sample. - - Returns - ------- - da_concat : xr.DataArray - The sliced dataarray with dims ('time', 'grid_index', - 'window', 'forcing_feature'). - """ - # The current implementation requires at least 2 time steps for the - # initial state (see GraphCast). The forcing data is windowed around the - # current autregressive time step. The two `init_steps` can also be used - # as past forcings. - init_steps = 2 - da_list = [] + # Get the state times for matching + state_times = da_state_sliced["time"] + # Match forcing data to state times based on nearest neighbor if self.datastore.is_forecast: - # This implies that the data will have both `analysis_time` and - # `elapsed_forecast_duration` dimensions for forecasts. We for now - # simply select an analysis time and the first `n_steps` forecast - # times (given no offset). Note that this means that we get one - # sample per forecast. - # Add a 'time' dimension using the actual forecast times - offset = max(init_steps, self.num_past_forcing_steps) - for step in range(n_steps): - start_idx = offset + step - self.num_past_forcing_steps - end_idx = offset + step + self.num_future_forcing_steps - - current_time = ( - da_forcing.analysis_time[idx] - + da_forcing.elapsed_forecast_duration[offset + step] - ) - - da_sliced = da_forcing.isel( - analysis_time=idx, - elapsed_forecast_duration=slice(start_idx, end_idx + 1), - ) - - da_sliced = da_sliced.rename( - {"elapsed_forecast_duration": "window"} - ) + # Calculate all possible forcing times + forcing_times = ( + da_forcing.analysis_time + da_forcing.elapsed_forecast_duration + ) + forcing_times_flat = forcing_times.stack( + forecast_time=("analysis_time", "elapsed_forecast_duration") + ) - # Assign the 'window' coordinate to be relative positions - da_sliced = da_sliced.assign_coords( - window=np.arange(len(da_sliced.window)) - ) + # Compute time differences + time_deltas = ( + forcing_times_flat.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) + time_diffs = np.abs(time_deltas) + idx_min = time_diffs.argmin(axis=0) + + # Retrieve corresponding indices for analysis_time and + # elapsed_forecast_duration + forecast_time_index = forcing_times_flat["forecast_time"][idx_min] + analysis_time_indices = forecast_time_index["analysis_time"] + elapsed_forecast_duration_indices = forecast_time_index[ + "elapsed_forecast_duration" + ] + + # Slice the forcing data using matched indices + da_forcing_matched = da_forcing.isel( + analysis_time=("time", analysis_time_indices), + elapsed_forecast_duration=( + "time", + elapsed_forecast_duration_indices, + ), + ) - da_sliced = da_sliced.expand_dims( - dim={"time": [current_time.values]} - ) + # Assign matched state times to the forcing data + da_forcing_matched["time"] = state_times + da_forcing_matched = da_forcing_matched.swap_dims( + {"elapsed_forecast_duration": "time"} + ) - da_list.append(da_sliced) + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] + time_diff_steps = ( + time_deltas[idx_min, np.arange(len(state_times))] + / state_time_step + ) - # 
Concatenate the list of DataArrays along the 'time' dimension - da_concat = xr.concat(da_list, dim="time") + # Add time difference as a new coordinate + da_forcing_matched = da_forcing_matched.assign_coords( + time_diff=("time", time_diff_steps) + ) else: - # For analysis data, we slice the time dimension directly. The - # offset is only relevant for the very first (and last) samples in - # the dataset. - offset = idx + max(init_steps, self.num_past_forcing_steps) - for step in range(n_steps): - start_idx = offset + step - self.num_past_forcing_steps - end_idx = offset + step + self.num_future_forcing_steps - - # Slice the data over the desired time window - da_sliced = da_forcing.isel(time=slice(start_idx, end_idx + 1)) - - da_sliced = da_sliced.rename({"time": "window"}) - - # Assign the 'window' coordinate to be relative positions - da_sliced = da_sliced.assign_coords( - window=np.arange(len(da_sliced.window)) - ) + # For analysis data, match directly using the 'time' coordinate + forcing_times = da_forcing["time"] - # Add a 'time' dimension to keep track of steps using actual - # time coordinates - current_time = da_forcing.time[offset + step] - da_sliced = da_sliced.expand_dims( - dim={"time": [current_time.values]} - ) + # Compute time differences + time_deltas = ( + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) + time_diffs = np.abs(time_deltas) + idx_min = time_diffs.argmin(axis=0) - da_list.append(da_sliced) + # Slice the forcing data using matched indices + da_forcing_matched = da_forcing.isel(time=idx_min) + da_forcing_matched = da_forcing_matched.assign_coords( + time=state_times + ) - # Concatenate the list of DataArrays along the 'time' dimension - da_concat = xr.concat(da_list, dim="time") + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] + time_diff_steps = ( + time_deltas[idx_min, np.arange(len(state_times))] + / state_time_step + ) - return da_concat + # Add time difference as a new coordinate + da_forcing_matched = da_forcing_matched.assign_coords( + time_diff=("time", time_diff_steps) + ) + + return da_state_sliced, da_forcing_matched def _build_item_dataarrays(self, idx): """ @@ -442,6 +424,7 @@ def _build_item_dataarrays(self, idx): The dataarray for the forcing data, windowed for the sample. da_boundary_windowed : xr.DataArray The dataarray for the boundary data, windowed for the sample. + Boundary data is always considered forcing data. da_target_times : xr.DataArray The dataarray for the target times. 
""" @@ -478,15 +461,15 @@ def _build_item_dataarrays(self, idx): # handle time sampling in a way that is compatible with both analysis # and forecast data - da_state = self._slice_state_time( + da_state = self._slice_time( da_state=da_state, idx=idx, n_steps=self.ar_steps ) if da_forcing is not None: - da_forcing_windowed = self._slice_forcing_time( + da_forcing_windowed = self._slice_time( da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps ) if da_boundary is not None: - da_boundary_windowed = self._slice_forcing_time( + da_boundary_windowed = self._slice_time( da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps ) @@ -524,13 +507,32 @@ def _build_item_dataarrays(self, idx): ) / self.da_boundary_std if da_forcing is not None: - # stack the `forcing_feature` and `window_sample` dimensions into a - # single `forcing_feature` dimension + # Expand 'time_diff' to align with 'forcing_feature' and 'window' + # dimensions 'time_diff' has dimension ('time'), expand to ('time', + # 'forcing_feature', 'window') + time_diff_expanded = da_forcing_windowed["time_diff"].expand_dims( + forcing_feature=da_forcing_windowed["forcing_feature"], + window=da_forcing_windowed["window"], + ) + + # Stack 'forcing_feature' and 'window' into a single + # 'forcing_feature_windowed' dimension da_forcing_windowed = da_forcing_windowed.stack( forcing_feature_windowed=("forcing_feature", "window") ) + time_diff_expanded = time_diff_expanded.stack( + forcing_feature_windowed=("forcing_feature", "window") + ) + + # Assign 'time_diff' as a coordinate to 'forcing_feature_windowed' + da_forcing_windowed = da_forcing_windowed.assign_coords( + time_diff=( + "forcing_feature_windowed", + time_diff_expanded.values, + ) + ) else: - # create an empty forcing tensor with the right shape + # Create an empty forcing tensor with the right shape da_forcing_windowed = xr.DataArray( data=np.empty( (self.ar_steps, da_state.grid_index.size, 0), @@ -542,14 +544,34 @@ def _build_item_dataarrays(self, idx): "forcing_feature": [], }, ) + if da_boundary is not None: - # stack the `forcing_feature` and `window_sample` dimensions into a - # single `forcing_feature` dimension + # If 'da_boundary_windowed' also has 'time_diff', process similarly + # Expand 'time_diff' to align with 'boundary_feature' and 'window' + # dimensions + time_diff_expanded = da_boundary_windowed["time_diff"].expand_dims( + boundary_feature=da_boundary_windowed["boundary_feature"], + window=da_boundary_windowed["window"], + ) + + # Stack 'boundary_feature' and 'window' into a single + # 'boundary_feature_windowed' dimension da_boundary_windowed = da_boundary_windowed.stack( boundary_feature_windowed=("boundary_feature", "window") ) + time_diff_expanded = time_diff_expanded.stack( + boundary_feature_windowed=("boundary_feature", "window") + ) + + # Assign 'time_diff' as a coordinate to 'boundary_feature_windowed' + da_boundary_windowed = da_boundary_windowed.assign_coords( + time_diff=( + "boundary_feature_windowed", + time_diff_expanded.values, + ) + ) else: - # create an empty forcing tensor with the right shape + # Create an empty boundary tensor with the right shape da_boundary_windowed = xr.DataArray( data=np.empty( (self.ar_steps, da_state.grid_index.size, 0), From 1af1481e6884f89ccf39befa37e0d61ed16bbcc3 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 06:26:54 +0100 Subject: [PATCH 028/190] remove all interior_mask and boundary_mask --- neural_lam/datastore/base.py | 17 ------- neural_lam/datastore/mdp.py | 34 -------------- 
neural_lam/datastore/npyfilesmeps/store.py | 28 ------------ neural_lam/models/ar_model.py | 53 ++++------------------ neural_lam/vis.py | 16 ------- tests/dummy_datastore.py | 22 --------- tests/test_datastores.py | 21 --------- 7 files changed, 10 insertions(+), 181 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index 0317c2e5..5aeedb2e 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -228,23 +228,6 @@ def get_dataarray( """ pass - @cached_property - @abc.abstractmethod - def boundary_mask(self) -> xr.DataArray: - """ - Return the boundary mask for the dataset, with spatial dimensions - stacked. Where the value is 1, the grid point is a boundary point, and - where the value is 0, the grid point is not a boundary point. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions - `('grid_index',)`. - - """ - pass - @abc.abstractmethod def get_xy(self, category: str) -> np.ndarray: """ diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 8c67fe58..5365c723 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -318,40 +318,6 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: ds_stats = self._ds[stats_variables.keys()].rename(stats_variables) return ds_stats - @cached_property - def boundary_mask(self) -> xr.DataArray: - """ - Produce a 0/1 mask for the boundary points of the dataset, these will - sit at the edges of the domain (in x/y extent) and will be used to mask - out the boundary points from the loss function and to overwrite the - boundary points from the prediction. For now this is created when the - mask is requested, but in the future this could be saved to the zarr - file. - - Returns - ------- - xr.DataArray - A 0/1 mask for the boundary points of the dataset, where 1 is a - boundary point and 0 is not. - - """ - if self._n_boundary_points > 0: - ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) - da_state_variable = ( - ds_unstacked["state"].isel(time=0).isel(state_feature=0) - ) - da_domain_allzero = xr.zeros_like(da_state_variable) - ds_unstacked["boundary_mask"] = da_domain_allzero.isel( - x=slice(self._n_boundary_points, -self._n_boundary_points), - y=slice(self._n_boundary_points, -self._n_boundary_points), - ) - ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( - 1 - ).astype(int) - return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) - else: - return None - @property def coords_projection(self) -> ccrs.Projection: """ diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 42e80706..146b0627 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -668,34 +668,6 @@ def grid_shape_state(self) -> CartesianGridShape: ny, nx = self.config.grid_shape_state return CartesianGridShape(x=nx, y=ny) - @cached_property - def boundary_mask(self) -> xr.DataArray: - """The boundary mask for the dataset. This is a binary mask that is 1 - where the grid cell is on the boundary of the domain, and 0 otherwise. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions `[grid_index]`. 
- - """ - xy = self.get_xy(category="state", stacked=False) - xs = xy[:, :, 0] - ys = xy[:, :, 1] - # Check if x-coordinates are constant along columns - assert np.allclose(xs, xs[:, [0]]), "x-coordinates are not constant" - # Check if y-coordinates are constant along rows - assert np.allclose(ys, ys[[0], :]), "y-coordinates are not constant" - # Extract unique x and y coordinates - x = xs[:, 0] # Unique x-coordinates (changes along the first axis) - y = ys[0, :] # Unique y-coordinates (changes along the second axis) - values = np.load(self.root_path / "static" / "border_mask.npy") - da_mask = xr.DataArray( - values, dims=["y", "x"], coords=dict(x=x, y=y), name="boundary_mask" - ) - da_mask_stacked_xy = self.stack_grid_coords(da_mask).astype(int) - return da_mask_stacked_xy - def get_standardization_dataarray(self, category: str) -> xr.Dataset: """Return the standardization dataarray for the given category. This should contain a `{category}_mean` and `{category}_std` variable for diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index bc4c6719..4ab73cc7 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -42,7 +42,6 @@ def __init__( da_state_stats = datastore.get_standardization_dataarray( category="state" ) - da_boundary_mask = datastore.boundary_mask num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps @@ -115,18 +114,6 @@ def __init__( # Instantiate loss function self.loss = metrics.get_metric(args.loss) - boundary_mask = torch.tensor( - da_boundary_mask.values, dtype=torch.float32 - ).unsqueeze( - 1 - ) # add feature dim - - self.register_buffer("boundary_mask", boundary_mask, persistent=False) - # Pre-compute interior mask for use in loss function - self.register_buffer( - "interior_mask", 1.0 - self.boundary_mask, persistent=False - ) # (num_grid_nodes, 1), 1 for non-border - self.val_metrics = { "mse": [], } @@ -153,13 +140,6 @@ def configure_optimizers(self): ) return opt - @property - def interior_mask_bool(self): - """ - Get the interior mask as a boolean (N,) mask. 
- """ - return self.interior_mask[:, 0].to(torch.bool) - @staticmethod def expand_to_batch(x, batch_size): """ @@ -191,7 +171,6 @@ def unroll_prediction(self, init_states, forcing_features, true_states): for i in range(pred_steps): forcing = forcing_features[:, i] - border_state = true_states[:, i] pred_state, pred_std = self.predict_step( prev_state, prev_prev_state, forcing @@ -199,19 +178,13 @@ def unroll_prediction(self, init_states, forcing_features, true_states): # state: (B, num_grid_nodes, d_f) pred_std: (B, num_grid_nodes, # d_f) or None - # Overwrite border with true state - new_state = ( - self.boundary_mask * border_state - + self.interior_mask * pred_state - ) - - prediction_list.append(new_state) + prediction_list.append(pred_state) if self.output_std: pred_std_list.append(pred_std) # Update conditioning states prev_prev_state = prev_state - prev_state = new_state + prev_state = pred_state prediction = torch.stack( prediction_list, dim=1 @@ -249,12 +222,14 @@ def training_step(self, batch): """ prediction, target, pred_std, _ = self.common_step(batch) - # Compute loss + # Compute loss - mean over unrolled times and batch batch_loss = torch.mean( self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool + prediction, + target, + pred_std, ) - ) # mean over unrolled times and batch + ) log_dict = {"train_loss": batch_loss} self.log_dict( @@ -287,9 +262,7 @@ def validation_step(self, batch, batch_idx): prediction, target, pred_std, _ = self.common_step(batch) time_step_loss = torch.mean( - self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool - ), + self.loss(prediction, target, pred_std), dim=0, ) # (time_steps-1) mean_loss = torch.mean(time_step_loss) @@ -314,7 +287,6 @@ def validation_step(self, batch, batch_idx): prediction, target, pred_std, - mask=self.interior_mask_bool, sum_vars=False, ) # (B, pred_steps, d_f) self.val_metrics["mse"].append(entry_mses) @@ -341,9 +313,7 @@ def test_step(self, batch, batch_idx): # pred_steps, num_grid_nodes, d_f) or (d_f,) time_step_loss = torch.mean( - self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool - ), + self.loss(prediction, target, pred_std), dim=0, ) # (time_steps-1,) mean_loss = torch.mean(time_step_loss) @@ -372,16 +342,13 @@ def test_step(self, batch, batch_idx): prediction, target, pred_std, - mask=self.interior_mask_bool, sum_vars=False, ) # (B, pred_steps, d_f) self.test_metrics[metric_name].append(batch_metric_vals) if self.output_std: # Store output std. 
per variable, spatially averaged - mean_pred_std = torch.mean( - pred_std[..., self.interior_mask_bool, :], dim=-2 - ) # (B, pred_steps, d_f) + mean_pred_std = torch.mean(pred_std, dim=-2) # (B, pred_steps, d_f) self.test_metrics["output_std"].append(mean_pred_std) # Save per-sample spatial loss for specific times diff --git a/neural_lam/vis.py b/neural_lam/vis.py index b9d18b39..31de8f32 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -86,13 +86,6 @@ def plot_prediction( extent = datastore.get_xy_extent("state") - # Set up masking of border region - da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region - fig, axes = plt.subplots( 1, 2, @@ -112,7 +105,6 @@ def plot_prediction( data_grid, origin="lower", extent=extent, - alpha=pixel_alpha, vmin=vmin, vmax=vmax, cmap="plasma", @@ -147,13 +139,6 @@ def plot_spatial_error( extent = datastore.get_xy_extent("state") - # Set up masking of border region - da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region - fig, ax = plt.subplots( figsize=(5, 4.8), subplot_kw={"projection": datastore.coords_projection}, @@ -170,7 +155,6 @@ def plot_spatial_error( error_grid, origin="lower", extent=extent, - alpha=pixel_alpha, vmin=vmin, vmax=vmax, cmap="OrRd", diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index 9075d404..d62c7356 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -148,12 +148,6 @@ def __init__( times = [self.T0 + dt * i for i in range(n_timesteps)] self.ds.coords["time"] = times - # Add boundary mask - self.ds["boundary_mask"] = xr.DataArray( - np.random.choice([0, 1], size=(n_points_1d, n_points_1d)), - dims=["x", "y"], - ) - # Stack the spatial dimensions into grid_index self.ds = self.ds.stack(grid_index=self.CARTESIAN_COORDS) @@ -342,22 +336,6 @@ def get_dataarray( dim_order = self.expected_dim_order(category=category) return self.ds[category].transpose(*dim_order) - @cached_property - def boundary_mask(self) -> xr.DataArray: - """ - Return the boundary mask for the dataset, with spatial dimensions - stacked. Where the value is 1, the grid point is a boundary point, and - where the value is 0, the grid point is not a boundary point. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions - `('grid_index',)`. - - """ - return self.ds["boundary_mask"] - def get_xy(self, category: str, stacked: bool) -> ndarray: """Return the x, y coordinates of the dataset. diff --git a/tests/test_datastores.py b/tests/test_datastores.py index 4a4b1100..a91f6245 100644 --- a/tests/test_datastores.py +++ b/tests/test_datastores.py @@ -18,8 +18,6 @@ dataarray for the given category. - `get_dataarray` (method): Return the processed data (as a single `xr.DataArray`) for the given category and test/train/val-split. -- `boundary_mask` (property): Return the boundary mask for the dataset, - with spatial dimensions stacked. - `config` (property): Return the configuration of the datastore. 
In addition BaseRegularGridDatastore must have the following methods and @@ -213,25 +211,6 @@ def test_get_dataarray(datastore_name): assert n_features["train"] == n_features["val"] == n_features["test"] -@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_boundary_mask(datastore_name): - """Check that the `datastore.boundary_mask` property is implemented and - that the returned object is an xarray DataArray with the correct shape.""" - datastore = init_datastore_example(datastore_name) - da_mask = datastore.boundary_mask - - assert isinstance(da_mask, xr.DataArray) - assert set(da_mask.dims) == {"grid_index"} - assert da_mask.dtype == "int" - assert set(da_mask.values) == {0, 1} - assert da_mask.sum() > 0 - assert da_mask.sum() < da_mask.size - - if isinstance(datastore, BaseRegularGridDatastore): - grid_shape = datastore.grid_shape_state - assert datastore.boundary_mask.size == grid_shape.x * grid_shape.y - - @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) def test_get_xy_extent(datastore_name): """Check that the `datastore.get_xy_extent` method is implemented and that From d545cb7576de020b7d721c08741e784bc2b69c24 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:55:56 +0100 Subject: [PATCH 029/190] added gcsfs dependency for era5 weatherbench download --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f0bc0851..5bbe4d92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,8 @@ dependencies = [ "torch-geometric==2.3.1", "parse>=1.20.2", "dataclass-wizard>=0.22.3", - "mllam-data-prep>=0.5.0", + "gcsfs>=2021.10.0", + "mllam-data-prep @ git+https://github.com/leifdenby/mllam-data-prep@temp/for-neural-lam-datastores", ] requires-python = ">=3.9" From 5c1a7d7cf9a4befb874ce847424787e818cced75 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:57:57 +0100 Subject: [PATCH 030/190] added new era5 datastore config for boundary --- tests/conftest.py | 19 +++- .../mdp/era5_1000hPa_winds/.gitignore | 2 + .../mdp/era5_1000hPa_winds/config.yaml | 3 + .../era5_1000hPa_winds/era5.datastore.yaml | 90 +++++++++++++++++++ 4 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml diff --git a/tests/conftest.py b/tests/conftest.py index 6f579621..be5cf3e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,6 +94,15 @@ def download_meps_example_reduced_dataset(): dummydata=None, ) +DATASTORES_BOUNDARY_EXAMPLES = dict( + mdp=( + DATASTORE_EXAMPLES_ROOT_PATH + / "mdp" + / "era5_1000hPa_winds" + / "era5.datastore.yaml" + ) +) + DATASTORES[DummyDatastore.SHORT_NAME] = DummyDatastore @@ -102,5 +111,13 @@ def init_datastore_example(datastore_kind): datastore_kind=datastore_kind, config_path=DATASTORES_EXAMPLES[datastore_kind], ) - return datastore + + +def init_datastore_boundary_example(datastore_kind): + datastore_boundary = init_datastore( + datastore_kind=datastore_kind, + config_path=DATASTORES_BOUNDARY_EXAMPLES[datastore_kind], + ) + + return datastore_boundary diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore b/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore new file mode 100644 index 00000000..f2828f46 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore 
@@ -0,0 +1,2 @@ +*.zarr/ +graph/ diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml new file mode 100644 index 00000000..5d1e05f2 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml @@ -0,0 +1,3 @@ +datastore: + kind: mdp + config_path: era5.datastore.yaml diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml new file mode 100644 index 00000000..36b39501 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml @@ -0,0 +1,90 @@ +#TODO: What do these versions mean? Should they be updated? +schema_version: v0.2.0+dev +dataset_version: v1.0.0 + +output: + variables: + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-02T00:00 + end: 1990-09-10T00:00 + step: PT6H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-02T00:00 + end: 1990-09-07T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-05T00:00 + end: 1990-09-08T00:00 + test: + start: 1990-09-06T00:00 + end: 1990-09-10T00:00 + +inputs: + era_height_levels: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + u_component_of_wind: + level: + values: [1000,] + units: hPa + v_component_of_wind: + level: + values: [1000, ] + units: hPa + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + dims: [level] + name_format: "{var_name}{level}hPa" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + + era5_surface: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + - mean_surface_net_short_wave_radiation_flux + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + +extra: + projection: + class_name: PlateCarree + kwargs: + central_longitude: 0.0 From 30e4f05e1c9cc726180868450286d9cf8279ce07 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:58:36 +0100 Subject: [PATCH 031/190] removed left-over boundary-mask references --- neural_lam/datastore/mdp.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 5365c723..fd9acb4e 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -26,11 +26,10 @@ class MDPDatastore(BaseRegularGridDatastore): SHORT_NAME = "mdp" - def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): + def __init__(self, config_path, reuse_existing=True): """ Construct a new MDPDatastore from the configuration file at - `config_path`. A boundary mask is created with `n_boundary_points` - boundary points. If `reuse_existing` is True, the dataset is loaded + `config_path`. 
If `reuse_existing` is True, the dataset is loaded from a zarr file if it exists (unless the config has been modified since the zarr was created), otherwise it is created from the configuration file. @@ -41,8 +40,6 @@ def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): The path to the configuration file, this will be fed to the `mllam_data_prep.Config.from_yaml_file` method to then call `mllam_data_prep.create_dataset` to create the dataset. - n_boundary_points : int - The number of boundary points to use in the boundary mask. reuse_existing : bool Whether to reuse an existing dataset zarr file if it exists and its creation date is newer than the configuration file. @@ -69,7 +66,6 @@ def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): if self._ds is None: self._ds = mdp.create_dataset(config=self._config) self._ds.to_zarr(fp_ds) - self._n_boundary_points = n_boundary_points print("The loaded datastore contains the following features:") for category in ["state", "forcing", "static"]: From 6a8c593f422c2844545feb2cc7e57de520dc1062 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:59:12 +0100 Subject: [PATCH 032/190] make check for existing category in datastore more flexible (for boundary) --- neural_lam/datastore/mdp.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index fd9acb4e..67aaa9d0 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -153,8 +153,8 @@ def get_vars_units(self, category: str) -> List[str]: The units of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature_units"].values.tolist() @@ -172,8 +172,8 @@ def get_vars_names(self, category: str) -> List[str]: The names of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature"].values.tolist() @@ -192,8 +192,8 @@ def get_vars_long_names(self, category: str) -> List[str]: The long names of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature_long_name"].values.tolist() @@ -248,9 +248,9 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: The xarray DataArray object with processed dataset. 
""" - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") - return None + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") + return [] da_category = self._ds[category] From 17c920d36848d61153fd53781d8ec3ac90e5de56 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Wed, 20 Nov 2024 16:00:15 +0100 Subject: [PATCH 033/190] implement xarray based (mostly) time slicing and windowing --- neural_lam/weather_dataset.py | 255 +++++++++++++++------------------- 1 file changed, 111 insertions(+), 144 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 5d35a4b7..c8806d1c 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -64,10 +64,16 @@ def __init__( self.da_state = self.datastore.get_dataarray( category="state", split=self.split ) + if self.da_state is None: + raise ValueError( + "A non-empty state dataarray must be provided. " + "The datastore.get_dataarray() returned None or empty array " + "for category='state'" + ) self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) - # XXX For now boundary data is always considered forcing data + # XXX For now boundary data is always considered mdp-forcing data self.da_boundary = self.datastore_boundary.get_dataarray( category="forcing", split=self.split ) @@ -102,53 +108,36 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + def get_time_step(times): + """Calculate the time step from the data""" + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + + # Check time step consistency in state data + _ = get_time_step(self.da_state.time.values) + # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: state_times = self.da_state.time state_time_min = state_times.min().values state_time_max = state_times.max().values - def get_time_step(times): - """Calculate the time step from the data""" - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] - if self.da_forcing is not None: + # Forcing data is part of the same datastore as state data + # During creation the time dimension of the forcing data + # is matched to the state data forcing_times = self.da_forcing.time - forcing_time_step = get_time_step(forcing_times.values) - forcing_time_min = forcing_times.min().values - forcing_time_max = forcing_times.max().values - - # Calculate required bounds for forcing using its time step - forcing_required_time_min = ( - state_time_min - - self.num_past_forcing_steps * forcing_time_step - ) - forcing_required_time_max = ( - state_time_max - + self.num_future_forcing_steps * forcing_time_step - ) - - if forcing_time_min > forcing_required_time_min: - raise ValueError( - f"Forcing data starts too late." - f"Required start: {forcing_required_time_min}, " - f"but forcing starts at {forcing_time_min}." - ) - - if forcing_time_max < forcing_required_time_max: - raise ValueError( - f"Forcing data ends too early." - f"Required end: {forcing_required_time_max}," - f"but forcing ends at {forcing_time_max}." 
- ) + _ = get_time_step(forcing_times.values) if self.da_boundary is not None: + # Boundary data is part of a separate datastore + # The boundary data is allowed to have a different time_step + # Check that the boundary data covers the required time range boundary_times = self.da_boundary.time boundary_time_step = get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values @@ -204,8 +193,8 @@ def get_time_step(times): category="forcing" ) ) - self.da_boundary_mean = self.ds_boundary_stats.boundary_mean - self.da_boundary_std = self.ds_boundary_stats.boundary_std + self.da_boundary_mean = self.ds_boundary_stats.forcing_mean + self.da_boundary_std = self.ds_boundary_stats.forcing_std def __len__(self): if self.datastore.is_forecast: @@ -253,7 +242,7 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_time(self, da_state, da_forcing, idx, n_steps: int): + def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): """ Produce time slices of the given dataarrays `da_state` (state) and `da_forcing` (forcing). For the state data, slicing is done as before @@ -316,8 +305,13 @@ def _slice_time(self, da_state, da_forcing, idx, n_steps: int): ) da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) + if da_forcing is None: + return da_state_sliced, None + # Get the state times for matching state_times = da_state_sliced["time"] + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] # Match forcing data to state times based on nearest neighbor if self.datastore.is_forecast: @@ -371,39 +365,80 @@ def _slice_time(self, da_state, da_forcing, idx, n_steps: int): da_forcing_matched = da_forcing_matched.assign_coords( time_diff=("time", time_diff_steps) ) - else: # For analysis data, match directly using the 'time' coordinate forcing_times = da_forcing["time"] # Compute time differences time_deltas = ( - forcing_times.values[:, np.newaxis] - - state_times.values[np.newaxis, :] + state_times.values[np.newaxis, :] + - forcing_times.values[:, np.newaxis] + ) + idx_min = np.abs(time_deltas).argmin(axis=0) + + time_diff_steps = xr.DataArray( + np.stack( + [ + np.diagonal(time_deltas, offset=offset)[ + -len(state_times) + init_steps : + ] + / state_time_step + for offset in range( + -self.num_past_forcing_steps, + self.num_future_forcing_steps + 1, + ) + ], + axis=1, + ), + dims=["time", "window"], + coords={ + "time": state_times.isel(time=slice(init_steps, None)), + "window": np.arange( + -self.num_past_forcing_steps, + self.num_future_forcing_steps + 1, + ), + }, + name="time_diff_steps", ) - time_diffs = np.abs(time_deltas) - idx_min = time_diffs.argmin(axis=0) - # Slice the forcing data using matched indices - da_forcing_matched = da_forcing.isel(time=idx_min) - da_forcing_matched = da_forcing_matched.assign_coords( - time=state_times + # Create window dimension using rolling + window_size = ( + self.num_past_forcing_steps + self.num_future_forcing_steps + 1 ) - - # Calculate time differences in multiples of state time steps - state_time_step = state_times.values[1] - state_times.values[0] - time_diff_steps = ( - time_deltas[idx_min, np.arange(len(state_times))] - / state_time_step + da_forcing_windowed = da_forcing.rolling( + time=window_size, center=True + ).construct(window_dim="window") + da_forcing_matched = da_forcing_windowed.isel( + time=idx_min[init_steps:] ) # Add time difference as a new coordinate da_forcing_matched = da_forcing_matched.assign_coords( - 
time_diff=("time", time_diff_steps) + time_diff=time_diff_steps ) return da_state_sliced, da_forcing_matched + def _process_windowed_data(self, da_windowed, da_state, da_target_times): + """Helper function to process windowed data after standardization.""" + stacked_dim = "forcing_feature_windowed" + if da_windowed is not None: + # Stack the 'feature' and 'window' dimensions + da_windowed = da_windowed.stack( + {stacked_dim: ("forcing_feature", "window")} + ) + else: + # Create empty DataArray with the correct dimensions and coordinates + return xr.DataArray( + data=np.empty((self.ar_steps, da_state.grid_index.size, 0)), + dims=("time", "grid_index", f"{stacked_dim}"), + coords={ + "time": da_target_times, + "grid_index": da_state.grid_index, + f"{stacked_dim}": [], + }, + ) + def _build_item_dataarrays(self, idx): """ Create the dataarrays for the initial states, target states and forcing @@ -459,18 +494,21 @@ def _build_item_dataarrays(self, idx): else: da_boundary = None - # handle time sampling in a way that is compatible with both analysis - # and forecast data - da_state = self._slice_time( - da_state=da_state, idx=idx, n_steps=self.ar_steps + # if da_forcing is None, the function will return None for + # da_forcing_windowed + da_state, da_forcing_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_forcing, ) - if da_forcing is not None: - da_forcing_windowed = self._slice_time( - da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps - ) + if da_boundary is not None: - da_boundary_windowed = self._slice_time( - da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps + _, da_boundary_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_boundary, ) # load the data into memory @@ -506,83 +544,12 @@ def _build_item_dataarrays(self, idx): da_boundary_windowed - self.da_boundary_mean ) / self.da_boundary_std - if da_forcing is not None: - # Expand 'time_diff' to align with 'forcing_feature' and 'window' - # dimensions 'time_diff' has dimension ('time'), expand to ('time', - # 'forcing_feature', 'window') - time_diff_expanded = da_forcing_windowed["time_diff"].expand_dims( - forcing_feature=da_forcing_windowed["forcing_feature"], - window=da_forcing_windowed["window"], - ) - - # Stack 'forcing_feature' and 'window' into a single - # 'forcing_feature_windowed' dimension - da_forcing_windowed = da_forcing_windowed.stack( - forcing_feature_windowed=("forcing_feature", "window") - ) - time_diff_expanded = time_diff_expanded.stack( - forcing_feature_windowed=("forcing_feature", "window") - ) - - # Assign 'time_diff' as a coordinate to 'forcing_feature_windowed' - da_forcing_windowed = da_forcing_windowed.assign_coords( - time_diff=( - "forcing_feature_windowed", - time_diff_expanded.values, - ) - ) - else: - # Create an empty forcing tensor with the right shape - da_forcing_windowed = xr.DataArray( - data=np.empty( - (self.ar_steps, da_state.grid_index.size, 0), - ), - dims=("time", "grid_index", "forcing_feature"), - coords={ - "time": da_target_times, - "grid_index": da_state.grid_index, - "forcing_feature": [], - }, - ) - - if da_boundary is not None: - # If 'da_boundary_windowed' also has 'time_diff', process similarly - # Expand 'time_diff' to align with 'boundary_feature' and 'window' - # dimensions - time_diff_expanded = da_boundary_windowed["time_diff"].expand_dims( - boundary_feature=da_boundary_windowed["boundary_feature"], - window=da_boundary_windowed["window"], - ) - - # Stack 
'boundary_feature' and 'window' into a single
-            # 'boundary_feature_windowed' dimension
-            da_boundary_windowed = da_boundary_windowed.stack(
-                boundary_feature_windowed=("boundary_feature", "window")
-            )
-            time_diff_expanded = time_diff_expanded.stack(
-                boundary_feature_windowed=("boundary_feature", "window")
-            )
-
-            # Assign 'time_diff' as a coordinate to 'boundary_feature_windowed'
-            da_boundary_windowed = da_boundary_windowed.assign_coords(
-                time_diff=(
-                    "boundary_feature_windowed",
-                    time_diff_expanded.values,
-                )
-            )
-        else:
-            # Create an empty boundary tensor with the right shape
-            da_boundary_windowed = xr.DataArray(
-                data=np.empty(
-                    (self.ar_steps, da_state.grid_index.size, 0),
-                ),
-                dims=("time", "grid_index", "boundary_feature"),
-                coords={
-                    "time": da_target_times,
-                    "grid_index": da_state.grid_index,
-                    "boundary_feature": [],
-                },
-            )
+        da_forcing_windowed = self._process_windowed_data(
+            da_forcing_windowed, da_state, da_target_times
+        )
+        da_boundary_windowed = self._process_windowed_data(
+            da_boundary_windowed, da_state, da_target_times
+        )

         return (
             da_init_states,

From 79199956225277cb88b255a514be1a72634926c5 Mon Sep 17 00:00:00 2001
From: Simon Adamov
Date: Thu, 21 Nov 2024 07:09:52 +0100
Subject: [PATCH 034/190] cleanup analysis based time-slicing

---
 neural_lam/weather_dataset.py | 85 +++++++++++++++++------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py
index c8806d1c..bbfb5705 100644
--- a/neural_lam/weather_dataset.py
+++ b/neural_lam/weather_dataset.py
@@ -245,11 +245,12 @@ def __len__(self):
     def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None):
         """
         Produce time slices of the given dataarrays `da_state` (state) and
-        `da_forcing` (forcing). For the state data, slicing is done as before
-        based on `idx`. For the forcing data, nearest neighbor matching is
-        performed based on the state times. Additionally, the time difference
-        between the matched forcing times and state times (in multiples of state
-        time steps) is added to the forcing dataarray.
+        `da_forcing` (forcing). For the state data, slicing is done based on
+        `idx`. For the forcing data, nearest neighbor matching is performed
+        based on the state times. Additionally, the time difference between the
+        matched forcing times and state times (in multiples of state time steps)
+        is added to the forcing dataarray. This will be used as an additional
+        feature in the model (temporal embedding).

         Parameters
         ----------
@@ -269,9 +270,8 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None):
             The sliced state dataarray with dims ('time', 'grid_index',
             'state_feature').
         da_forcing_matched : xr.DataArray
-            The forcing dataarray matched to state times with an added
-            coordinate 'time_diff', representing the time difference to state
-            times in multiples of state time steps.
+            The sliced forcing dataarray with dims ('time', 'grid_index',
+            'forcing_feature_windowed').
""" # Number of initial steps required (e.g., for initializing models) init_steps = 2 @@ -308,9 +308,9 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): if da_forcing is None: return da_state_sliced, None - # Get the state times for matching + # Get the state times and its temporal resolution for matching with + # forcing data state_times = da_state_sliced["time"] - # Calculate time differences in multiples of state time steps state_time_step = state_times.values[1] - state_times.values[0] # Match forcing data to state times based on nearest neighbor @@ -369,39 +369,29 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): # For analysis data, match directly using the 'time' coordinate forcing_times = da_forcing["time"] - # Compute time differences + # Compute time differences between forcing and state times + # (in multiples of state time steps) + # Retrieve the indices of the closest times in the forcing data time_deltas = ( - state_times.values[np.newaxis, :] - - forcing_times.values[:, np.newaxis] - ) + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) - time_diff_steps = xr.DataArray( - np.stack( - [ - np.diagonal(time_deltas, offset=offset)[ - -len(state_times) + init_steps : - ] - / state_time_step - for offset in range( - -self.num_past_forcing_steps, - self.num_future_forcing_steps + 1, - ) - ], - axis=1, - ), - dims=["time", "window"], - coords={ - "time": state_times.isel(time=slice(init_steps, None)), - "window": np.arange( - -self.num_past_forcing_steps, - self.num_future_forcing_steps + 1, - ), - }, - name="time_diff_steps", + time_diff_steps = np.stack( + [ + time_deltas[ + idx_i + - self.num_past_forcing_steps : idx_i + + self.num_future_forcing_steps + + 1, + init_steps + step_i, + ] + for (step_i, idx_i) in enumerate(idx_min[init_steps:]) + ], ) - # Create window dimension using rolling + # Create window dimension for forcing data to stack later window_size = ( self.num_past_forcing_steps + self.num_future_forcing_steps + 1 ) @@ -412,9 +402,11 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): time=idx_min[init_steps:] ) - # Add time difference as a new coordinate - da_forcing_matched = da_forcing_matched.assign_coords( - time_diff=time_diff_steps + # Add time difference as a new coordinate to concatenate to the + # forcing features later + da_forcing_matched["time_diff_steps"] = ( + ("time", "window"), + time_diff_steps, ) return da_state_sliced, da_forcing_matched @@ -423,13 +415,19 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data after standardization.""" stacked_dim = "forcing_feature_windowed" if da_windowed is not None: - # Stack the 'feature' and 'window' dimensions + # Stack the 'feature' and 'window' dimensions and add the + # time step differences to the existing features as a temporal + # embedding da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) + da_windowed = xr.concat( + [da_windowed, da_windowed.time_diff_steps], + dim="forcing_feature_windowed", + ) else: # Create empty DataArray with the correct dimensions and coordinates - return xr.DataArray( + da_windowed = xr.DataArray( data=np.empty((self.ar_steps, da_state.grid_index.size, 0)), dims=("time", "grid_index", f"{stacked_dim}"), coords={ @@ -438,6 +436,7 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): f"{stacked_dim}": [], }, 
) + return da_windowed def _build_item_dataarrays(self, idx): """ From 9bafceec0480ead53e4cdd32b24be669c195316c Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:59:42 +0100 Subject: [PATCH 035/190] implement datastore_boundary in existing tests --- tests/test_datasets.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 419aece0..67eac70e 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -14,12 +14,19 @@ from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataset -from tests.conftest import init_datastore_example +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) from tests.dummy_datastore import DummyDatastore @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_dataset_item_shapes(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_dataset_item_shapes(datastore_name, datastore_boundary_name): """Check that the `datastore.get_dataarray` method is implemented. Validate the shapes of the tensors match between the different @@ -31,6 +38,9 @@ def test_dataset_item_shapes(datastore_name): """ datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) N_gridpoints = datastore.num_grid_points N_pred_steps = 4 @@ -38,6 +48,7 @@ def test_dataset_item_shapes(datastore_name): num_future_forcing_steps = 1 dataset = WeatherDataset( datastore=datastore, + datastore_boundary=datastore_boundary, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -48,7 +59,7 @@ def test_dataset_item_shapes(datastore_name): # unpack the item, this is the current return signature for # WeatherDataset.__getitem__ - init_states, target_states, forcing, target_times = item + init_states, target_states, forcing, boundary, target_times = item # initial states assert init_states.ndim == 3 @@ -81,14 +92,23 @@ def test_dataset_item_shapes(datastore_name): @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_dataset_item_create_dataarray_from_tensor(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_dataset_item_create_dataarray_from_tensor( + datastore_name, datastore_boundary_name +): datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 dataset = WeatherDataset( datastore=datastore, + datastore_boundary=datastore_boundary, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -158,13 +178,19 @@ def test_dataset_item_create_dataarray_from_tensor(datastore_name): @pytest.mark.parametrize("split", ["train", "val", "test"]) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_single_batch(datastore_name, split): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_single_batch(datastore_name, datastore_boundary_name, split): """Check that the `datastore.get_dataarray` method is implemented. And that it returns an xarray DataArray with the correct dimensions. 
""" datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) device_name = ( torch.device("cuda") if torch.cuda.is_available() else "cpu" @@ -210,7 +236,9 @@ def _create_graph(): ) ) - dataset = WeatherDataset(datastore=datastore, split=split, ar_steps=2) + dataset = WeatherDataset( + datastore=datastore, datastore_boundary=datastore_boundary, split=split + ) model = GraphLAM(args=args, datastore=datastore, config=config) # noqa From ce06bbc24dc4765944c0b937ace0dc4d0f11f364 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 21 Nov 2024 16:39:27 +0100 Subject: [PATCH 036/190] allow for grid shape retrieval from forcing data --- neural_lam/datastore/mdp.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 67aaa9d0..57a3249f 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -377,8 +377,17 @@ def grid_shape_state(self): The shape of the cartesian grid for the state variables. """ - ds_state = self.unstack_grid_coords(self._ds["state"]) - da_x, da_y = ds_state.x, ds_state.y + # Boundary data often has no state features + if "state" not in self._ds: + warnings.warn( + "no state data found in datastore" + "returning grid shape from forcing data" + ) + ds_forcing = self.unstack_grid_coords(self._ds["forcing"]) + da_x, da_y = ds_forcing.x, ds_forcing.y + else: + ds_state = self.unstack_grid_coords(self._ds["state"]) + da_x, da_y = ds_state.x, ds_state.y assert da_x.ndim == da_y.ndim == 1 return CartesianGridShape(x=da_x.size, y=da_y.size) From 884b5c623117cb18c405ac869caaff028625e5fb Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 21 Nov 2024 16:40:47 +0100 Subject: [PATCH 037/190] rearrange time slicing, boundary first --- neural_lam/weather_dataset.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index bbfb5705..32add37a 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -495,13 +495,6 @@ def _build_item_dataarrays(self, idx): # if da_forcing is None, the function will return None for # da_forcing_windowed - da_state, da_forcing_windowed = self._slice_time( - da_state=da_state, - idx=idx, - n_steps=self.ar_steps, - da_forcing=da_forcing, - ) - if da_boundary is not None: _, da_boundary_windowed = self._slice_time( da_state=da_state, @@ -509,6 +502,12 @@ def _build_item_dataarrays(self, idx): n_steps=self.ar_steps, da_forcing=da_boundary, ) + da_state, da_forcing_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_forcing, + ) # load the data into memory da_state.load() From 5904cbe9da67d3e98eaab0cebd501a2ad0ded7f3 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Mon, 25 Nov 2024 16:42:21 +0100 Subject: [PATCH 038/190] identified issue, cleanup next --- neural_lam/datastore/base.py | 9 ++++- neural_lam/datastore/mdp.py | 5 ++- neural_lam/models/ar_model.py | 46 ++++++++++++++++++++-- neural_lam/train_model.py | 2 +- neural_lam/vis.py | 73 +++++++++++++++++++++++++---------- 5 files changed, 107 insertions(+), 28 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index 0317c2e5..b0055e39 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -295,8 +295,13 @@ def get_xy_extent(self, category: str) -> List[float]: The extent of the x, y coordinates. 
""" - xy = self.get_xy(category, stacked=False) - extent = [xy[0].min(), xy[0].max(), xy[1].min(), xy[1].max()] + xy = self.get_xy(category, stacked=True) + extent = [ + xy[:, 0].min(), + xy[:, 0].max(), + xy[:, 1].min(), + xy[:, 1].max(), + ] return [float(v) for v in extent] @property diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 10593a82..0d1aac7b 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -1,4 +1,5 @@ # Standard library +import copy import warnings from functools import cached_property from pathlib import Path @@ -394,7 +395,9 @@ def coords_projection(self) -> ccrs.Projection: class_name = projection_info["class_name"] ProjectionClass = getattr(ccrs, class_name) - kwargs = projection_info["kwargs"] + # need to copy otherwise we modify the dict stored in the dataclass + # in-place + kwargs = copy.deepcopy(projection_info["kwargs"]) globe_kwargs = kwargs.pop("globe", {}) if len(globe_kwargs) > 0: diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index bc4c6719..b55143f0 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -7,12 +7,14 @@ import pytorch_lightning as pl import torch import wandb +from loguru import logger # Local from .. import metrics, vis from ..config import NeuralLAMConfig from ..datastore import BaseDatastore from ..loss_weighting import get_state_feature_weighting +from ..weather_dataset import WeatherDataset class ARModel(pl.LightningModule): @@ -147,6 +149,14 @@ def __init__( # For storing spatial loss maps during evaluation self.spatial_loss_maps = [] + def _create_dataarray_from_tensor(self, tensor, time, split, category): + weather_dataset = WeatherDataset(datastore=self._datastore, split=split) + time = np.array(time, dtype="datetime64[ns]") + da = weather_dataset.create_dataarray_from_tensor( + tensor=tensor, time=time, category=category + ) + return da + def configure_optimizers(self): opt = torch.optim.AdamW( self.parameters(), lr=self.args.lr, betas=(0.9, 0.95) @@ -406,10 +416,13 @@ def test_step(self, batch, batch_idx): ) self.plot_examples( - batch, n_additional_examples, prediction=prediction + batch, + n_additional_examples, + prediction=prediction, + split="test", ) - def plot_examples(self, batch, n_examples, prediction=None): + def plot_examples(self, batch, n_examples, split, prediction=None): """ Plot the first n_examples forecasts from batch @@ -422,18 +435,34 @@ def plot_examples(self, batch, n_examples, prediction=None): prediction, target, _, _ = self.common_step(batch) target = batch[1] + time = batch[3] # Rescale to original data scale prediction_rescaled = prediction * self.state_std + self.state_mean target_rescaled = target * self.state_std + self.state_mean # Iterate over the examples - for pred_slice, target_slice in zip( - prediction_rescaled[:n_examples], target_rescaled[:n_examples] + for pred_slice, target_slice, time_slice in zip( + prediction_rescaled[:n_examples], + target_rescaled[:n_examples], + time[:n_examples], ): # Each slice is (pred_steps, num_grid_nodes, d_f) self.plotted_examples += 1 # Increment already here + da_prediction = self._create_dataarray_from_tensor( + tensor=pred_slice, + time=time_slice, + split=split, + category="state", + ).unstack("grid_index") + da_target = self._create_dataarray_from_tensor( + tensor=target_slice, + time=time_slice, + split=split, + category="state", + ).unstack("grid_index") + var_vmin = ( torch.minimum( pred_slice.flatten(0, 1).min(dim=0)[0], @@ -465,6 +494,10 @@ def 
plot_examples(self, batch, n_examples, prediction=None): title=f"{var_name} ({var_unit}), " f"t={t_i} ({self._datastore.step_length * t_i} h)", vrange=var_vrange, + da_prediction=da_prediction.isel( + state_feature=var_i + ).squeeze(), + da_target=da_target.isel(state_feature=var_i).squeeze(), ) for var_i, (var_name, var_unit, var_vrange) in enumerate( zip( @@ -476,6 +509,11 @@ def plot_examples(self, batch, n_examples, prediction=None): ] example_i = self.plotted_examples + for i, fig in enumerate(var_figs): + fn = f"example_{i}_{example_i}_t{t_i}.png" + fig.savefig(fn) + logger.info(f"Saved example plot to {fn}") + wandb.log( { f"{var_name}_example_{example_i}": wandb.Image(fig) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 74146c89..9d1d5039 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -23,7 +23,7 @@ } -@logger.catch +@logger.catch(reraise=True) def main(input_args=None): """Main function for training and evaluating models.""" parser = ArgumentParser( diff --git a/neural_lam/vis.py b/neural_lam/vis.py index b9d18b39..357a8977 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -68,6 +68,8 @@ def plot_prediction( pred, target, datastore: BaseRegularGridDatastore, + da_prediction=None, + da_target=None, title=None, vrange=None, ): @@ -88,10 +90,8 @@ def plot_prediction( # Set up masking of border region da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region + mask_values = np.invert(da_mask.values.astype(bool)).astype(float) + pixel_alpha = mask_values.clip(0.7, 1) # Faded border region fig, axes = plt.subplots( 1, @@ -100,29 +100,62 @@ def plot_prediction( subplot_kw={"projection": datastore.coords_projection}, ) + use_xarray = True + # Plot pred and target - for ax, data in zip(axes, (target, pred)): + + if not use_xarray: + for ax, data in zip(axes, (target, pred)): + ax.coastlines() # Add coastline outlines + data_grid = ( + data.reshape( + [datastore.grid_shape_state.x, datastore.grid_shape_state.y] + ) + .T.cpu() + .numpy() + ) + im = ax.imshow( + data_grid, + origin="lower", + extent=extent, + alpha=pixel_alpha, + vmin=vmin, + vmax=vmax, + cmap="plasma", + ) + + cbar = fig.colorbar(im, aspect=30) + cbar.ax.tick_params(labelsize=10) + + x = da_target.x.values + y = da_target.y.values + extent = [x.min(), x.max(), y.min(), y.max()] + for ax, da in zip(axes, (da_target, da_prediction)): ax.coastlines() # Add coastline outlines - data_grid = ( - data.reshape(list(datastore.grid_shape_state.values.values())) - .cpu() - .numpy() - ) - im = ax.imshow( - data_grid, + im = da.plot.imshow( + ax=ax, origin="lower", + x="x", extent=extent, - alpha=pixel_alpha, + alpha=pixel_alpha.T, vmin=vmin, vmax=vmax, cmap="plasma", + transform=datastore.coords_projection, ) + # da.plot.pcolormesh( + # ax=ax, + # x="x", + # vmin=vmin, + # vmax=vmax, + # transform=datastore.coords_projection, + # cmap="plasma", + # ) + # Ticks and labels axes[0].set_title("Ground Truth", size=15) axes[1].set_title("Prediction", size=15) - cbar = fig.colorbar(im, aspect=30) - cbar.ax.tick_params(labelsize=10) if title: fig.suptitle(title, size=20) @@ -150,9 +183,7 @@ def plot_spatial_error( # Set up masking of border region da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region + pixel_alpha = 
mask_reshaped.clip(0.7, 1)  # Faded border region

     fig, ax = plt.subplots(
         figsize=(5, 4.8),
@@ -161,8 +192,10 @@ def plot_spatial_error(
     ax.coastlines()  # Add coastline outlines
     error_grid = (
-        error.reshape(list(datastore.grid_shape_state.values.values()))
-        .cpu()
+        error.reshape(
+            [datastore.grid_shape_state.x, datastore.grid_shape_state.y]
+        )
+        .T.cpu()
         .numpy()
     )

From efe03027842a22139d6554d68ffee7b6ebe0ad73 Mon Sep 17 00:00:00 2001
From: Leif Denby
Date: Tue, 26 Nov 2024 13:46:05 +0100
Subject: [PATCH 039/190] use xarray plot only

---
 neural_lam/models/ar_model.py | 47 +++++++++++++++++++++++++++--------
 neural_lam/vis.py             | 43 +++-----------------------------
 2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py
index b55143f0..0af25367 100644
--- a/neural_lam/models/ar_model.py
+++ b/neural_lam/models/ar_model.py
@@ -1,5 +1,6 @@
 # Standard library
 import os
+from typing import List, Union

 # Third-party
 import matplotlib.pyplot as plt
@@ -7,7 +8,7 @@
 import pytorch_lightning as pl
 import torch
 import wandb
-from loguru import logger
+import xarray as xr

 # Local
 from .. import metrics, vis
@@ -149,7 +150,35 @@ def __init__(
         # For storing spatial loss maps during evaluation
         self.spatial_loss_maps = []

-    def _create_dataarray_from_tensor(self, tensor, time, split, category):
+    def _create_dataarray_from_tensor(
+        self,
+        tensor: torch.Tensor,
+        time: Union[int, List[int]],
+        split: str,
+        category: str,
+    ) -> xr.DataArray:
+        """
+        Create an `xr.DataArray` from a tensor, with the correct dimensions and
+        coordinates to match the datastore used by the model. This function in
+        effect is the inverse of what is returned by
+        `WeatherDataset.__getitem__`.
+
+        Parameters
+        ----------
+        tensor : torch.Tensor
+            The tensor to convert to a `xr.DataArray` with dimensions [time,
+            grid_index, feature]
+        time : Union[int,List[int]]
+            The time index or indices for the data, given as integers or a list
+            of integers representing epoch time in nanoseconds.
+ split : str + The split of the data, either 'train', 'val', or 'test' + category : str + The category of the data, either 'state' or 'forcing' + """ + # TODO: creating an instance of WeatherDataset here on every call is + # not how this should be done but whether WeatherDataset should be + # provided to ARModel or where to put plotting still needs discussion weather_dataset = WeatherDataset(datastore=self._datastore, split=split) time = np.array(time, dtype="datetime64[ns]") da = weather_dataset.create_dataarray_from_tensor( @@ -482,14 +511,10 @@ def plot_examples(self, batch, n_examples, split, prediction=None): var_vranges = list(zip(var_vmin, var_vmax)) # Iterate over prediction horizon time steps - for t_i, (pred_t, target_t) in enumerate( - zip(pred_slice, target_slice), start=1 - ): + for t_i, _ in enumerate(zip(pred_slice, target_slice), start=1): # Create one figure per variable at this time step var_figs = [ vis.plot_prediction( - pred=pred_t[:, var_i], - target=target_t[:, var_i], datastore=self._datastore, title=f"{var_name} ({var_unit}), " f"t={t_i} ({self._datastore.step_length * t_i} h)", @@ -509,10 +534,10 @@ def plot_examples(self, batch, n_examples, split, prediction=None): ] example_i = self.plotted_examples - for i, fig in enumerate(var_figs): - fn = f"example_{i}_{example_i}_t{t_i}.png" - fig.savefig(fn) - logger.info(f"Saved example plot to {fn}") + # for i, fig in enumerate(var_figs): + # fn = f"example_{i}_{example_i}_t{t_i}.png" + # fig.savefig(fn) + # logger.info(f"Saved example plot to {fn}") wandb.log( { diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 357a8977..47c68e4f 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -65,8 +65,6 @@ def plot_error_map(errors, datastore: BaseRegularGridDatastore, title=None): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) def plot_prediction( - pred, - target, datastore: BaseRegularGridDatastore, da_prediction=None, da_target=None, @@ -81,8 +79,8 @@ def plot_prediction( """ # Get common scale for values if vrange is None: - vmin = min(vals.min().cpu().item() for vals in (pred, target)) - vmax = max(vals.max().cpu().item() for vals in (pred, target)) + vmin = min(da_prediction.min(), da_target.min()) + vmax = max(da_prediction.max(), da_target.max()) else: vmin, vmax = vrange @@ -100,39 +98,13 @@ def plot_prediction( subplot_kw={"projection": datastore.coords_projection}, ) - use_xarray = True - # Plot pred and target - - if not use_xarray: - for ax, data in zip(axes, (target, pred)): - ax.coastlines() # Add coastline outlines - data_grid = ( - data.reshape( - [datastore.grid_shape_state.x, datastore.grid_shape_state.y] - ) - .T.cpu() - .numpy() - ) - im = ax.imshow( - data_grid, - origin="lower", - extent=extent, - alpha=pixel_alpha, - vmin=vmin, - vmax=vmax, - cmap="plasma", - ) - - cbar = fig.colorbar(im, aspect=30) - cbar.ax.tick_params(labelsize=10) - x = da_target.x.values y = da_target.y.values extent = [x.min(), x.max(), y.min(), y.max()] for ax, da in zip(axes, (da_target, da_prediction)): ax.coastlines() # Add coastline outlines - im = da.plot.imshow( + da.plot.imshow( ax=ax, origin="lower", x="x", @@ -144,15 +116,6 @@ def plot_prediction( transform=datastore.coords_projection, ) - # da.plot.pcolormesh( - # ax=ax, - # x="x", - # vmin=vmin, - # vmax=vmax, - # transform=datastore.coords_projection, - # cmap="plasma", - # ) - # Ticks and labels axes[0].set_title("Ground Truth", size=15) axes[1].set_title("Prediction", size=15) From a489c2ed974397ea230d2e61b842d8d9384867dc Mon Sep 17 00:00:00 2001 
From: Leif Denby Date: Tue, 26 Nov 2024 14:07:06 +0100 Subject: [PATCH 040/190] don't reraise --- neural_lam/train_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 9d1d5039..74146c89 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -23,7 +23,7 @@ } -@logger.catch(reraise=True) +@logger.catch def main(input_args=None): """Main function for training and evaluating models.""" parser = ArgumentParser( From 242d08bcb5374cdd90aecfd49f501ed233f1ce0c Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Tue, 26 Nov 2024 14:50:03 +0100 Subject: [PATCH 041/190] remove debug plot --- neural_lam/models/ar_model.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 0af25367..c875688b 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -534,10 +534,6 @@ def plot_examples(self, batch, n_examples, split, prediction=None): ] example_i = self.plotted_examples - # for i, fig in enumerate(var_figs): - # fn = f"example_{i}_{example_i}_t{t_i}.png" - # fig.savefig(fn) - # logger.info(f"Saved example plot to {fn}") wandb.log( { From c1f706c29542d770ed49e910f8b9bd5caff1fdec Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Tue, 26 Nov 2024 16:04:24 +0100 Subject: [PATCH 042/190] remove extent calc used in diagnosing issue --- neural_lam/vis.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 47c68e4f..c814aacf 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -99,9 +99,6 @@ def plot_prediction( ) # Plot pred and target - x = da_target.x.values - y = da_target.y.values - extent = [x.min(), x.max(), y.min(), y.max()] for ax, da in zip(axes, (da_target, da_prediction)): ax.coastlines() # Add coastline outlines da.plot.imshow( From cf8e3e4c1be93a6ec074368aaf6f91c8042b5278 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 29 Nov 2024 14:51:36 +0100 Subject: [PATCH 043/190] add type annotation --- neural_lam/vis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index c814aacf..d6b57f88 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -2,6 +2,7 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np +import xarray as xr # Local from . import utils @@ -66,8 +67,8 @@ def plot_error_map(errors, datastore: BaseRegularGridDatastore, title=None): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) def plot_prediction( datastore: BaseRegularGridDatastore, - da_prediction=None, - da_target=None, + da_prediction: xr.DataArray = None, + da_target: xr.DataArray = None, title=None, vrange=None, ): From 85160cecf13ecfc9fc6a589ac1a9e3542da45e23 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 29 Nov 2024 15:03:06 +0100 Subject: [PATCH 044/190] ensure tensor copy to cpu mem before data-array creation --- neural_lam/models/ar_model.py | 10 ++++++---- neural_lam/weather_dataset.py | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index c875688b..0d8e6e3c 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -167,10 +167,12 @@ def _create_dataarray_from_tensor( ---------- tensor : torch.Tensor The tensor to convert to a `xr.DataArray` with dimensions [time, - grid_index, feature] + grid_index, feature]. The tensor will be copied to the CPU if it is + not already there. 
time : Union[int,List[int]] The time index or indices for the data, given as integers or a list - of integers representing epoch time in nanoseconds. + of integers representing epoch time in nanoseconds. The ints will be + copied to the CPU memory if they are not already there. split : str The split of the data, either 'train', 'val', or 'test' category : str @@ -180,9 +182,9 @@ def _create_dataarray_from_tensor( # not how this should be done but whether WeatherDataset should be # provided to ARModel or where to put plotting still needs discussion weather_dataset = WeatherDataset(datastore=self._datastore, split=split) - time = np.array(time, dtype="datetime64[ns]") + time = np.array(time.cpu(), dtype="datetime64[ns]") da = weather_dataset.create_dataarray_from_tensor( - tensor=tensor, time=time, category=category + tensor=tensor.cpu().numpy(), time=time, category=category ) return da diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 532e3c90..b5f85580 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -529,7 +529,8 @@ def create_dataarray_from_tensor( tensor : torch.Tensor The tensor to construct the DataArray from, this assumed to have the same dimension ordering as returned by the __getitem__ method - (i.e. time, grid_index, {category}_feature). + (i.e. time, grid_index, {category}_feature). The tensor will be + copied to the CPU before constructing the DataArray. time : datetime.datetime or list[datetime.datetime] The time or times of the tensor. category : str @@ -581,7 +582,7 @@ def _is_listlike(obj): coords["time"] = time da = xr.DataArray( - tensor.numpy(), + tensor.cpu().numpy(), dims=dims, coords=coords, ) From 52c452879f56c7f982cfd5d55a5259f37cb6b030 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 29 Nov 2024 15:05:36 +0100 Subject: [PATCH 045/190] apply time-indexing to support ar_steps_val > 1 --- neural_lam/models/ar_model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 0d8e6e3c..44baf9c2 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -522,9 +522,11 @@ def plot_examples(self, batch, n_examples, split, prediction=None): f"t={t_i} ({self._datastore.step_length * t_i} h)", vrange=var_vrange, da_prediction=da_prediction.isel( - state_feature=var_i + state_feature=var_i, time=t_i - 1 + ).squeeze(), + da_target=da_target.isel( + state_feature=var_i, time=t_i - 1 ).squeeze(), - da_target=da_target.isel(state_feature=var_i).squeeze(), ) for var_i, (var_name, var_unit, var_vrange) in enumerate( zip( From b96d8ebc0c5c22f980e22384efafcd08db20577f Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:42:05 +0100 Subject: [PATCH 046/190] renaming test datastores --- tests/datastore_examples/.gitignore | 3 +- .../.gitignore | 0 .../era5_1000hPa_danra_100m_winds/config.yaml | 12 +++ .../danra.datastore.yaml | 99 +++++++++++++++++++ .../era5.datastore.yaml | 23 ++--- .../mdp/era5_1000hPa_winds/config.yaml | 3 - 6 files changed, 122 insertions(+), 18 deletions(-) rename tests/datastore_examples/mdp/{era5_1000hPa_winds => era5_1000hPa_danra_100m_winds}/.gitignore (100%) create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml rename tests/datastore_examples/mdp/{era5_1000hPa_winds => era5_1000hPa_danra_100m_winds}/era5.datastore.yaml (80%) delete mode 
100644 tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml diff --git a/tests/datastore_examples/.gitignore b/tests/datastore_examples/.gitignore index e84e6493..4fbd2326 100644 --- a/tests/datastore_examples/.gitignore +++ b/tests/datastore_examples/.gitignore @@ -1,2 +1,3 @@ npyfilesmeps/*.zip -npyfilesmeps/meps_example_reduced/ +npyfilesmeps/meps_example_reduced +npyfilesmeps/era5_1000hPa_temp_meps_example_reduced diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/.gitignore similarity index 100% rename from tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore rename to tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/.gitignore diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml new file mode 100644 index 00000000..a158bee3 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml @@ -0,0 +1,12 @@ +datastore: + kind: mdp + config_path: danra.datastore.yaml +datastore_boundary: + kind: mdp + config_path: era5.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + weights: + u100m: 1.0 + v100m: 1.0 diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml new file mode 100644 index 00000000..3edf1267 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml @@ -0,0 +1,99 @@ +schema_version: v0.5.0 +dataset_version: v0.1.0 + +output: + variables: + static: [grid_index, static_feature] + state: [time, grid_index, state_feature] + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-03T00:00 + end: 1990-09-09T00:00 + step: PT3H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-03T00:00 + end: 1990-09-06T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-06T00:00 + end: 1990-09-07T00:00 + test: + start: 1990-09-07T00:00 + end: 1990-09-09T00:00 + +inputs: + danra_height_levels: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/height_levels.zarr + dims: [time, x, y, altitude] + variables: + u: + altitude: + values: [100,] + units: m + v: + altitude: + values: [100, ] + units: m + dim_mapping: + time: + method: rename + dim: time + state_feature: + method: stack_variables_by_var_name + dims: [altitude] + name_format: "{var_name}{altitude}m" + grid_index: + method: stack + dims: [x, y] + target_output_variable: state + + danra_surface: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr + dims: [time, x, y] + variables: + # use surface incoming shortwave radiation as forcing + - swavr0m + dim_mapping: + time: + method: rename + dim: time + grid_index: + method: stack + dims: [x, y] + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: forcing + + danra_lsm: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/lsm.zarr + dims: [x, y] + variables: + - lsm + dim_mapping: + grid_index: + method: stack + dims: [x, y] + static_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: static + +extra: + projection: + class_name: LambertConformal + kwargs: + central_longitude: 25.0 + 
central_latitude: 56.7 + standard_parallels: [56.7, 56.7] + globe: + semimajor_axis: 6367470.0 + semiminor_axis: 6367470.0 diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml similarity index 80% rename from tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml rename to tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml index 36b39501..c97da4bc 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -1,5 +1,4 @@ -#TODO: What do these versions mean? Should they be updated? -schema_version: v0.2.0+dev +schema_version: v0.5.0 dataset_version: v1.0.0 output: @@ -7,8 +6,8 @@ output: forcing: [time, grid_index, forcing_feature] coord_ranges: time: - start: 1990-09-02T00:00 - end: 1990-09-10T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 step: PT6H chunking: time: 1 @@ -16,17 +15,17 @@ output: dim: time splits: train: - start: 1990-09-02T00:00 - end: 1990-09-07T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 compute_statistics: ops: [mean, std, diff_mean, diff_std] dims: [grid_index, time] val: - start: 1990-09-05T00:00 - end: 1990-09-08T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 test: - start: 1990-09-06T00:00 - end: 1990-09-10T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 inputs: era_height_levels: @@ -37,10 +36,6 @@ inputs: level: values: [1000,] units: hPa - v_component_of_wind: - level: - values: [1000, ] - units: hPa dim_mapping: time: method: rename diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml deleted file mode 100644 index 5d1e05f2..00000000 --- a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -datastore: - kind: mdp - config_path: era5.datastore.yaml From 72da25fd15d46a4497728935e9767c34330f1ccc Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:44:15 +0100 Subject: [PATCH 047/190] adding num_past/future_boundary_step args --- neural_lam/train_model.py | 37 +++++++++++++++------------------ tests/test_datasets.py | 43 +++++++++++++++++++++++++++++++++------ tests/test_training.py | 24 ++++++++++++++++++++-- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 37bf6db7..2a61e86c 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -34,11 +34,6 @@ def main(input_args=None): type=str, help="Path to the configuration for neural-lam", ) - parser.add_argument( - "--config_path_boundary", - type=str, - help="Path to the configuration for boundary conditions", - ) parser.add_argument( "--model", type=str, @@ -208,6 +203,18 @@ def main(input_args=None): default=1, help="Number of future time steps to use as input for forcing data", ) + parser.add_argument( + "--num_past_boundary_steps", + type=int, + default=1, + help="Number of past time steps to use as input for boundary data", + ) + parser.add_argument( + "--num_future_boundary_steps", + type=int, + default=1, + help="Number of future time steps to use as input for boundary data", + ) args = parser.parse_args(input_args) args.var_leads_metrics_watch = { int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items() @@ -217,9 +224,6 @@ def main(input_args=None): assert ( args.config_path is not 
None ), "Specify your config with --config_path" - assert ( - args.config_path_boundary is not None - ), "Specify your config with --config_path_boundary" assert args.model in MODELS, f"Unknown model: {args.model}" assert args.eval in ( None, @@ -234,21 +238,10 @@ def main(input_args=None): seed.seed_everything(args.seed) # Load neural-lam configuration and datastore to use - config, datastore = load_config_and_datastore(config_path=args.config_path) - config_boundary, datastore_boundary = load_config_and_datastore( - config_path=args.config_path_boundary + config, datastore, datastore_boundary = load_config_and_datastore( + config_path=args.config_path ) - # TODO this should not be required, make more flexible - assert ( - datastore.num_past_forcing_steps - == datastore_boundary.num_past_forcing_steps - ), "Mismatch in num_past_forcing_steps" - assert ( - datastore.num_future_forcing_steps - == datastore_boundary.num_future_forcing_steps - ), "Mismatch in num_future_forcing_steps" - # Create datamodule data_module = WeatherDataModule( datastore=datastore, @@ -258,6 +251,8 @@ def main(input_args=None): standardize=True, num_past_forcing_steps=args.num_past_forcing_steps, num_future_forcing_steps=args.num_future_forcing_steps, + num_past_boundary_steps=args.num_past_boundary_steps, + num_future_boundary_steps=args.num_future_boundary_steps, batch_size=args.batch_size, num_workers=args.num_workers, ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 67eac70e..5fbe4a5d 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -42,10 +42,13 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): datastore_boundary_name ) N_gridpoints = datastore.num_grid_points + N_gridpoints_boundary = datastore_boundary.num_grid_points N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 dataset = WeatherDataset( datastore=datastore, datastore_boundary=datastore_boundary, @@ -53,6 +56,8 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, + num_past_boundary_steps=num_past_boundary_steps, + num_future_boundary_steps=num_future_boundary_steps, ) item = dataset[0] @@ -77,8 +82,23 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): assert forcing.ndim == 3 assert forcing.shape[0] == N_pred_steps assert forcing.shape[1] == N_gridpoints - assert forcing.shape[2] == datastore.get_num_data_vars("forcing") * ( - num_past_forcing_steps + num_future_forcing_steps + 1 + # each stacked forcing feature has one corresponding temporal embedding + assert ( + forcing.shape[2] + == datastore.get_num_data_vars("forcing") + * (num_past_forcing_steps + num_future_forcing_steps + 1) + * 2 + ) + + # boundary + assert boundary.ndim == 3 + assert boundary.shape[0] == N_pred_steps + assert boundary.shape[1] == N_gridpoints_boundary + assert ( + boundary.shape[2] + == datastore_boundary.get_num_data_vars("forcing") + * (num_past_boundary_steps + num_future_boundary_steps + 1) + * 2 ) # batch times @@ -88,6 +108,7 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): # try to get the last item of the dataset to ensure slicing and stacking # operations are working as expected and are consistent with the dataset # length + dataset[len(dataset) - 1] @@ -106,6 +127,9 @@ def test_dataset_item_create_dataarray_from_tensor( N_pred_steps 
= 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 + dataset = WeatherDataset( datastore=datastore, datastore_boundary=datastore_boundary, @@ -113,16 +137,22 @@ def test_dataset_item_create_dataarray_from_tensor( ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, + num_past_boundary_steps=num_past_boundary_steps, + num_future_boundary_steps=num_future_boundary_steps, ) idx = 0 # unpack the item, this is the current return signature for # WeatherDataset.__getitem__ - _, target_states, _, target_times_arr = dataset[idx] - _, da_target_true, _, da_target_times_true = dataset._build_item_dataarrays( - idx=idx - ) + _, target_states, _, _, target_times_arr = dataset[idx] + ( + _, + da_target_true, + _, + _, + da_target_times_true, + ) = dataset._build_item_dataarrays(idx=idx) target_times = np.array(target_times_arr, dtype="datetime64[ns]") np.testing.assert_equal(target_times, da_target_times_true.values) @@ -272,6 +302,7 @@ def test_dataset_length(dataset_config): dataset = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=dataset_config["ar_steps"], num_past_forcing_steps=dataset_config["past"], diff --git a/tests/test_training.py b/tests/test_training.py index 1ed1847d..28566a4b 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -14,18 +14,33 @@ from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataModule -from tests.conftest import init_datastore_example +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_training(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( f"Skipping test for {datastore_name} as it is not a regular " "grid datastore." ) + if not isinstance(datastore_boundary, BaseRegularGridDatastore): + pytest.skip( + f"Skipping test for {datastore_boundary_name} as it is not a regular " + "grid datastore." 
+ ) if torch.cuda.is_available(): device_name = "cuda" @@ -59,6 +74,7 @@ def test_training(datastore_name): data_module = WeatherDataModule( datastore=datastore, + datastore_boundary=datastore_boundary, ar_steps_train=3, ar_steps_eval=5, standardize=True, @@ -66,6 +82,8 @@ def test_training(datastore_name): num_workers=1, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, ) class ModelArgs: @@ -85,6 +103,8 @@ class ModelArgs: metrics_watch = [] num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 model_args = ModelArgs() From 244f1ccb77e9d12852e3a59feddff5034f54ef95 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:44:51 +0100 Subject: [PATCH 048/190] using combined config file --- neural_lam/config.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/neural_lam/config.py b/neural_lam/config.py index d3e09697..914ebb38 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -168,4 +168,15 @@ def load_config_and_datastore( datastore_kind=config.datastore.kind, config_path=datastore_config_path ) - return config, datastore + if config.datastore_boundary is not None: + datastore_boundary_config_path = ( + Path(config_path).parent / config.datastore_boundary.config_path + ) + datastore_boundary = init_datastore( + datastore_kind=config.datastore_boundary.kind, + config_path=datastore_boundary_config_path, + ) + else: + datastore_boundary = None + + return config, datastore, datastore_boundary From a9cc36e23de294f21fce15f903a4ba7d0a8496a6 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:45:12 +0100 Subject: [PATCH 049/190] proper handling of state/forcing/boundary in dataset --- neural_lam/weather_dataset.py | 304 +++++++++++++++++++--------------- 1 file changed, 167 insertions(+), 137 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 32add37a..b717c40a 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -38,6 +38,16 @@ class WeatherDataset(torch.utils.data.Dataset): forcing from times t, t+1, ..., t+j-1, t+j (and potentially times before t, given num_past_forcing_steps) are included as forcing inputs at time t. Default is 1. + num_past_boundary_steps: int, optional + Number of past time steps to include in boundary input. If set to i, + boundary from times t-i, t-i+1, ..., t-1, t (and potentially beyond, + given num_future_forcing_steps) are included as boundary inputs at time t + Default is 1. + num_future_boundary_steps: int, optional + Number of future time steps to include in boundary input. If set to j, + boundary from times t, t+1, ..., t+j-1, t+j (and potentially times before + t, given num_past_forcing_steps) are included as boundary inputs at time + t. Default is 1. standardize : bool, optional Whether to standardize the data. Default is True. 
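    Notes
    -----
    Forcing and boundary features are provided to the model as windows of
    (num_past_*_steps + num_future_*_steps + 1) time steps around each target
    time, and every windowed value is paired with a time-difference feature
    (temporal embedding), so the stacked feature dimension per variable is
    twice the window size.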
""" @@ -50,6 +60,8 @@ def __init__( ar_steps=3, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, standardize=True, ): super().__init__() @@ -60,10 +72,10 @@ def __init__( self.datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps + self.num_past_boundary_steps = num_past_boundary_steps + self.num_future_boundary_steps = num_future_boundary_steps - self.da_state = self.datastore.get_dataarray( - category="state", split=self.split - ) + self.da_state = self.datastore.get_dataarray(category="state", split=self.split) if self.da_state is None: raise ValueError( "A non-empty state dataarray must be provided. " @@ -74,9 +86,12 @@ def __init__( category="forcing", split=self.split ) # XXX For now boundary data is always considered mdp-forcing data - self.da_boundary = self.datastore_boundary.get_dataarray( - category="forcing", split=self.split - ) + if self.datastore_boundary is not None: + self.da_boundary = self.datastore_boundary.get_dataarray( + category="forcing", split=self.split + ) + else: + self.da_boundary = None # check that with the provided data-arrays and ar_steps that we have a # non-zero amount of samples @@ -97,9 +112,7 @@ def __init__( parts["forcing"] = self.da_forcing for part, da in parts.items(): - expected_dim_order = self.datastore.expected_dim_order( - category=part - ) + expected_dim_order = self.datastore.expected_dim_order(category=part) if da.dims != expected_dim_order: raise ValueError( f"The dimension order of the `{part}` data ({da.dims}) " @@ -108,6 +121,23 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + # handling ensemble data + if self.datastore.is_ensemble: + # for the now the strategy is to only include the first ensemble + # member + # XXX: this could be changed to include all ensemble members by + # splitting `idx` into two parts, one for the analysis time and one + # for the ensemble member and then increasing self.__len__ to + # include all ensemble members + warnings.warn( + "only use of ensemble member 0 (the first member) is " + "implemented for ensemble data" + ) + i_ensemble = 0 + self.da_state = self.da_state.isel(ensemble_member=i_ensemble) + else: + self.da_state = self.da_state + def get_time_step(times): """Calculate the time step from the data""" time_diffs = np.diff(times) @@ -119,11 +149,18 @@ def get_time_step(times): return time_diffs[0] # Check time step consistency in state data - _ = get_time_step(self.da_state.time.values) + if self.datastore.is_forecast: + state_times = self.da_state.analysis_time + else: + state_times = self.da_state.time + _ = get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: - state_times = self.da_state.time + if self.datastore.is_forecast: + state_times = self.da_state.analysis_time + else: + state_times = self.da_state.time state_time_min = state_times.min().values state_time_max = state_times.max().values @@ -131,26 +168,30 @@ def get_time_step(times): # Forcing data is part of the same datastore as state data # During creation the time dimension of the forcing data # is matched to the state data - forcing_times = self.da_forcing.time - _ = get_time_step(forcing_times.values) + if self.datastore.is_forecast: + forcing_times = self.da_forcing.analysis_time + else: + forcing_times = self.da_forcing.time + get_time_step(forcing_times.values) if 
self.da_boundary is not None: # Boundary data is part of a separate datastore # The boundary data is allowed to have a different time_step # Check that the boundary data covers the required time range - boundary_times = self.da_boundary.time + if self.datastore_boundary.is_forecast: + boundary_times = self.da_boundary.analysis_time + else: + boundary_times = self.da_boundary.time boundary_time_step = get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values # Calculate required bounds for boundary using its time step boundary_required_time_min = ( - state_time_min - - self.num_past_forcing_steps * boundary_time_step + state_time_min - self.num_past_forcing_steps * boundary_time_step ) boundary_required_time_max = ( - state_time_max - + self.num_future_forcing_steps * boundary_time_step + state_time_max + self.num_future_forcing_steps * boundary_time_step ) if boundary_time_min > boundary_required_time_min: @@ -179,10 +220,8 @@ def get_time_step(times): self.da_state_std = self.ds_state_stats.state_std if self.da_forcing is not None: - self.ds_forcing_stats = ( - self.datastore.get_standardization_dataarray( - category="forcing" - ) + self.ds_forcing_stats = self.datastore.get_standardization_dataarray( + category="forcing" ) self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std @@ -208,7 +247,7 @@ def __len__(self): warnings.warn( "only using first ensemble member, so dataset size is " " effectively reduced by the number of ensemble members " - f"({self.da_state.ensemble_member.size})", + f"({self.datastore._num_ensemble_members})", UserWarning, ) @@ -242,36 +281,50 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): + def _slice_time( + self, + da_state, + idx, + n_steps: int, + da_forcing_boundary=None, + num_past_steps=None, + num_future_steps=None, + ): """ Produce time slices of the given dataarrays `da_state` (state) and - `da_forcing` (forcing). For the state data, slicing is done based on - `idx`. For the forcing data, nearest neighbor matching is performed - based on the state times. Additionally, the time difference between the - matched forcing times and state times (in multiples of state time steps) - is added to the forcing dataarray. This will be used as an additional - feature in the model (temporal embedding). + `da_forcing_boundary`. For the state data, slicing is done + based on `idx`. For the forcing/boundary data, nearest neighbor matching + is performed based on the state times. Additionally, the time difference + between the matched forcing/boundary times and state times (in multiples + of state time steps) is added to the forcing dataarray. This will be + used as an additional feature in the model (temporal embedding). Parameters ---------- da_state : xr.DataArray The state dataarray to slice. - da_forcing : xr.DataArray - The forcing dataarray to slice. idx : int The index of the time step to start the sample from in the state data. n_steps : int The number of time steps to include in the sample. + da_forcing_boundary : xr.DataArray + The forcing/boundary dataarray to slice. + num_past_steps : int, optional + The number of past time steps to include in the forcing/boundary + data. Default is `None`. + num_future_steps : int, optional + The number of future time steps to include in the forcing/boundary + data. Default is `None`. 
Returns ------- da_state_sliced : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). - da_forcing_matched : xr.DataArray + da_forcing_boundary_matched : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', - 'forcing_feature_windowed'). + 'forcing/boundary_feature_windowed'). """ # Number of initial steps required (e.g., for initializing models) init_steps = 2 @@ -279,8 +332,8 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): # Slice the state data as before if self.datastore.is_forecast: # Calculate start and end indices for slicing - start_idx = max(0, self.num_past_forcing_steps - init_steps) - end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps + start_idx = max(0, num_past_steps - init_steps) + end_idx = max(init_steps, num_past_steps) + n_steps # Slice the state data over the elapsed forecast duration da_state_sliced = da_state.isel( @@ -299,13 +352,11 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): else: # For analysis data, slice the time dimension directly - start_idx = idx + max(0, self.num_past_forcing_steps - init_steps) - end_idx = ( - idx + max(init_steps, self.num_past_forcing_steps) + n_steps - ) + start_idx = idx + max(0, num_past_steps - init_steps) + end_idx = idx + max(init_steps, num_past_steps) + n_steps da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) - if da_forcing is None: + if da_forcing_boundary is None: return da_state_sliced, None # Get the state times and its temporal resolution for matching with @@ -313,78 +364,66 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): state_times = da_state_sliced["time"] state_time_step = state_times.values[1] - state_times.values[0] - # Match forcing data to state times based on nearest neighbor - if self.datastore.is_forecast: - # Calculate all possible forcing times - forcing_times = ( - da_forcing.analysis_time + da_forcing.elapsed_forecast_duration - ) - forcing_times_flat = forcing_times.stack( - forecast_time=("analysis_time", "elapsed_forecast_duration") - ) + if "analysis_time" in da_forcing_boundary.dims: + idx = np.abs( + da_forcing_boundary.analysis_time.values + - self.da_state.analysis_time.values[idx] + ).argmin() + # Add a 'time' dimension using the actual forecast times + offset = max(init_steps, num_past_steps) + da_list = [] + for step in range(n_steps): + start_idx = offset + step - num_past_steps + end_idx = offset + step + num_future_steps + + current_time = ( + da_forcing_boundary.analysis_time[idx] + + da_forcing_boundary.elapsed_forecast_duration[offset + step] + ) - # Compute time differences - time_deltas = ( - forcing_times_flat.values[:, np.newaxis] - - state_times.values[np.newaxis, :] - ) - time_diffs = np.abs(time_deltas) - idx_min = time_diffs.argmin(axis=0) - - # Retrieve corresponding indices for analysis_time and - # elapsed_forecast_duration - forecast_time_index = forcing_times_flat["forecast_time"][idx_min] - analysis_time_indices = forecast_time_index["analysis_time"] - elapsed_forecast_duration_indices = forecast_time_index[ - "elapsed_forecast_duration" - ] - - # Slice the forcing data using matched indices - da_forcing_matched = da_forcing.isel( - analysis_time=("time", analysis_time_indices), - elapsed_forecast_duration=( - "time", - elapsed_forecast_duration_indices, - ), - ) + da_sliced = da_forcing_boundary.isel( + analysis_time=idx, + elapsed_forecast_duration=slice(start_idx, end_idx + 1), + ) - # Assign matched state times to 
the forcing data - da_forcing_matched["time"] = state_times - da_forcing_matched = da_forcing_matched.swap_dims( - {"elapsed_forecast_duration": "time"} - ) + da_sliced = da_sliced.rename({"elapsed_forecast_duration": "window"}) + da_sliced = da_sliced.assign_coords( + window=np.arange(-num_past_steps, num_future_steps + 1) + ) - # Calculate time differences in multiples of state time steps - state_time_step = state_times.values[1] - state_times.values[0] - time_diff_steps = ( - time_deltas[idx_min, np.arange(len(state_times))] - / state_time_step - ) + da_sliced = da_sliced.expand_dims(dim={"time": [current_time.values]}) + + da_list.append(da_sliced) - # Add time difference as a new coordinate - da_forcing_matched = da_forcing_matched.assign_coords( - time_diff=("time", time_diff_steps) + # Concatenate the list of DataArrays along the 'time' dimension + da_forcing_boundary_matched = xr.concat(da_list, dim="time") + forcing_time_step = ( + da_forcing_boundary_matched.time.values[1] + - da_forcing_boundary_matched.time.values[0] ) + da_forcing_boundary_matched["window"] = da_forcing_boundary_matched["window"] * ( + forcing_time_step / state_time_step + ) + time_diff_steps = da_forcing_boundary_matched.isel( + grid_index=0, forcing_feature=0 + ).data + else: # For analysis data, match directly using the 'time' coordinate - forcing_times = da_forcing["time"] + forcing_times = da_forcing_boundary["time"] # Compute time differences between forcing and state times # (in multiples of state time steps) # Retrieve the indices of the closest times in the forcing data time_deltas = ( - forcing_times.values[:, np.newaxis] - - state_times.values[np.newaxis, :] + forcing_times.values[:, np.newaxis] - state_times.values[np.newaxis, :] ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) time_diff_steps = np.stack( [ time_deltas[ - idx_i - - self.num_past_forcing_steps : idx_i - + self.num_future_forcing_steps - + 1, + idx_i - num_past_steps : idx_i + num_future_steps + 1, init_steps + step_i, ] for (step_i, idx_i) in enumerate(idx_min[init_steps:]) @@ -392,24 +431,22 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): ) # Create window dimension for forcing data to stack later - window_size = ( - self.num_past_forcing_steps + self.num_future_forcing_steps + 1 - ) - da_forcing_windowed = da_forcing.rolling( - time=window_size, center=True + window_size = num_past_steps + num_future_steps + 1 + da_forcing_boundary_windowed = da_forcing_boundary.rolling( + time=window_size, center=False ).construct(window_dim="window") - da_forcing_matched = da_forcing_windowed.isel( + da_forcing_boundary_matched = da_forcing_boundary_windowed.isel( time=idx_min[init_steps:] ) - # Add time difference as a new coordinate to concatenate to the - # forcing features later - da_forcing_matched["time_diff_steps"] = ( - ("time", "window"), - time_diff_steps, - ) + # Add time difference as a new coordinate to concatenate to the + # forcing features later + da_forcing_boundary_matched["time_diff_steps"] = ( + ("time", "window"), + time_diff_steps, + ) - return da_state_sliced, da_forcing_matched + return da_state_sliced, da_forcing_boundary_matched def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data after standardization.""" @@ -462,23 +499,7 @@ def _build_item_dataarrays(self, idx): da_target_times : xr.DataArray The dataarray for the target times. 
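        da_boundary_windowed : xr.DataArray
            The dataarray for the boundary forcing data, windowed for the
            sample (an empty dataarray if no boundary datastore is used).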
""" - # handling ensemble data - if self.datastore.is_ensemble: - # for the now the strategy is to only include the first ensemble - # member - # XXX: this could be changed to include all ensemble members by - # splitting `idx` into two parts, one for the analysis time and one - # for the ensemble member and then increasing self.__len__ to - # include all ensemble members - warnings.warn( - "only use of ensemble member 0 (the first member) is " - "implemented for ensemble data" - ) - i_ensemble = 0 - da_state = self.da_state.isel(ensemble_member=i_ensemble) - else: - da_state = self.da_state - + da_state = self.da_state if self.da_forcing is not None: if "ensemble_member" in self.da_forcing.dims: raise NotImplementedError( @@ -500,13 +521,19 @@ def _build_item_dataarrays(self, idx): da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing=da_boundary, + da_forcing_boundary=da_boundary, + num_future_steps=self.num_future_boundary_steps, + num_past_steps=self.num_past_boundary_steps, ) + else: + da_boundary_windowed = None da_state, da_forcing_windowed = self._slice_time( da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing=da_forcing, + da_forcing_boundary=da_forcing, + num_future_steps=self.num_future_forcing_steps, + num_past_steps=self.num_past_forcing_steps, ) # load the data into memory @@ -521,9 +548,7 @@ def _build_item_dataarrays(self, idx): da_target_times = da_target_states.time if self.standardize: - da_init_states = ( - da_init_states - self.da_state_mean - ) / self.da_state_std + da_init_states = (da_init_states - self.da_state_mean) / self.da_state_std da_target_states = ( da_target_states - self.da_state_mean ) / self.da_state_std @@ -595,9 +620,7 @@ def __getitem__(self, idx): tensor_dtype = torch.float32 init_states = torch.tensor(da_init_states.values, dtype=tensor_dtype) - target_states = torch.tensor( - da_target_states.values, dtype=tensor_dtype - ) + target_states = torch.tensor(da_target_states.values, dtype=tensor_dtype) target_times = torch.tensor( da_target_times.astype("datetime64[ns]").astype("int64").values, @@ -707,10 +730,7 @@ def _is_listlike(obj): ) for grid_coord in ["x", "y"]: - if ( - grid_coord in da_datastore_state.coords - and grid_coord not in da.coords - ): + if grid_coord in da_datastore_state.coords and grid_coord not in da.coords: da.coords[grid_coord] = da_datastore_state[grid_coord] if not add_time_as_dim: @@ -731,6 +751,8 @@ def __init__( standardize=True, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, batch_size=4, num_workers=16, ): @@ -739,6 +761,8 @@ def __init__( self._datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps + self.num_past_boundary_steps = num_past_boundary_steps + self.num_future_boundary_steps = num_future_boundary_steps self.ar_steps_train = ar_steps_train self.ar_steps_eval = ar_steps_eval self.standardize = standardize @@ -765,6 +789,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) self.val_dataset = WeatherDataset( datastore=self._datastore, @@ -774,6 +800,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, 
num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) if stage == "test" or stage is None: @@ -785,6 +813,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) def train_dataloader(self): From dcc0b46861ff1263c688301eca265bd62803616f Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:45:35 +0100 Subject: [PATCH 050/190] datastore_boundars=None introduced --- .../datastore/npyfilesmeps/compute_standardization_stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py index f2c80e8a..4207812f 100644 --- a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py +++ b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py @@ -172,6 +172,7 @@ def main( ar_steps = 63 ds = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=ar_steps, standardize=False, From a3b3bde9ed1b044b32afde7e4b12bc8e4a1593e6 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:02 +0100 Subject: [PATCH 051/190] bug fix for file retrieval per member --- neural_lam/datastore/npyfilesmeps/store.py | 51 +++++++++------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 146b0627..7ee583be 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -244,9 +244,7 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # them separately features = ["toa_downwelling_shortwave_flux", "open_water_fraction"] das = [ - self._get_single_timeseries_dataarray( - features=[feature], split=split - ) + self._get_single_timeseries_dataarray(features=[feature], split=split) for feature in features ] da = xr.concat(das, dim="feature") @@ -259,9 +257,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # variable is turned into a dask array and so execution of the # calculation is delayed until the feature values are actually # used. 
- da_forecast_time = ( - da.analysis_time + da.elapsed_forecast_duration - ).chunk({"elapsed_forecast_duration": 1}) + da_forecast_time = (da.analysis_time + da.elapsed_forecast_duration).chunk( + {"elapsed_forecast_duration": 1} + ) da_datetime_forcing_features = self._calc_datetime_forcing_features( da_time=da_forecast_time ) @@ -339,10 +337,7 @@ def _get_single_timeseries_dataarray( for all categories of data """ - if ( - set(features).difference(self.get_vars_names(category="static")) - == set() - ): + if set(features).difference(self.get_vars_names(category="static")) == set(): assert split in ( "train", "val", @@ -356,12 +351,8 @@ def _get_single_timeseries_dataarray( "test", ), f"Unknown dataset split {split} for features {features}" - if member is not None and features != self.get_vars_names( - category="state" - ): - raise ValueError( - "Member can only be specified for the 'state' category" - ) + if member is not None and features != self.get_vars_names(category="state"): + raise ValueError("Member can only be specified for the 'state' category") concat_axis = 0 @@ -377,9 +368,7 @@ def _get_single_timeseries_dataarray( fp_samples = self.root_path / "samples" / split if self._remove_state_features_with_index: n_to_drop = len(self._remove_state_features_with_index) - feature_dim_mask = np.ones( - len(features) + n_to_drop, dtype=bool - ) + feature_dim_mask = np.ones(len(features) + n_to_drop, dtype=bool) feature_dim_mask[self._remove_state_features_with_index] = False elif features == ["toa_downwelling_shortwave_flux"]: filename_format = TOA_SW_DOWN_FLUX_FILENAME_FORMAT @@ -445,7 +434,7 @@ def _get_single_timeseries_dataarray( * np.timedelta64(1, "h") ) elif d == "analysis_time": - coord_values = self._get_analysis_times(split=split) + coord_values = self._get_analysis_times(split=split, member_id=member) elif d == "y": coord_values = y elif d == "x": @@ -464,9 +453,7 @@ def _get_single_timeseries_dataarray( if features_vary_with_analysis_time: filepaths = [ fp_samples - / filename_format.format( - analysis_time=analysis_time, **file_params - ) + / filename_format.format(analysis_time=analysis_time, **file_params) for analysis_time in coords["analysis_time"] ] else: @@ -505,7 +492,7 @@ def _get_single_timeseries_dataarray( return da - def _get_analysis_times(self, split) -> List[np.datetime64]: + def _get_analysis_times(self, split, member_id) -> List[np.datetime64]: """Get the analysis times for the given split by parsing the filenames of all the files found for the given split. @@ -513,6 +500,8 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: ---------- split : str The dataset split to get the analysis times for. + member_id : int + The ensemble member to get the analysis times for. Returns ------- @@ -520,8 +509,12 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: The analysis times for the given split. 
""" + if member_id is None: + # Only interior state data files have member_id, to avoid duplicates + # we only look at the first member for all other categories + member_id = 0 pattern = re.sub(r"{analysis_time:[^}]*}", "*", STATE_FILENAME_FORMAT) - pattern = re.sub(r"{member_id:[^}]*}", "*", pattern) + pattern = re.sub(r"{member_id:[^}]*}", f"{member_id:03d}", pattern) sample_dir = self.root_path / "samples" / split sample_files = sample_dir.glob(pattern) @@ -531,9 +524,7 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: times.append(name_parts["analysis_time"]) if len(times) == 0: - raise ValueError( - f"No files found in {sample_dir} with pattern {pattern}" - ) + raise ValueError(f"No files found in {sample_dir} with pattern {pattern}") return times @@ -690,9 +681,7 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ def load_pickled_tensor(fn): - return torch.load( - self.root_path / "static" / fn, weights_only=True - ).numpy() + return torch.load(self.root_path / "static" / fn, weights_only=True).numpy() mean_diff_values = None std_diff_values = None From 3ffc413e2f669dafd4c745a50b9b723fff231316 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:17 +0100 Subject: [PATCH 052/190] rename datastore for tests --- tests/conftest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index be5cf3e7..90a86d0d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,14 +94,14 @@ def download_meps_example_reduced_dataset(): dummydata=None, ) -DATASTORES_BOUNDARY_EXAMPLES = dict( - mdp=( +DATASTORES_BOUNDARY_EXAMPLES = { + "mdp": ( DATASTORE_EXAMPLES_ROOT_PATH / "mdp" - / "era5_1000hPa_winds" + / "era5_1000hPa_danra_100m_winds" / "era5.datastore.yaml" - ) -) + ), +} DATASTORES[DummyDatastore.SHORT_NAME] = DummyDatastore From 85aad66c8e9eec4e0b4e95cabb753d8492a0c49a Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:31 +0100 Subject: [PATCH 053/190] aligned time with danra for easier boundary testing --- tests/dummy_datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index d62c7356..a958b8f5 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -28,7 +28,7 @@ class DummyDatastore(BaseRegularGridDatastore): """ SHORT_NAME = "dummydata" - T0 = isodate.parse_datetime("2021-01-01T00:00:00") + T0 = isodate.parse_datetime("1990-09-02T00:00:00") N_FEATURES = dict(state=5, forcing=2, static=1) CARTESIAN_COORDS = ["x", "y"] From 64f057f78b713e39496abfc3962affa794666369 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:50 +0100 Subject: [PATCH 054/190] Fixed test for temporal embedding --- tests/test_time_slicing.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 29161505..2f5ed96c 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -40,9 +40,7 @@ def get_dataarray(self, category, split): if self.is_forecast: raise NotImplementedError() else: - da = xr.DataArray( - values, dims=["time"], coords={"time": self._time_values} - ) + da = xr.DataArray(values, dims=["time"], coords={"time": self._time_values}) # add `{category}_feature` and `grid_index` dimensions da = da.expand_dims("grid_index") @@ -78,10 +76,8 @@ def get_vars_long_names(self, category): def test_time_slicing_analysis( ar_steps, 
num_past_forcing_steps, num_future_forcing_steps ): - # state and forcing variables have only on dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange( - len(ANALYSIS_STATE_VALUES) - ) + # state and forcing variables have only one dimension, `time` + time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -93,6 +89,7 @@ def test_time_slicing_analysis( dataset = WeatherDataset( datastore=datastore, + datastore_boundary=None, ar_steps=ar_steps, num_future_forcing_steps=num_future_forcing_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -101,9 +98,7 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _ = [ - tensor.numpy() for tensor in sample - ] + init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] expected_init_states = [0, 1] if ar_steps == 3: @@ -130,7 +125,7 @@ def test_time_slicing_analysis( # init_states: (2, N_grid, d_features) # target_states: (ar_steps, N_grid, d_features) - # forcing: (ar_steps, N_grid, d_windowed_forcing) + # forcing: (ar_steps, N_grid, d_windowed_forcing * 2) # target_times: (ar_steps,) assert init_states.shape == (2, 1, 1) assert init_states[:, 0, 0].tolist() == expected_init_states @@ -141,6 +136,10 @@ def test_time_slicing_analysis( assert forcing.shape == ( 3, 1, - 1 + num_past_forcing_steps + num_future_forcing_steps, + # Factor 2 because each window step has a temporal embedding + (1 + num_past_forcing_steps + num_future_forcing_steps) * 2, + ) + np.testing.assert_equal( + forcing[:, 0, : num_past_forcing_steps + num_future_forcing_steps + 1], + np.array(expected_forcing_values), ) - np.testing.assert_equal(forcing[:, 0, :], np.array(expected_forcing_values)) From 6205dbd88f1b208118d93da6d12c0a1be672caef Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Mon, 2 Dec 2024 10:26:54 +0100 Subject: [PATCH 055/190] pin dataclass-wizard <0.31.0 to avoid bug in dataclass-wizard --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f0bc0851..fdcb7f3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "torch>=2.3.0", "torch-geometric==2.3.1", "parse>=1.20.2", - "dataclass-wizard>=0.22.3", + "dataclass-wizard<0.31.0", "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" From 551cd267235a82378ab28f2b1a4db90523d87ea8 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 10:40:48 +0100 Subject: [PATCH 056/190] allow boundary as input to ar_model.common_step --- neural_lam/models/ar_model.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 4ab73cc7..4a08306d 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -107,7 +107,9 @@ def __init__( self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim - + num_forcing_vars + # Factor 2 because of temporal embedding or windowed features + + 2 + * num_forcing_vars * (num_past_forcing_steps + num_future_forcing_steps + 1) ) @@ -200,19 +202,20 @@ def unroll_prediction(self, init_states, forcing_features, true_states): def common_step(self, batch): """ - Predict on single batch batch consists of: init_states: (B, 2, - num_grid_nodes, d_features) target_states: (B, pred_steps, - num_grid_nodes, d_features) forcing_features: (B, pred_steps, - 
num_grid_nodes, d_forcing), - where index 0 corresponds to index 1 of init_states + Predict on single batch batch consists of: + init_states: (B, 2,num_grid_nodes, d_features) + target_states: (B, pred_steps,num_grid_nodes, d_features) + forcing_features: (B, pred_steps,num_grid_nodes, d_forcing) + boundary_features: (B, pred_steps,num_grid_nodes, d_boundaries) + batch_times: (B, pred_steps) """ - (init_states, target_states, forcing_features, batch_times) = batch + (init_states, target_states, forcing_features, _, batch_times) = batch prediction, pred_std = self.unroll_prediction( init_states, forcing_features, target_states - ) # (B, pred_steps, num_grid_nodes, d_f) - # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, - # pred_steps, num_grid_nodes, d_f) or (d_f,) + ) + # prediction: (B, pred_steps, num_grid_nodes, d_f) + # pred_std: (B, pred_steps, num_grid_nodes, d_f) or (d_f,) return prediction, target_states, pred_std, batch_times From fc95350a28cbdb81419962b203e0bb08e36520dd Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 10:40:56 +0100 Subject: [PATCH 057/190] linting --- neural_lam/datastore/npyfilesmeps/store.py | 43 ++++++++---- neural_lam/weather_dataset.py | 66 ++++++++++++------- .../era5.datastore.yaml | 2 +- tests/test_time_slicing.py | 12 +++- tests/test_training.py | 17 ++--- 5 files changed, 91 insertions(+), 49 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 7ee583be..24349e7e 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -244,7 +244,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # them separately features = ["toa_downwelling_shortwave_flux", "open_water_fraction"] das = [ - self._get_single_timeseries_dataarray(features=[feature], split=split) + self._get_single_timeseries_dataarray( + features=[feature], split=split + ) for feature in features ] da = xr.concat(das, dim="feature") @@ -257,9 +259,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # variable is turned into a dask array and so execution of the # calculation is delayed until the feature values are actually # used. 
- da_forecast_time = (da.analysis_time + da.elapsed_forecast_duration).chunk( - {"elapsed_forecast_duration": 1} - ) + da_forecast_time = ( + da.analysis_time + da.elapsed_forecast_duration + ).chunk({"elapsed_forecast_duration": 1}) da_datetime_forcing_features = self._calc_datetime_forcing_features( da_time=da_forecast_time ) @@ -337,7 +339,10 @@ def _get_single_timeseries_dataarray( for all categories of data """ - if set(features).difference(self.get_vars_names(category="static")) == set(): + if ( + set(features).difference(self.get_vars_names(category="static")) + == set() + ): assert split in ( "train", "val", @@ -351,8 +356,12 @@ def _get_single_timeseries_dataarray( "test", ), f"Unknown dataset split {split} for features {features}" - if member is not None and features != self.get_vars_names(category="state"): - raise ValueError("Member can only be specified for the 'state' category") + if member is not None and features != self.get_vars_names( + category="state" + ): + raise ValueError( + "Member can only be specified for the 'state' category" + ) concat_axis = 0 @@ -368,7 +377,9 @@ def _get_single_timeseries_dataarray( fp_samples = self.root_path / "samples" / split if self._remove_state_features_with_index: n_to_drop = len(self._remove_state_features_with_index) - feature_dim_mask = np.ones(len(features) + n_to_drop, dtype=bool) + feature_dim_mask = np.ones( + len(features) + n_to_drop, dtype=bool + ) feature_dim_mask[self._remove_state_features_with_index] = False elif features == ["toa_downwelling_shortwave_flux"]: filename_format = TOA_SW_DOWN_FLUX_FILENAME_FORMAT @@ -434,7 +445,9 @@ def _get_single_timeseries_dataarray( * np.timedelta64(1, "h") ) elif d == "analysis_time": - coord_values = self._get_analysis_times(split=split, member_id=member) + coord_values = self._get_analysis_times( + split=split, member_id=member + ) elif d == "y": coord_values = y elif d == "x": @@ -453,7 +466,9 @@ def _get_single_timeseries_dataarray( if features_vary_with_analysis_time: filepaths = [ fp_samples - / filename_format.format(analysis_time=analysis_time, **file_params) + / filename_format.format( + analysis_time=analysis_time, **file_params + ) for analysis_time in coords["analysis_time"] ] else: @@ -524,7 +539,9 @@ def _get_analysis_times(self, split, member_id) -> List[np.datetime64]: times.append(name_parts["analysis_time"]) if len(times) == 0: - raise ValueError(f"No files found in {sample_dir} with pattern {pattern}") + raise ValueError( + f"No files found in {sample_dir} with pattern {pattern}" + ) return times @@ -681,7 +698,9 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ def load_pickled_tensor(fn): - return torch.load(self.root_path / "static" / fn, weights_only=True).numpy() + return torch.load( + self.root_path / "static" / fn, weights_only=True + ).numpy() mean_diff_values = None std_diff_values = None diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index b717c40a..b3d86292 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -41,13 +41,13 @@ class WeatherDataset(torch.utils.data.Dataset): num_past_boundary_steps: int, optional Number of past time steps to include in boundary input. If set to i, boundary from times t-i, t-i+1, ..., t-1, t (and potentially beyond, - given num_future_forcing_steps) are included as boundary inputs at time t - Default is 1. + given num_future_forcing_steps) are included as boundary inputs at time + t Default is 1. 
num_future_boundary_steps: int, optional Number of future time steps to include in boundary input. If set to j, - boundary from times t, t+1, ..., t+j-1, t+j (and potentially times before - t, given num_past_forcing_steps) are included as boundary inputs at time - t. Default is 1. + boundary from times t, t+1, ..., t+j-1, t+j (and potentially times + before t, given num_past_forcing_steps) are included as boundary inputs + at time t. Default is 1. standardize : bool, optional Whether to standardize the data. Default is True. """ @@ -75,7 +75,9 @@ def __init__( self.num_past_boundary_steps = num_past_boundary_steps self.num_future_boundary_steps = num_future_boundary_steps - self.da_state = self.datastore.get_dataarray(category="state", split=self.split) + self.da_state = self.datastore.get_dataarray( + category="state", split=self.split + ) if self.da_state is None: raise ValueError( "A non-empty state dataarray must be provided. " @@ -112,7 +114,9 @@ def __init__( parts["forcing"] = self.da_forcing for part, da in parts.items(): - expected_dim_order = self.datastore.expected_dim_order(category=part) + expected_dim_order = self.datastore.expected_dim_order( + category=part + ) if da.dims != expected_dim_order: raise ValueError( f"The dimension order of the `{part}` data ({da.dims}) " @@ -188,10 +192,12 @@ def get_time_step(times): # Calculate required bounds for boundary using its time step boundary_required_time_min = ( - state_time_min - self.num_past_forcing_steps * boundary_time_step + state_time_min + - self.num_past_forcing_steps * boundary_time_step ) boundary_required_time_max = ( - state_time_max + self.num_future_forcing_steps * boundary_time_step + state_time_max + + self.num_future_forcing_steps * boundary_time_step ) if boundary_time_min > boundary_required_time_min: @@ -220,8 +226,10 @@ def get_time_step(times): self.da_state_std = self.ds_state_stats.state_std if self.da_forcing is not None: - self.ds_forcing_stats = self.datastore.get_standardization_dataarray( - category="forcing" + self.ds_forcing_stats = ( + self.datastore.get_standardization_dataarray( + category="forcing" + ) ) self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std @@ -378,7 +386,9 @@ def _slice_time( current_time = ( da_forcing_boundary.analysis_time[idx] - + da_forcing_boundary.elapsed_forecast_duration[offset + step] + + da_forcing_boundary.elapsed_forecast_duration[ + offset + step + ] ) da_sliced = da_forcing_boundary.isel( @@ -386,12 +396,16 @@ def _slice_time( elapsed_forecast_duration=slice(start_idx, end_idx + 1), ) - da_sliced = da_sliced.rename({"elapsed_forecast_duration": "window"}) + da_sliced = da_sliced.rename( + {"elapsed_forecast_duration": "window"} + ) da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) - da_sliced = da_sliced.expand_dims(dim={"time": [current_time.values]}) + da_sliced = da_sliced.expand_dims( + dim={"time": [current_time.values]} + ) da_list.append(da_sliced) @@ -401,13 +415,13 @@ def _slice_time( da_forcing_boundary_matched.time.values[1] - da_forcing_boundary_matched.time.values[0] ) - da_forcing_boundary_matched["window"] = da_forcing_boundary_matched["window"] * ( - forcing_time_step / state_time_step - ) + da_forcing_boundary_matched["window"] = da_forcing_boundary_matched[ + "window" + ] * (forcing_time_step / state_time_step) time_diff_steps = da_forcing_boundary_matched.isel( grid_index=0, forcing_feature=0 ).data - + else: # For analysis data, match 
directly using the 'time' coordinate forcing_times = da_forcing_boundary["time"] @@ -416,7 +430,8 @@ def _slice_time( # (in multiples of state time steps) # Retrieve the indices of the closest times in the forcing data time_deltas = ( - forcing_times.values[:, np.newaxis] - state_times.values[np.newaxis, :] + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) @@ -548,7 +563,9 @@ def _build_item_dataarrays(self, idx): da_target_times = da_target_states.time if self.standardize: - da_init_states = (da_init_states - self.da_state_mean) / self.da_state_std + da_init_states = ( + da_init_states - self.da_state_mean + ) / self.da_state_std da_target_states = ( da_target_states - self.da_state_mean ) / self.da_state_std @@ -620,7 +637,9 @@ def __getitem__(self, idx): tensor_dtype = torch.float32 init_states = torch.tensor(da_init_states.values, dtype=tensor_dtype) - target_states = torch.tensor(da_target_states.values, dtype=tensor_dtype) + target_states = torch.tensor( + da_target_states.values, dtype=tensor_dtype + ) target_times = torch.tensor( da_target_times.astype("datetime64[ns]").astype("int64").values, @@ -730,7 +749,10 @@ def _is_listlike(obj): ) for grid_coord in ["x", "y"]: - if grid_coord in da_datastore_state.coords and grid_coord not in da.coords: + if ( + grid_coord in da_datastore_state.coords + and grid_coord not in da.coords + ): da.coords[grid_coord] = da_datastore_state[grid_coord] if not add_time_as_dim: diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml index c97da4bc..7c5ffb3b 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -25,7 +25,7 @@ output: end: 2022-09-30T00:00 test: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 inputs: era_height_levels: diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 2f5ed96c..4a59c81e 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -40,7 +40,9 @@ def get_dataarray(self, category, split): if self.is_forecast: raise NotImplementedError() else: - da = xr.DataArray(values, dims=["time"], coords={"time": self._time_values}) + da = xr.DataArray( + values, dims=["time"], coords={"time": self._time_values} + ) # add `{category}_feature` and `grid_index` dimensions da = da.expand_dims("grid_index") @@ -77,7 +79,9 @@ def test_time_slicing_analysis( ar_steps, num_past_forcing_steps, num_future_forcing_steps ): # state and forcing variables have only one dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) + time_values = np.datetime64("2020-01-01") + np.arange( + len(ANALYSIS_STATE_VALUES) + ) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -98,7 +102,9 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] + init_states, target_states, forcing, _, _ = [ + tensor.numpy() for tensor in sample + ] expected_init_states = [0, 1] if ar_steps == 3: diff --git a/tests/test_training.py b/tests/test_training.py index 28566a4b..7a1b4717 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -5,6 +5,7 @@ import pytest import 
pytorch_lightning as pl import torch + import wandb # First-party @@ -22,14 +23,10 @@ @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize( - "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() -) +@pytest.mark.parametrize("datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys()) def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example( - datastore_boundary_name - ) + datastore_boundary = init_datastore_boundary_example(datastore_boundary_name) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( @@ -38,15 +35,13 @@ def test_training(datastore_name, datastore_boundary_name): ) if not isinstance(datastore_boundary, BaseRegularGridDatastore): pytest.skip( - f"Skipping test for {datastore_boundary_name} as it is not a regular " - "grid datastore." + f"Skipping test for {datastore_boundary_name} as it is not a " + "regular grid datastore." ) if torch.cuda.is_available(): device_name = "cuda" - torch.set_float32_matmul_precision( - "high" - ) # Allows using Tensor Cores on A100s + torch.set_float32_matmul_precision("high") # Allows using Tensor Cores on A100s else: device_name = "cpu" From 01fa807bc5ce47270e3b4568db8df8ce3b436953 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:10:29 +0100 Subject: [PATCH 058/190] improved docstrings and added some assertions --- neural_lam/weather_dataset.py | 105 ++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 23 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index b3d86292..991965d9 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -143,7 +143,13 @@ def __init__( self.da_state = self.da_state def get_time_step(times): - """Calculate the time step from the data""" + """Calculate the time step from the data + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. + """ time_diffs = np.diff(times) if not np.all(time_diffs == time_diffs[0]): raise ValueError( @@ -234,6 +240,7 @@ def get_time_step(times): self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std + # XXX: Again, the boundary data is considered forcing data for now if self.da_boundary is not None: self.ds_boundary_stats = ( self.datastore_boundary.get_standardization_dataarray( @@ -305,7 +312,7 @@ def _slice_time( is performed based on the state times. Additionally, the time difference between the matched forcing/boundary times and state times (in multiples of state time steps) is added to the forcing dataarray. This will be - used as an additional feature in the model (temporal embedding). + used as an additional input feature in the model (temporal embedding). Parameters ---------- @@ -333,23 +340,26 @@ def _slice_time( da_forcing_boundary_matched : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'forcing/boundary_feature_windowed'). + If no forcing/boundary data is provided, this will be `None`. """ - # Number of initial steps required (e.g., for initializing models) + # The current implementation requires at least 2 time steps for the + # initial state (see GraphCast). 
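+        # These two initial steps provide the previous and current states
+        # from which the autoregressive rollout starts.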
init_steps = 2 - - # Slice the state data as before + # slice the dataarray to include the required number of time steps if self.datastore.is_forecast: - # Calculate start and end indices for slicing - start_idx = max(0, num_past_steps - init_steps) - end_idx = max(init_steps, num_past_steps) + n_steps - - # Slice the state data over the elapsed forecast duration + start_idx = max(0, self.num_past_forcing_steps - init_steps) + end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps + # this implies that the data will have both `analysis_time` and + # `elapsed_forecast_duration` dimensions for forecasts. We for now + # simply select a analysis time and the first `n_steps` forecast + # times (given no offset). Note that this means that we get one + # sample per forecast, always starting at forecast time 2. da_state_sliced = da_state.isel( analysis_time=idx, elapsed_forecast_duration=slice(start_idx, end_idx), ) - - # Create a new 'time' dimension + # create a new time dimension so that the produced sample has a + # `time` dimension, similarly to the analysis only data da_state_sliced["time"] = ( da_state_sliced.analysis_time + da_state_sliced.elapsed_forecast_duration @@ -357,9 +367,13 @@ def _slice_time( da_state_sliced = da_state_sliced.swap_dims( {"elapsed_forecast_duration": "time"} ) + # Asserting that the forecast time step is consistent + self.get_time_step(da_state_sliced.time) else: - # For analysis data, slice the time dimension directly + # For analysis data we slice the time dimension directly. The offset + # is only relevant for the very first (and last) samples in the + # dataset. start_idx = idx + max(0, num_past_steps - init_steps) end_idx = idx + max(init_steps, num_past_steps) + n_steps da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) @@ -372,7 +386,13 @@ def _slice_time( state_times = da_state_sliced["time"] state_time_step = state_times.values[1] - state_times.values[0] + # Here we cannot check 'self.datastore.is_forecast' directly because we + # might be dealing with a datastore_boundary if "analysis_time" in da_forcing_boundary.dims: + # Select the closest analysis time in the forcing/boundary data + # This is mostly relevant for boundary data where the time steps + # are not necessarily the same as the state data. But still fast + # enough for forcing data where the time steps are the same. idx = np.abs( da_forcing_boundary.analysis_time.values - self.da_state.analysis_time.values[idx] @@ -399,6 +419,8 @@ def _slice_time( da_sliced = da_sliced.rename( {"elapsed_forecast_duration": "window"} ) + + # Assign the 'window' coordinate to be relative positions da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) @@ -409,7 +431,10 @@ def _slice_time( da_list.append(da_sliced) - # Concatenate the list of DataArrays along the 'time' dimension + # Generate temporal embedding `time_diff_steps` for the + # forcing/boundary data. This is the time difference in multiples + # of state time steps between the forcing/boundary time and the + # state time. da_forcing_boundary_matched = xr.concat(da_list, dim="time") forcing_time_step = ( da_forcing_boundary_matched.time.values[1] @@ -423,7 +448,9 @@ def _slice_time( ).data else: - # For analysis data, match directly using the 'time' coordinate + # For analysis data, we slice the time dimension directly. The + # offset is only relevant for the very first (and last) samples in + # the dataset. 
forcing_times = da_forcing_boundary["time"] # Compute time differences between forcing and state times @@ -455,7 +482,7 @@ def _slice_time( ) # Add time difference as a new coordinate to concatenate to the - # forcing features later + # forcing features later as temporal embedding da_forcing_boundary_matched["time_diff_steps"] = ( ("time", "window"), time_diff_steps, @@ -464,7 +491,26 @@ def _slice_time( return da_state_sliced, da_forcing_boundary_matched def _process_windowed_data(self, da_windowed, da_state, da_target_times): - """Helper function to process windowed data after standardization.""" + """Helper function to process windowed data. This function stacks the + 'forcing_feature' and 'window' dimensions and adds the time step + differences to the existing features as a temporal embedding. + + Parameters + ---------- + da_windowed : xr.DataArray + The windowed data to process. Can be `None` if no data is provided. + da_state : xr.DataArray + The state dataarray. + da_target_times : xr.DataArray + The target times. + + Returns + ------- + da_windowed : xr.DataArray + The processed windowed data. If `da_windowed` is `None`, an empty + DataArray with the correct dimensions and coordinates is returned. + + """ stacked_dim = "forcing_feature_windowed" if da_windowed is not None: # Stack the 'feature' and 'window' dimensions and add the @@ -492,8 +538,8 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): def _build_item_dataarrays(self, idx): """ - Create the dataarrays for the initial states, target states and forcing - data for the sample at index `idx`. + Create the dataarrays for the initial states, target states, forcing + and boundary data for the sample at index `idx`. Parameters ---------- @@ -529,7 +575,7 @@ def _build_item_dataarrays(self, idx): else: da_boundary = None - # if da_forcing is None, the function will return None for + # if da_forcing_boundary is None, the function will return None for # da_forcing_windowed if da_boundary is not None: _, da_boundary_windowed = self._slice_time( @@ -542,6 +588,9 @@ def _build_item_dataarrays(self, idx): ) else: da_boundary_windowed = None + # XXX: Currently, the order of the `slice_time` calls is important + # as `da_state` is modified in the second call. This should be + # refactored to be more robust. da_state, da_forcing_windowed = self._slice_time( da_state=da_state, idx=idx, @@ -584,6 +633,10 @@ def _build_item_dataarrays(self, idx): da_boundary_windowed - self.da_boundary_mean ) / self.da_boundary_std + # This function handles the stacking of the forcing and boundary data + # and adds the time step differences as a temporal embedding. + # It can handle `None` inputs for the forcing and boundary data + # (and simlpy return an empty DataArray in that case). 
da_forcing_windowed = self._process_windowed_data( da_forcing_windowed, da_state, da_target_times ) @@ -655,6 +708,11 @@ def __getitem__(self, idx): # boundary: (ar_steps, N_grid, d_windowed_boundary) # target_times: (ar_steps,) + # Assert that the boundary data is an empty tensor if the corresponding + # datastore_boundary is `None` + if self.datastore_boundary is None: + assert boundary.numel() == 0 + return init_states, target_states, forcing, boundary, target_times def __iter__(self): @@ -794,9 +852,10 @@ def __init__( self.val_dataset = None self.test_dataset = None if num_workers > 0: - # BUG: There also seem to be issues with "spawn", to be investigated - # default to spawn for now, as the default on linux "fork" hangs - # when using dask (which the npyfilesmeps datastore uses) + # BUG: There also seem to be issues with "spawn" and `gloo`, to be + # investigated. Defaults to spawn for now, as the default on linux + # "fork" hangs when using dask (which the npyfilesmeps datastore + # uses) self.multiprocessing_context = "spawn" else: self.multiprocessing_context = None From 5a749f3ab55d79ce27ebe5bf439815d0cbf78093 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:10:42 +0100 Subject: [PATCH 059/190] update mdp dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5bbe4d92..ef75c8d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "parse>=1.20.2", "dataclass-wizard>=0.22.3", "gcsfs>=2021.10.0", - "mllam-data-prep @ git+https://github.com/leifdenby/mllam-data-prep@temp/for-neural-lam-datastores", + "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" From 45ba60782066cfc94d621f07119f23266556a374 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:11:32 +0100 Subject: [PATCH 060/190] remove boundary datastore from tests that don't need it --- tests/test_datasets.py | 17 ++--------------- tests/test_training.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 5fbe4a5d..063ec147 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -108,37 +108,24 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): # try to get the last item of the dataset to ensure slicing and stacking # operations are working as expected and are consistent with the dataset # length - dataset[len(dataset) - 1] @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize( - "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() -) -def test_dataset_item_create_dataarray_from_tensor( - datastore_name, datastore_boundary_name -): +def test_dataset_item_create_dataarray_from_tensor(datastore_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example( - datastore_boundary_name - ) N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 - num_past_boundary_steps = 1 - num_future_boundary_steps = 1 dataset = WeatherDataset( datastore=datastore, - datastore_boundary=datastore_boundary, + datastore_boundary=None, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, - num_past_boundary_steps=num_past_boundary_steps, - num_future_boundary_steps=num_future_boundary_steps, ) idx = 0 diff --git a/tests/test_training.py b/tests/test_training.py index 7a1b4717..ca0ebf41 100644 --- 
a/tests/test_training.py +++ b/tests/test_training.py @@ -5,7 +5,6 @@ import pytest import pytorch_lightning as pl import torch - import wandb # First-party @@ -23,10 +22,14 @@ @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize("datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys()) +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example(datastore_boundary_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( @@ -41,7 +44,9 @@ def test_training(datastore_name, datastore_boundary_name): if torch.cuda.is_available(): device_name = "cuda" - torch.set_float32_matmul_precision("high") # Allows using Tensor Cores on A100s + torch.set_float32_matmul_precision( + "high" + ) # Allows using Tensor Cores on A100s else: device_name = "cpu" From f36f36040dcbfa40380880d4cc9fa03f6632da43 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:42:43 +0100 Subject: [PATCH 061/190] fix scope of _get_slice_time --- neural_lam/weather_dataset.py | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 991965d9..4bc9d5c7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -142,28 +142,14 @@ def __init__( else: self.da_state = self.da_state - def get_time_step(times): - """Calculate the time step from the data - - Parameters - ---------- - times : xr.DataArray - The time dataarray to calculate the time step from. - """ - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] + # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time else: state_times = self.da_state.time - _ = get_time_step(state_times) + _ = self._get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: @@ -182,7 +168,7 @@ def get_time_step(times): forcing_times = self.da_forcing.analysis_time else: forcing_times = self.da_forcing.time - get_time_step(forcing_times.values) + self._get_time_step(forcing_times.values) if self.da_boundary is not None: # Boundary data is part of a separate datastore @@ -192,7 +178,7 @@ def get_time_step(times): boundary_times = self.da_boundary.analysis_time else: boundary_times = self.da_boundary.time - boundary_time_step = get_time_step(boundary_times.values) + boundary_time_step = self._get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values @@ -296,6 +282,22 @@ def __len__(self): - self.num_future_forcing_steps ) + def _get_time_step(self, times): + """Calculate the time step from the data + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. + """ + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. 
" + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + def _slice_time( self, da_state, @@ -368,7 +370,7 @@ def _slice_time( {"elapsed_forecast_duration": "time"} ) # Asserting that the forecast time step is consistent - self.get_time_step(da_state_sliced.time) + self._get_time_step(da_state_sliced.time) else: # For analysis data we slice the time dimension directly. The offset From 105108e9bd144c64075e0f5588f15176fc1fde52 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:43:01 +0100 Subject: [PATCH 062/190] fix scope of _get_time_step --- neural_lam/weather_dataset.py | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 991965d9..4bc9d5c7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -142,28 +142,14 @@ def __init__( else: self.da_state = self.da_state - def get_time_step(times): - """Calculate the time step from the data - - Parameters - ---------- - times : xr.DataArray - The time dataarray to calculate the time step from. - """ - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] + # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time else: state_times = self.da_state.time - _ = get_time_step(state_times) + _ = self._get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: @@ -182,7 +168,7 @@ def get_time_step(times): forcing_times = self.da_forcing.analysis_time else: forcing_times = self.da_forcing.time - get_time_step(forcing_times.values) + self._get_time_step(forcing_times.values) if self.da_boundary is not None: # Boundary data is part of a separate datastore @@ -192,7 +178,7 @@ def get_time_step(times): boundary_times = self.da_boundary.analysis_time else: boundary_times = self.da_boundary.time - boundary_time_step = get_time_step(boundary_times.values) + boundary_time_step = self._get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values @@ -296,6 +282,22 @@ def __len__(self): - self.num_future_forcing_steps ) + def _get_time_step(self, times): + """Calculate the time step from the data + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. + """ + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + def _slice_time( self, da_state, @@ -368,7 +370,7 @@ def _slice_time( {"elapsed_forecast_duration": "time"} ) # Asserting that the forecast time step is consistent - self.get_time_step(da_state_sliced.time) + self._get_time_step(da_state_sliced.time) else: # For analysis data we slice the time dimension directly. 
The offset
From ae0cf764bd23adfde2befa4bef8ef89122975688 Mon Sep 17 00:00:00 2001
From: Simon Adamov
Date: Mon, 2 Dec 2024 16:58:46 +0100
Subject: [PATCH 063/190] added information about optional boundary datastore

---
 README.md                     | 22 +++++++++++++---------
 neural_lam/weather_dataset.py |  2 --
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index e21b7c24..7a5e5caf 100644
--- a/README.md
+++ b/README.md
@@ -108,7 +108,9 @@ Once `neural-lam` is installed you will be able to train/evaluate models. For th
   interface that provides the data in a data-structure that can be used within
   neural-lam. A datastore is used to create a `pytorch.Dataset`-derived
   class that samples the data in time to create individual samples for
-  training, validation and testing.
+  training, validation and testing. A secondary datastore can be provided
+  for the boundary data. Currently, the boundary datastore must be of type `mdp`
+  and may only contain forcing features. This can easily be expanded in the future.

 2. **The graph structure** is used to define message-passing GNN layers,
    that are trained to emulate fluid flow in the atmosphere over time. The
@@ -121,7 +123,7 @@ different aspects about the training and evaluation of the model.

 The path you provide to the neural-lam config (`config.yaml`) also sets the
 root directory relative to which all other paths are resolved, as in the parent
-directory of the config becomes the root directory. Both the datastore and
+directory of the config becomes the root directory. Both the datastores and
 graphs you generate are then stored in subdirectories of this root directory.
 Exactly how and where a specific datastore expects its source data to be stored
 and where it stores its derived data is up to the implementation of the
@@ -134,6 +136,7 @@ assume you placed `config.yaml` in a folder called `data`):
 data/
 ├── config.yaml - Configuration file for neural-lam
 ├── danra.datastore.yaml - Configuration file for the datastore, referred to from config.yaml
+├── era5.datastore.yaml - Optional configuration file for the boundary datastore, referred to from config.yaml
 └── graphs/ - Directory containing graphs for training
 ```

 And the content of `config.yaml` could in this case look like:
 ```yaml
 datastore:
   kind: mdp
   config_path: danra.datastore.yaml
+datastore_boundary:
+  kind: mdp
+  config_path: era5.datastore.yaml
 training:
   state_feature_weighting:
     __config_class__: ManualStateFeatureWeighting
-    values:
+    weights:
       u100m: 1.0
       v100m: 1.0
 ```

-For now the neural-lam config only defines two things: 1) the kind of data
-store and the path to its config, and 2) the weighting of different features in
-the loss function. If you don't define the state feature weighting it will default
-to weighting all features equally.
+For now the neural-lam config only defines two things:
+1) the kind of datastores and the path to their configs
+2) the weighting of different features in the loss function. If you don't define the state feature weighting, it will default to weighting all features equally.

 (This example is taken from the `tests/datastore_examples/mdp` directory.)

@@ -525,5 +530,4 @@ Furthermore, all tests in the ```tests``` directory will be run upon pushing cha
 # Contact
 If you are interested in machine learning models for LAM, have questions about the implementation or ideas for extending it, feel free to get in touch.
-There is an open [mllam slack channel](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) that anyone can join (after following the link you have to request to join, this is to avoid spam bots). -You can also open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). +There is an open [mllam slack channel](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) that anyone can join. You can also open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 4bc9d5c7..8d82229f 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -142,8 +142,6 @@ def __init__( else: self.da_state = self.da_state - - # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time From 9af27e0741894319860d11eb22cd9e9fd398e1ec Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 19:46:37 +0100 Subject: [PATCH 064/190] add datastore_boundary to neural_lam --- neural_lam/train_model.py | 22 ++++++++++++++++++++++ neural_lam/weather_dataset.py | 10 ++++++++++ 2 files changed, 32 insertions(+) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 74146c89..37bf6db7 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -34,6 +34,11 @@ def main(input_args=None): type=str, help="Path to the configuration for neural-lam", ) + parser.add_argument( + "--config_path_boundary", + type=str, + help="Path to the configuration for boundary conditions", + ) parser.add_argument( "--model", type=str, @@ -212,6 +217,9 @@ def main(input_args=None): assert ( args.config_path is not None ), "Specify your config with --config_path" + assert ( + args.config_path_boundary is not None + ), "Specify your config with --config_path_boundary" assert args.model in MODELS, f"Unknown model: {args.model}" assert args.eval in ( None, @@ -227,10 +235,24 @@ def main(input_args=None): # Load neural-lam configuration and datastore to use config, datastore = load_config_and_datastore(config_path=args.config_path) + config_boundary, datastore_boundary = load_config_and_datastore( + config_path=args.config_path_boundary + ) + + # TODO this should not be required, make more flexible + assert ( + datastore.num_past_forcing_steps + == datastore_boundary.num_past_forcing_steps + ), "Mismatch in num_past_forcing_steps" + assert ( + datastore.num_future_forcing_steps + == datastore_boundary.num_future_forcing_steps + ), "Mismatch in num_future_forcing_steps" # Create datamodule data_module = WeatherDataModule( datastore=datastore, + datastore_boundary=datastore_boundary, ar_steps_train=args.ar_steps_train, ar_steps_eval=args.ar_steps_eval, standardize=True, diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index b5f85580..75f7e04e 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -22,6 +22,8 @@ class WeatherDataset(torch.utils.data.Dataset): ---------- datastore : BaseDatastore The datastore to load the data from (e.g. mdp). + datastore_boundary : BaseDatastore + The boundary datastore to load the data from (e.g. mdp). split : str, optional The data split to use ("train", "val" or "test"). Default is "train". 
ar_steps : int, optional @@ -43,6 +45,7 @@ class WeatherDataset(torch.utils.data.Dataset): def __init__( self, datastore: BaseDatastore, + datastore_boundary: BaseDatastore, split="train", ar_steps=3, num_past_forcing_steps=1, @@ -54,6 +57,7 @@ def __init__( self.split = split self.ar_steps = ar_steps self.datastore = datastore + self.datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps @@ -606,6 +610,7 @@ class WeatherDataModule(pl.LightningDataModule): def __init__( self, datastore: BaseDatastore, + datastore_boundary: BaseDatastore, ar_steps_train=3, ar_steps_eval=25, standardize=True, @@ -616,6 +621,7 @@ def __init__( ): super().__init__() self._datastore = datastore + self._datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps self.ar_steps_train = ar_steps_train @@ -627,6 +633,7 @@ def __init__( self.val_dataset = None self.test_dataset = None if num_workers > 0: + # BUG: There also seem to be issues with "spawn", to be investigated # default to spawn for now, as the default on linux "fork" hangs # when using dask (which the npyfilesmeps datastore uses) self.multiprocessing_context = "spawn" @@ -637,6 +644,7 @@ def setup(self, stage=None): if stage == "fit" or stage is None: self.train_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="train", ar_steps=self.ar_steps_train, standardize=self.standardize, @@ -645,6 +653,7 @@ def setup(self, stage=None): ) self.val_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="val", ar_steps=self.ar_steps_eval, standardize=self.standardize, @@ -655,6 +664,7 @@ def setup(self, stage=None): if stage == "test" or stage is None: self.test_dataset = WeatherDataset( datastore=self._datastore, + datastore_boundary=self._datastore_boundary, split="test", ar_steps=self.ar_steps_eval, standardize=self.standardize, From c25fb30ab6b9fc8038227a590b5551f1660dbe19 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 20:15:41 +0100 Subject: [PATCH 065/190] complete integration of boundary in weatherDataset --- neural_lam/weather_dataset.py | 55 ++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 75f7e04e..7585207c 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -67,6 +67,9 @@ def __init__( self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) + self.da_boundary = self.datastore_boundary.get_dataarray( + category="boundary", split=self.split + ) # check that with the provided data-arrays and ar_steps that we have a # non-zero amount of samples @@ -118,6 +121,15 @@ def __init__( self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std + if self.da_boundary is not None: + self.ds_boundary_stats = ( + self.datastore_boundary.get_standardization_dataarray( + category="boundary" + ) + ) + self.da_boundary_mean = self.ds_boundary_stats.boundary_mean + self.da_boundary_std = self.ds_boundary_stats.boundary_std + def __len__(self): if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time @@ -352,6 +364,8 @@ def _build_item_dataarrays(self, idx): The dataarray for the target states. 
da_forcing_windowed : xr.DataArray The dataarray for the forcing data, windowed for the sample. + da_boundary_windowed : xr.DataArray + The dataarray for the boundary data, windowed for the sample. da_target_times : xr.DataArray The dataarray for the target times. """ @@ -381,6 +395,11 @@ def _build_item_dataarrays(self, idx): else: da_forcing = None + if self.da_boundary is not None: + da_boundary = self.da_boundary + else: + da_boundary = None + # handle time sampling in a way that is compatible with both analysis # and forecast data da_state = self._slice_state_time( @@ -390,11 +409,17 @@ def _build_item_dataarrays(self, idx): da_forcing_windowed = self._slice_forcing_time( da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps ) + if da_boundary is not None: + da_boundary_windowed = self._slice_forcing_time( + da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps + ) # load the data into memory da_state.load() if da_forcing is not None: da_forcing_windowed.load() + if da_boundary is not None: + da_boundary_windowed.load() da_init_states = da_state.isel(time=slice(0, 2)) da_target_states = da_state.isel(time=slice(2, None)) @@ -417,6 +442,11 @@ def _build_item_dataarrays(self, idx): da_forcing_windowed - self.da_forcing_mean ) / self.da_forcing_std + if da_boundary is not None: + da_boundary_windowed = ( + da_boundary_windowed - self.da_boundary_mean + ) / self.da_boundary_std + if da_forcing is not None: # stack the `forcing_feature` and `window_sample` dimensions into a # single `forcing_feature` dimension @@ -436,11 +466,31 @@ def _build_item_dataarrays(self, idx): "forcing_feature": [], }, ) + if da_boundary is not None: + # stack the `forcing_feature` and `window_sample` dimensions into a + # single `forcing_feature` dimension + da_boundary_windowed = da_boundary_windowed.stack( + boundary_feature_windowed=("boundary_feature", "window") + ) + else: + # create an empty forcing tensor with the right shape + da_boundary_windowed = xr.DataArray( + data=np.empty( + (self.ar_steps, da_state.grid_index.size, 0), + ), + dims=("time", "grid_index", "boundary_feature"), + coords={ + "time": da_target_times, + "grid_index": da_state.grid_index, + "boundary_feature": [], + }, + ) return ( da_init_states, da_target_states, da_forcing_windowed, + da_boundary_windowed, da_target_times, ) @@ -475,6 +525,7 @@ def __getitem__(self, idx): da_init_states, da_target_states, da_forcing_windowed, + da_boundary_windowed, da_target_times, ) = self._build_item_dataarrays(idx=idx) @@ -491,13 +542,15 @@ def __getitem__(self, idx): ) forcing = torch.tensor(da_forcing_windowed.values, dtype=tensor_dtype) + boundary = torch.tensor(da_boundary_windowed.values, dtype=tensor_dtype) # init_states: (2, N_grid, d_features) # target_states: (ar_steps, N_grid, d_features) # forcing: (ar_steps, N_grid, d_windowed_forcing) + # boundary: (ar_steps, N_grid, d_windowed_boundary) # target_times: (ar_steps,) - return init_states, target_states, forcing, target_times + return init_states, target_states, forcing, boundary, target_times def __iter__(self): """ From 505ceeb589c3398d37100a6073fa5590e7d786c2 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 20:15:55 +0100 Subject: [PATCH 066/190] Add test to check timestep length and spacing --- neural_lam/weather_dataset.py | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 7585207c..8e55d4a5 100644 --- a/neural_lam/weather_dataset.py +++ 
b/neural_lam/weather_dataset.py @@ -101,6 +101,82 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + # Check time coverage for forcing and boundary data + if self.da_forcing is not None or self.da_boundary is not None: + state_times = self.da_state.time + state_time_min = state_times.min().values + state_time_max = state_times.max().values + + def get_time_step(times): + """Calculate the time step from the data""" + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + + if self.da_forcing is not None: + forcing_times = self.da_forcing.time + forcing_time_step = get_time_step(forcing_times.values) + forcing_time_min = forcing_times.min().values + forcing_time_max = forcing_times.max().values + + # Calculate required bounds for forcing using its time step + forcing_required_time_min = ( + state_time_min + - self.num_past_forcing_steps * forcing_time_step + ) + forcing_required_time_max = ( + state_time_max + + self.num_future_forcing_steps * forcing_time_step + ) + + if forcing_time_min > forcing_required_time_min: + raise ValueError( + f"Forcing data starts too late." + f"Required start: {forcing_required_time_min}, " + f"but forcing starts at {forcing_time_min}." + ) + + if forcing_time_max < forcing_required_time_max: + raise ValueError( + f"Forcing data ends too early." + f"Required end: {forcing_required_time_max}," + f"but forcing ends at {forcing_time_max}." + ) + + if self.da_boundary is not None: + boundary_times = self.da_boundary.time + boundary_time_step = get_time_step(boundary_times.values) + boundary_time_min = boundary_times.min().values + boundary_time_max = boundary_times.max().values + + # Calculate required bounds for boundary using its time step + boundary_required_time_min = ( + state_time_min + - self.num_past_forcing_steps * boundary_time_step + ) + boundary_required_time_max = ( + state_time_max + + self.num_future_forcing_steps * boundary_time_step + ) + + if boundary_time_min > boundary_required_time_min: + raise ValueError( + f"Boundary data starts too late." + f"Required start: {boundary_required_time_min}, " + f"but boundary starts at {boundary_time_min}." + ) + + if boundary_time_max < boundary_required_time_max: + raise ValueError( + f"Boundary data ends too early." + f"Required end: {boundary_required_time_max}, " + f"but boundary ends at {boundary_time_max}." + ) + # Set up for standardization # TODO: This will become part of ar_model.py soon! self.standardize = standardize From e7330664661bd336caf40842dfb46a406b120721 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 21:43:57 +0100 Subject: [PATCH 067/190] setting default mdp boundary to 0 gridcells --- neural_lam/datastore/mdp.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 0d1aac7b..b6f1676c 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -27,7 +27,7 @@ class MDPDatastore(BaseRegularGridDatastore): SHORT_NAME = "mdp" - def __init__(self, config_path, n_boundary_points=30, reuse_existing=True): + def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): """ Construct a new MDPDatastore from the configuration file at `config_path`. 
A boundary mask is created with `n_boundary_points` @@ -336,19 +336,22 @@ def boundary_mask(self) -> xr.DataArray: boundary point and 0 is not. """ - ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) - da_state_variable = ( - ds_unstacked["state"].isel(time=0).isel(state_feature=0) - ) - da_domain_allzero = xr.zeros_like(da_state_variable) - ds_unstacked["boundary_mask"] = da_domain_allzero.isel( - x=slice(self._n_boundary_points, -self._n_boundary_points), - y=slice(self._n_boundary_points, -self._n_boundary_points), - ) - ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( - 1 - ).astype(int) - return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) + if self._n_boundary_points > 0: + ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) + da_state_variable = ( + ds_unstacked["state"].isel(time=0).isel(state_feature=0) + ) + da_domain_allzero = xr.zeros_like(da_state_variable) + ds_unstacked["boundary_mask"] = da_domain_allzero.isel( + x=slice(self._n_boundary_points, -self._n_boundary_points), + y=slice(self._n_boundary_points, -self._n_boundary_points), + ) + ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( + 1 + ).astype(int) + return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) + else: + return None @property def coords_projection(self) -> ccrs.Projection: From d8349a4801654c152f14924aa86d08c4ab952468 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 18 Nov 2024 21:44:54 +0100 Subject: [PATCH 068/190] implement time-based slicing combine two slicing fcts into one --- neural_lam/weather_dataset.py | 300 ++++++++++++++++++---------------- 1 file changed, 161 insertions(+), 139 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 8e55d4a5..5559e838 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -67,8 +67,9 @@ def __init__( self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) + # XXX For now boundary data is always considered forcing data self.da_boundary = self.datastore_boundary.get_dataarray( - category="boundary", split=self.split + category="forcing", split=self.split ) # check that with the provided data-arrays and ar_steps that we have a @@ -200,7 +201,7 @@ def get_time_step(times): if self.da_boundary is not None: self.ds_boundary_stats = ( self.datastore_boundary.get_standardization_dataarray( - category="boundary" + category="forcing" ) ) self.da_boundary_mean = self.ds_boundary_stats.boundary_mean @@ -252,175 +253,156 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_state_time(self, da_state, idx, n_steps: int): + def _slice_time(self, da_state, da_forcing, idx, n_steps: int): """ - Produce a time slice of the given dataarray `da_state` (state) starting - at `idx` and with `n_steps` steps. An `offset`is calculated based on the - `num_past_forcing_steps` class attribute. `Offset` is used to offset the - start of the sample, to assert that enough previous time steps are - available for the 2 initial states and any corresponding forcings - (calculated in `_slice_forcing_time`). + Produce time slices of the given dataarrays `da_state` (state) and + `da_forcing` (forcing). For the state data, slicing is done as before + based on `idx`. For the forcing data, nearest neighbor matching is + performed based on the state times. Additionally, the time difference + between the matched forcing times and state times (in multiples of state + time steps) is added to the forcing dataarray. 
Parameters ---------- da_state : xr.DataArray - The dataarray to slice. This is expected to have a `time` dimension - if the datastore is providing analysis only data, and a - `analysis_time` and `elapsed_forecast_duration` dimensions if the - datastore is providing forecast data. + The state dataarray to slice. + da_forcing : xr.DataArray + The forcing dataarray to slice. idx : int - The index of the time step to start the sample from. + The index of the time step to start the sample from in the state + data. n_steps : int The number of time steps to include in the sample. Returns ------- - da_sliced : xr.DataArray - The sliced dataarray with dims ('time', 'grid_index', + da_state_sliced : xr.DataArray + The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). + da_forcing_matched : xr.DataArray + The forcing dataarray matched to state times with an added + coordinate 'time_diff', representing the time difference to state + times in multiples of state time steps. """ - # The current implementation requires at least 2 time steps for the - # initial state (see GraphCast). + # Number of initial steps required (e.g., for initializing models) init_steps = 2 - # slice the dataarray to include the required number of time steps + + # Slice the state data as before if self.datastore.is_forecast: + # Calculate start and end indices for slicing start_idx = max(0, self.num_past_forcing_steps - init_steps) end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps - # this implies that the data will have both `analysis_time` and - # `elapsed_forecast_duration` dimensions for forecasts. We for now - # simply select a analysis time and the first `n_steps` forecast - # times (given no offset). Note that this means that we get one - # sample per forecast, always starting at forecast time 2. - da_sliced = da_state.isel( + + # Slice the state data over the elapsed forecast duration + da_state_sliced = da_state.isel( analysis_time=idx, elapsed_forecast_duration=slice(start_idx, end_idx), ) - # create a new time dimension so that the produced sample has a - # `time` dimension, similarly to the analysis only data - da_sliced["time"] = ( - da_sliced.analysis_time + da_sliced.elapsed_forecast_duration + + # Create a new 'time' dimension + da_state_sliced["time"] = ( + da_state_sliced.analysis_time + + da_state_sliced.elapsed_forecast_duration ) - da_sliced = da_sliced.swap_dims( + da_state_sliced = da_state_sliced.swap_dims( {"elapsed_forecast_duration": "time"} ) + else: - # For analysis data we slice the time dimension directly. The offset - # is only relevant for the very first (and last) samples in the - # dataset. + # For analysis data, slice the time dimension directly start_idx = idx + max(0, self.num_past_forcing_steps - init_steps) end_idx = ( idx + max(init_steps, self.num_past_forcing_steps) + n_steps ) - da_sliced = da_state.isel(time=slice(start_idx, end_idx)) - return da_sliced + da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) - def _slice_forcing_time(self, da_forcing, idx, n_steps: int): - """ - Produce a time slice of the given dataarray `da_forcing` (forcing) - starting at `idx` and with `n_steps` steps. An `offset` is calculated - based on the `num_past_forcing_steps` class attribute. It is used to - offset the start of the sample, to ensure that enough previous time - steps are available for the forcing data. The forcing data is windowed - around the current autoregressive time step to include the past and - future forcings. 
- - Parameters - ---------- - da_forcing : xr.DataArray - The forcing dataarray to slice. This is expected to have a `time` - dimension if the datastore is providing analysis only data, and a - `analysis_time` and `elapsed_forecast_duration` dimensions if the - datastore is providing forecast data. - idx : int - The index of the time step to start the sample from. - n_steps : int - The number of time steps to include in the sample. - - Returns - ------- - da_concat : xr.DataArray - The sliced dataarray with dims ('time', 'grid_index', - 'window', 'forcing_feature'). - """ - # The current implementation requires at least 2 time steps for the - # initial state (see GraphCast). The forcing data is windowed around the - # current autregressive time step. The two `init_steps` can also be used - # as past forcings. - init_steps = 2 - da_list = [] + # Get the state times for matching + state_times = da_state_sliced["time"] + # Match forcing data to state times based on nearest neighbor if self.datastore.is_forecast: - # This implies that the data will have both `analysis_time` and - # `elapsed_forecast_duration` dimensions for forecasts. We for now - # simply select an analysis time and the first `n_steps` forecast - # times (given no offset). Note that this means that we get one - # sample per forecast. - # Add a 'time' dimension using the actual forecast times - offset = max(init_steps, self.num_past_forcing_steps) - for step in range(n_steps): - start_idx = offset + step - self.num_past_forcing_steps - end_idx = offset + step + self.num_future_forcing_steps - - current_time = ( - da_forcing.analysis_time[idx] - + da_forcing.elapsed_forecast_duration[offset + step] - ) - - da_sliced = da_forcing.isel( - analysis_time=idx, - elapsed_forecast_duration=slice(start_idx, end_idx + 1), - ) - - da_sliced = da_sliced.rename( - {"elapsed_forecast_duration": "window"} - ) + # Calculate all possible forcing times + forcing_times = ( + da_forcing.analysis_time + da_forcing.elapsed_forecast_duration + ) + forcing_times_flat = forcing_times.stack( + forecast_time=("analysis_time", "elapsed_forecast_duration") + ) - # Assign the 'window' coordinate to be relative positions - da_sliced = da_sliced.assign_coords( - window=np.arange(len(da_sliced.window)) - ) + # Compute time differences + time_deltas = ( + forcing_times_flat.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) + time_diffs = np.abs(time_deltas) + idx_min = time_diffs.argmin(axis=0) + + # Retrieve corresponding indices for analysis_time and + # elapsed_forecast_duration + forecast_time_index = forcing_times_flat["forecast_time"][idx_min] + analysis_time_indices = forecast_time_index["analysis_time"] + elapsed_forecast_duration_indices = forecast_time_index[ + "elapsed_forecast_duration" + ] + + # Slice the forcing data using matched indices + da_forcing_matched = da_forcing.isel( + analysis_time=("time", analysis_time_indices), + elapsed_forecast_duration=( + "time", + elapsed_forecast_duration_indices, + ), + ) - da_sliced = da_sliced.expand_dims( - dim={"time": [current_time.values]} - ) + # Assign matched state times to the forcing data + da_forcing_matched["time"] = state_times + da_forcing_matched = da_forcing_matched.swap_dims( + {"elapsed_forecast_duration": "time"} + ) - da_list.append(da_sliced) + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] + time_diff_steps = ( + time_deltas[idx_min, np.arange(len(state_times))] + / state_time_step + ) - # 
Concatenate the list of DataArrays along the 'time' dimension - da_concat = xr.concat(da_list, dim="time") + # Add time difference as a new coordinate + da_forcing_matched = da_forcing_matched.assign_coords( + time_diff=("time", time_diff_steps) + ) else: - # For analysis data, we slice the time dimension directly. The - # offset is only relevant for the very first (and last) samples in - # the dataset. - offset = idx + max(init_steps, self.num_past_forcing_steps) - for step in range(n_steps): - start_idx = offset + step - self.num_past_forcing_steps - end_idx = offset + step + self.num_future_forcing_steps - - # Slice the data over the desired time window - da_sliced = da_forcing.isel(time=slice(start_idx, end_idx + 1)) - - da_sliced = da_sliced.rename({"time": "window"}) - - # Assign the 'window' coordinate to be relative positions - da_sliced = da_sliced.assign_coords( - window=np.arange(len(da_sliced.window)) - ) + # For analysis data, match directly using the 'time' coordinate + forcing_times = da_forcing["time"] - # Add a 'time' dimension to keep track of steps using actual - # time coordinates - current_time = da_forcing.time[offset + step] - da_sliced = da_sliced.expand_dims( - dim={"time": [current_time.values]} - ) + # Compute time differences + time_deltas = ( + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) + time_diffs = np.abs(time_deltas) + idx_min = time_diffs.argmin(axis=0) - da_list.append(da_sliced) + # Slice the forcing data using matched indices + da_forcing_matched = da_forcing.isel(time=idx_min) + da_forcing_matched = da_forcing_matched.assign_coords( + time=state_times + ) - # Concatenate the list of DataArrays along the 'time' dimension - da_concat = xr.concat(da_list, dim="time") + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] + time_diff_steps = ( + time_deltas[idx_min, np.arange(len(state_times))] + / state_time_step + ) - return da_concat + # Add time difference as a new coordinate + da_forcing_matched = da_forcing_matched.assign_coords( + time_diff=("time", time_diff_steps) + ) + + return da_state_sliced, da_forcing_matched def _build_item_dataarrays(self, idx): """ @@ -442,6 +424,7 @@ def _build_item_dataarrays(self, idx): The dataarray for the forcing data, windowed for the sample. da_boundary_windowed : xr.DataArray The dataarray for the boundary data, windowed for the sample. + Boundary data is always considered forcing data. da_target_times : xr.DataArray The dataarray for the target times. 
""" @@ -478,15 +461,15 @@ def _build_item_dataarrays(self, idx): # handle time sampling in a way that is compatible with both analysis # and forecast data - da_state = self._slice_state_time( + da_state = self._slice_time( da_state=da_state, idx=idx, n_steps=self.ar_steps ) if da_forcing is not None: - da_forcing_windowed = self._slice_forcing_time( + da_forcing_windowed = self._slice_time( da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps ) if da_boundary is not None: - da_boundary_windowed = self._slice_forcing_time( + da_boundary_windowed = self._slice_time( da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps ) @@ -524,13 +507,32 @@ def _build_item_dataarrays(self, idx): ) / self.da_boundary_std if da_forcing is not None: - # stack the `forcing_feature` and `window_sample` dimensions into a - # single `forcing_feature` dimension + # Expand 'time_diff' to align with 'forcing_feature' and 'window' + # dimensions 'time_diff' has dimension ('time'), expand to ('time', + # 'forcing_feature', 'window') + time_diff_expanded = da_forcing_windowed["time_diff"].expand_dims( + forcing_feature=da_forcing_windowed["forcing_feature"], + window=da_forcing_windowed["window"], + ) + + # Stack 'forcing_feature' and 'window' into a single + # 'forcing_feature_windowed' dimension da_forcing_windowed = da_forcing_windowed.stack( forcing_feature_windowed=("forcing_feature", "window") ) + time_diff_expanded = time_diff_expanded.stack( + forcing_feature_windowed=("forcing_feature", "window") + ) + + # Assign 'time_diff' as a coordinate to 'forcing_feature_windowed' + da_forcing_windowed = da_forcing_windowed.assign_coords( + time_diff=( + "forcing_feature_windowed", + time_diff_expanded.values, + ) + ) else: - # create an empty forcing tensor with the right shape + # Create an empty forcing tensor with the right shape da_forcing_windowed = xr.DataArray( data=np.empty( (self.ar_steps, da_state.grid_index.size, 0), @@ -542,14 +544,34 @@ def _build_item_dataarrays(self, idx): "forcing_feature": [], }, ) + if da_boundary is not None: - # stack the `forcing_feature` and `window_sample` dimensions into a - # single `forcing_feature` dimension + # If 'da_boundary_windowed' also has 'time_diff', process similarly + # Expand 'time_diff' to align with 'boundary_feature' and 'window' + # dimensions + time_diff_expanded = da_boundary_windowed["time_diff"].expand_dims( + boundary_feature=da_boundary_windowed["boundary_feature"], + window=da_boundary_windowed["window"], + ) + + # Stack 'boundary_feature' and 'window' into a single + # 'boundary_feature_windowed' dimension da_boundary_windowed = da_boundary_windowed.stack( boundary_feature_windowed=("boundary_feature", "window") ) + time_diff_expanded = time_diff_expanded.stack( + boundary_feature_windowed=("boundary_feature", "window") + ) + + # Assign 'time_diff' as a coordinate to 'boundary_feature_windowed' + da_boundary_windowed = da_boundary_windowed.assign_coords( + time_diff=( + "boundary_feature_windowed", + time_diff_expanded.values, + ) + ) else: - # create an empty forcing tensor with the right shape + # Create an empty boundary tensor with the right shape da_boundary_windowed = xr.DataArray( data=np.empty( (self.ar_steps, da_state.grid_index.size, 0), From fd791bfb51c3c751ff4af8d74eaa47c81b63a1eb Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 06:26:54 +0100 Subject: [PATCH 069/190] remove all interior_mask and boundary_mask --- neural_lam/datastore/base.py | 17 ---- neural_lam/datastore/mdp.py | 34 -------- 
neural_lam/datastore/npyfilesmeps/store.py | 28 ------ neural_lam/models/ar_model.py | 53 +++--------- neural_lam/vis.py | 12 --- .../config.yaml | 18 ++++ .../era5.datastore.yaml | 85 +++++++++++++++++++ .../meps_example_reduced.datastore.yaml | 44 ++++++++++ tests/dummy_datastore.py | 22 ----- tests/test_datastores.py | 21 ----- 10 files changed, 157 insertions(+), 177 deletions(-) create mode 100644 tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/config.yaml create mode 100644 tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml create mode 100644 tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/meps_example_reduced.datastore.yaml diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index b0055e39..e2d21404 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -228,23 +228,6 @@ def get_dataarray( """ pass - @cached_property - @abc.abstractmethod - def boundary_mask(self) -> xr.DataArray: - """ - Return the boundary mask for the dataset, with spatial dimensions - stacked. Where the value is 1, the grid point is a boundary point, and - where the value is 0, the grid point is not a boundary point. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions - `('grid_index',)`. - - """ - pass - @abc.abstractmethod def get_xy(self, category: str) -> np.ndarray: """ diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index b6f1676c..e662cb63 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -319,40 +319,6 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: ds_stats = self._ds[stats_variables.keys()].rename(stats_variables) return ds_stats - @cached_property - def boundary_mask(self) -> xr.DataArray: - """ - Produce a 0/1 mask for the boundary points of the dataset, these will - sit at the edges of the domain (in x/y extent) and will be used to mask - out the boundary points from the loss function and to overwrite the - boundary points from the prediction. For now this is created when the - mask is requested, but in the future this could be saved to the zarr - file. - - Returns - ------- - xr.DataArray - A 0/1 mask for the boundary points of the dataset, where 1 is a - boundary point and 0 is not. - - """ - if self._n_boundary_points > 0: - ds_unstacked = self.unstack_grid_coords(da_or_ds=self._ds) - da_state_variable = ( - ds_unstacked["state"].isel(time=0).isel(state_feature=0) - ) - da_domain_allzero = xr.zeros_like(da_state_variable) - ds_unstacked["boundary_mask"] = da_domain_allzero.isel( - x=slice(self._n_boundary_points, -self._n_boundary_points), - y=slice(self._n_boundary_points, -self._n_boundary_points), - ) - ds_unstacked["boundary_mask"] = ds_unstacked.boundary_mask.fillna( - 1 - ).astype(int) - return self.stack_grid_coords(da_or_ds=ds_unstacked.boundary_mask) - else: - return None - @property def coords_projection(self) -> ccrs.Projection: """ diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 42e80706..146b0627 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -668,34 +668,6 @@ def grid_shape_state(self) -> CartesianGridShape: ny, nx = self.config.grid_shape_state return CartesianGridShape(x=nx, y=ny) - @cached_property - def boundary_mask(self) -> xr.DataArray: - """The boundary mask for the dataset. 
This is a binary mask that is 1 - where the grid cell is on the boundary of the domain, and 0 otherwise. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions `[grid_index]`. - - """ - xy = self.get_xy(category="state", stacked=False) - xs = xy[:, :, 0] - ys = xy[:, :, 1] - # Check if x-coordinates are constant along columns - assert np.allclose(xs, xs[:, [0]]), "x-coordinates are not constant" - # Check if y-coordinates are constant along rows - assert np.allclose(ys, ys[[0], :]), "y-coordinates are not constant" - # Extract unique x and y coordinates - x = xs[:, 0] # Unique x-coordinates (changes along the first axis) - y = ys[0, :] # Unique y-coordinates (changes along the second axis) - values = np.load(self.root_path / "static" / "border_mask.npy") - da_mask = xr.DataArray( - values, dims=["y", "x"], coords=dict(x=x, y=y), name="boundary_mask" - ) - da_mask_stacked_xy = self.stack_grid_coords(da_mask).astype(int) - return da_mask_stacked_xy - def get_standardization_dataarray(self, category: str) -> xr.Dataset: """Return the standardization dataarray for the given category. This should contain a `{category}_mean` and `{category}_std` variable for diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 44baf9c2..710efcec 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -45,7 +45,6 @@ def __init__( da_state_stats = datastore.get_standardization_dataarray( category="state" ) - da_boundary_mask = datastore.boundary_mask num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps @@ -118,18 +117,6 @@ def __init__( # Instantiate loss function self.loss = metrics.get_metric(args.loss) - boundary_mask = torch.tensor( - da_boundary_mask.values, dtype=torch.float32 - ).unsqueeze( - 1 - ) # add feature dim - - self.register_buffer("boundary_mask", boundary_mask, persistent=False) - # Pre-compute interior mask for use in loss function - self.register_buffer( - "interior_mask", 1.0 - self.boundary_mask, persistent=False - ) # (num_grid_nodes, 1), 1 for non-border - self.val_metrics = { "mse": [], } @@ -194,13 +181,6 @@ def configure_optimizers(self): ) return opt - @property - def interior_mask_bool(self): - """ - Get the interior mask as a boolean (N,) mask. 
- """ - return self.interior_mask[:, 0].to(torch.bool) - @staticmethod def expand_to_batch(x, batch_size): """ @@ -232,7 +212,6 @@ def unroll_prediction(self, init_states, forcing_features, true_states): for i in range(pred_steps): forcing = forcing_features[:, i] - border_state = true_states[:, i] pred_state, pred_std = self.predict_step( prev_state, prev_prev_state, forcing @@ -240,19 +219,13 @@ def unroll_prediction(self, init_states, forcing_features, true_states): # state: (B, num_grid_nodes, d_f) pred_std: (B, num_grid_nodes, # d_f) or None - # Overwrite border with true state - new_state = ( - self.boundary_mask * border_state - + self.interior_mask * pred_state - ) - - prediction_list.append(new_state) + prediction_list.append(pred_state) if self.output_std: pred_std_list.append(pred_std) # Update conditioning states prev_prev_state = prev_state - prev_state = new_state + prev_state = pred_state prediction = torch.stack( prediction_list, dim=1 @@ -290,12 +263,14 @@ def training_step(self, batch): """ prediction, target, pred_std, _ = self.common_step(batch) - # Compute loss + # Compute loss - mean over unrolled times and batch batch_loss = torch.mean( self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool + prediction, + target, + pred_std, ) - ) # mean over unrolled times and batch + ) log_dict = {"train_loss": batch_loss} self.log_dict( @@ -328,9 +303,7 @@ def validation_step(self, batch, batch_idx): prediction, target, pred_std, _ = self.common_step(batch) time_step_loss = torch.mean( - self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool - ), + self.loss(prediction, target, pred_std), dim=0, ) # (time_steps-1) mean_loss = torch.mean(time_step_loss) @@ -355,7 +328,6 @@ def validation_step(self, batch, batch_idx): prediction, target, pred_std, - mask=self.interior_mask_bool, sum_vars=False, ) # (B, pred_steps, d_f) self.val_metrics["mse"].append(entry_mses) @@ -382,9 +354,7 @@ def test_step(self, batch, batch_idx): # pred_steps, num_grid_nodes, d_f) or (d_f,) time_step_loss = torch.mean( - self.loss( - prediction, target, pred_std, mask=self.interior_mask_bool - ), + self.loss(prediction, target, pred_std), dim=0, ) # (time_steps-1,) mean_loss = torch.mean(time_step_loss) @@ -413,16 +383,13 @@ def test_step(self, batch, batch_idx): prediction, target, pred_std, - mask=self.interior_mask_bool, sum_vars=False, ) # (B, pred_steps, d_f) self.test_metrics[metric_name].append(batch_metric_vals) if self.output_std: # Store output std. 
per variable, spatially averaged - mean_pred_std = torch.mean( - pred_std[..., self.interior_mask_bool, :], dim=-2 - ) # (B, pred_steps, d_f) + mean_pred_std = torch.mean(pred_std, dim=-2) # (B, pred_steps, d_f) self.test_metrics["output_std"].append(mean_pred_std) # Save per-sample spatial loss for specific times diff --git a/neural_lam/vis.py b/neural_lam/vis.py index d6b57f88..efab20bf 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -87,11 +87,6 @@ def plot_prediction( extent = datastore.get_xy_extent("state") - # Set up masking of border region - da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_values = np.invert(da_mask.values.astype(bool)).astype(float) - pixel_alpha = mask_values.clip(0.7, 1) # Faded border region - fig, axes = plt.subplots( 1, 2, @@ -107,7 +102,6 @@ def plot_prediction( origin="lower", x="x", extent=extent, - alpha=pixel_alpha.T, vmin=vmin, vmax=vmax, cmap="plasma", @@ -141,11 +135,6 @@ def plot_spatial_error( extent = datastore.get_xy_extent("state") - # Set up masking of border region - da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_reshaped = da_mask.values - pixel_alpha = mask_reshaped.clip(0.7, 1) # Faded border region - fig, ax = plt.subplots( figsize=(5, 4.8), subplot_kw={"projection": datastore.coords_projection}, @@ -164,7 +153,6 @@ def plot_spatial_error( error_grid, origin="lower", extent=extent, - alpha=pixel_alpha, vmin=vmin, vmax=vmax, cmap="OrRd", diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/config.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/config.yaml new file mode 100644 index 00000000..27cc9764 --- /dev/null +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/config.yaml @@ -0,0 +1,18 @@ +datastore: + kind: npyfilesmeps + config_path: meps_example_reduced.datastore.yaml +datastore_boundary: + kind: mdp + config_path: era5.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + weights: + nlwrs_0: 1.0 + nswrs_0: 1.0 + pres_0g: 1.0 + pres_0s: 1.0 + r_2: 1.0 + r_65: 1.0 + t_2: 1.0 + t_65: 1.0 diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml new file mode 100644 index 00000000..600a1845 --- /dev/null +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml @@ -0,0 +1,85 @@ +schema_version: v0.5.0 +dataset_version: v1.0.0 + +output: + variables: + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 + step: PT6H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 + test: + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 + +inputs: + era_height_levels: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + u_component_of_wind: + level: + values: [1000,] + units: hPa + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + dims: 
[level] + name_format: "{var_name}{level}hPa" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + + era5_surface: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + - mean_surface_net_short_wave_radiation_flux + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + +extra: + projection: + class_name: PlateCarree + kwargs: + central_longitude: 0.0 diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/meps_example_reduced.datastore.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/meps_example_reduced.datastore.yaml new file mode 100644 index 00000000..3d88d4a4 --- /dev/null +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/meps_example_reduced.datastore.yaml @@ -0,0 +1,44 @@ +dataset: + name: meps_example_reduced + num_forcing_features: 16 + var_longnames: + - pres_heightAboveGround_0_instant + - pres_heightAboveSea_0_instant + - nlwrs_heightAboveGround_0_accum + - nswrs_heightAboveGround_0_accum + - r_heightAboveGround_2_instant + - r_hybrid_65_instant + - t_heightAboveGround_2_instant + - t_hybrid_65_instant + var_names: + - pres_0g + - pres_0s + - nlwrs_0 + - nswrs_0 + - r_2 + - r_65 + - t_2 + - t_65 + var_units: + - Pa + - Pa + - W/m**2 + - W/m**2 + - '' + - '' + - K + - K + num_timesteps: 65 + num_ensemble_members: 2 + step_length: 3 +grid_shape_state: +- 134 +- 119 +projection: + class_name: LambertConformal + kwargs: + central_latitude: 63.3 + central_longitude: 15.0 + standard_parallels: + - 63.3 + - 63.3 diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index 9075d404..d62c7356 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -148,12 +148,6 @@ def __init__( times = [self.T0 + dt * i for i in range(n_timesteps)] self.ds.coords["time"] = times - # Add boundary mask - self.ds["boundary_mask"] = xr.DataArray( - np.random.choice([0, 1], size=(n_points_1d, n_points_1d)), - dims=["x", "y"], - ) - # Stack the spatial dimensions into grid_index self.ds = self.ds.stack(grid_index=self.CARTESIAN_COORDS) @@ -342,22 +336,6 @@ def get_dataarray( dim_order = self.expected_dim_order(category=category) return self.ds[category].transpose(*dim_order) - @cached_property - def boundary_mask(self) -> xr.DataArray: - """ - Return the boundary mask for the dataset, with spatial dimensions - stacked. Where the value is 1, the grid point is a boundary point, and - where the value is 0, the grid point is not a boundary point. - - Returns - ------- - xr.DataArray - The boundary mask for the dataset, with dimensions - `('grid_index',)`. - - """ - return self.ds["boundary_mask"] - def get_xy(self, category: str, stacked: bool) -> ndarray: """Return the x, y coordinates of the dataset. diff --git a/tests/test_datastores.py b/tests/test_datastores.py index 4a4b1100..a91f6245 100644 --- a/tests/test_datastores.py +++ b/tests/test_datastores.py @@ -18,8 +18,6 @@ dataarray for the given category. - `get_dataarray` (method): Return the processed data (as a single `xr.DataArray`) for the given category and test/train/val-split. 
-- `boundary_mask` (property): Return the boundary mask for the dataset, - with spatial dimensions stacked. - `config` (property): Return the configuration of the datastore. In addition BaseRegularGridDatastore must have the following methods and @@ -213,25 +211,6 @@ def test_get_dataarray(datastore_name): assert n_features["train"] == n_features["val"] == n_features["test"] -@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_boundary_mask(datastore_name): - """Check that the `datastore.boundary_mask` property is implemented and - that the returned object is an xarray DataArray with the correct shape.""" - datastore = init_datastore_example(datastore_name) - da_mask = datastore.boundary_mask - - assert isinstance(da_mask, xr.DataArray) - assert set(da_mask.dims) == {"grid_index"} - assert da_mask.dtype == "int" - assert set(da_mask.values) == {0, 1} - assert da_mask.sum() > 0 - assert da_mask.sum() < da_mask.size - - if isinstance(datastore, BaseRegularGridDatastore): - grid_shape = datastore.grid_shape_state - assert datastore.boundary_mask.size == grid_shape.x * grid_shape.y - - @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) def test_get_xy_extent(datastore_name): """Check that the `datastore.get_xy_extent` method is implemented and that From ae82cdb8360d899b063bdf48a877a42184306cab Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:55:56 +0100 Subject: [PATCH 070/190] added gcsfs dependency for era5 weatherbench download --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index fdcb7f3e..38e7cb0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "torch-geometric==2.3.1", "parse>=1.20.2", "dataclass-wizard<0.31.0", + "gcsfs>=2021.10.0", "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" From 34a6cc7d24ffb218b2aef909cac7db06ffbef618 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:57:57 +0100 Subject: [PATCH 071/190] added new era5 datastore config for boundary --- tests/conftest.py | 19 +++- .../mdp/era5_1000hPa_winds/.gitignore | 2 + .../mdp/era5_1000hPa_winds/config.yaml | 3 + .../era5_1000hPa_winds/era5.datastore.yaml | 90 +++++++++++++++++++ 4 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml diff --git a/tests/conftest.py b/tests/conftest.py index 6f579621..be5cf3e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,6 +94,15 @@ def download_meps_example_reduced_dataset(): dummydata=None, ) +DATASTORES_BOUNDARY_EXAMPLES = dict( + mdp=( + DATASTORE_EXAMPLES_ROOT_PATH + / "mdp" + / "era5_1000hPa_winds" + / "era5.datastore.yaml" + ) +) + DATASTORES[DummyDatastore.SHORT_NAME] = DummyDatastore @@ -102,5 +111,13 @@ def init_datastore_example(datastore_kind): datastore_kind=datastore_kind, config_path=DATASTORES_EXAMPLES[datastore_kind], ) - return datastore + + +def init_datastore_boundary_example(datastore_kind): + datastore_boundary = init_datastore( + datastore_kind=datastore_kind, + config_path=DATASTORES_BOUNDARY_EXAMPLES[datastore_kind], + ) + + return datastore_boundary diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore b/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore new file mode 100644 index 00000000..f2828f46 --- /dev/null +++ 
b/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore @@ -0,0 +1,2 @@ +*.zarr/ +graph/ diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml new file mode 100644 index 00000000..5d1e05f2 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml @@ -0,0 +1,3 @@ +datastore: + kind: mdp + config_path: era5.datastore.yaml diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml new file mode 100644 index 00000000..36b39501 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml @@ -0,0 +1,90 @@ +#TODO: What do these versions mean? Should they be updated? +schema_version: v0.2.0+dev +dataset_version: v1.0.0 + +output: + variables: + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-02T00:00 + end: 1990-09-10T00:00 + step: PT6H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-02T00:00 + end: 1990-09-07T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-05T00:00 + end: 1990-09-08T00:00 + test: + start: 1990-09-06T00:00 + end: 1990-09-10T00:00 + +inputs: + era_height_levels: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + u_component_of_wind: + level: + values: [1000,] + units: hPa + v_component_of_wind: + level: + values: [1000, ] + units: hPa + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + dims: [level] + name_format: "{var_name}{level}hPa" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + + era5_surface: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + dims: [time, longitude, latitude, level] + variables: + - mean_surface_net_short_wave_radiation_flux + dim_mapping: + time: + method: rename + dim: time + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + grid_index: + method: stack + dims: [x, y] + target_output_variable: forcing + +extra: + projection: + class_name: PlateCarree + kwargs: + central_longitude: 0.0 From 2dc67a02e2acad0665452bfe336384de1cc34b4e Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:58:36 +0100 Subject: [PATCH 072/190] removed left-over boundary-mask references --- neural_lam/datastore/mdp.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index e662cb63..b28d2650 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -27,11 +27,10 @@ class MDPDatastore(BaseRegularGridDatastore): SHORT_NAME = "mdp" - def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): + def __init__(self, config_path, reuse_existing=True): """ Construct a new MDPDatastore from the configuration file at - `config_path`. A boundary mask is created with `n_boundary_points` - boundary points. If `reuse_existing` is True, the dataset is loaded + `config_path`. 
If `reuse_existing` is True, the dataset is loaded from a zarr file if it exists (unless the config has been modified since the zarr was created), otherwise it is created from the configuration file. @@ -42,8 +41,6 @@ def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): The path to the configuration file, this will be fed to the `mllam_data_prep.Config.from_yaml_file` method to then call `mllam_data_prep.create_dataset` to create the dataset. - n_boundary_points : int - The number of boundary points to use in the boundary mask. reuse_existing : bool Whether to reuse an existing dataset zarr file if it exists and its creation date is newer than the configuration file. @@ -70,7 +67,6 @@ def __init__(self, config_path, n_boundary_points=0, reuse_existing=True): if self._ds is None: self._ds = mdp.create_dataset(config=self._config) self._ds.to_zarr(fp_ds) - self._n_boundary_points = n_boundary_points print("The loaded datastore contains the following features:") for category in ["state", "forcing", "static"]: From 9f8628e03487a80ab3313656857b5fde3e6fde45 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:59:12 +0100 Subject: [PATCH 073/190] make check for existing category in datastore more flexible (for boundary) --- neural_lam/datastore/mdp.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index b28d2650..7b947c20 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -154,8 +154,8 @@ def get_vars_units(self, category: str) -> List[str]: The units of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature_units"].values.tolist() @@ -173,8 +173,8 @@ def get_vars_names(self, category: str) -> List[str]: The names of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature"].values.tolist() @@ -193,8 +193,8 @@ def get_vars_long_names(self, category: str) -> List[str]: The long names of the variables in the given category. """ - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") return [] return self._ds[f"{category}_feature_long_name"].values.tolist() @@ -249,9 +249,9 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: The xarray DataArray object with processed dataset. 
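As a rough usage sketch (the config path and split name below are
illustrative only, not taken from this patch series):

    from neural_lam.datastore.mdp import MDPDatastore

    datastore = MDPDatastore(config_path="danra.datastore.yaml")
    da_state = datastore.get_dataarray(category="state", split="train")
    # With the relaxed check introduced below, requesting a category that
    # is absent from the underlying dataset only emits a warning,
    # regardless of which category it is.
    da_forcing = datastore.get_dataarray(category="forcing", split="train")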
""" - if category not in self._ds and category == "forcing": - warnings.warn("no forcing data found in datastore") - return None + if category not in self._ds: + warnings.warn(f"no {category} data found in datastore") + return [] da_category = self._ds[category] From 388c79df3fdbbaa24ef025621a09dd25ac567ac5 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Wed, 20 Nov 2024 16:00:15 +0100 Subject: [PATCH 074/190] implement xarray based (mostly) time slicing and windowing --- neural_lam/weather_dataset.py | 255 +++++++++++++++------------------- 1 file changed, 111 insertions(+), 144 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 5559e838..555f2c35 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -64,10 +64,16 @@ def __init__( self.da_state = self.datastore.get_dataarray( category="state", split=self.split ) + if self.da_state is None: + raise ValueError( + "A non-empty state dataarray must be provided. " + "The datastore.get_dataarray() returned None or empty array " + "for category='state'" + ) self.da_forcing = self.datastore.get_dataarray( category="forcing", split=self.split ) - # XXX For now boundary data is always considered forcing data + # XXX For now boundary data is always considered mdp-forcing data self.da_boundary = self.datastore_boundary.get_dataarray( category="forcing", split=self.split ) @@ -102,53 +108,36 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + def get_time_step(times): + """Calculate the time step from the data""" + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + + # Check time step consistency in state data + _ = get_time_step(self.da_state.time.values) + # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: state_times = self.da_state.time state_time_min = state_times.min().values state_time_max = state_times.max().values - def get_time_step(times): - """Calculate the time step from the data""" - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] - if self.da_forcing is not None: + # Forcing data is part of the same datastore as state data + # During creation the time dimension of the forcing data + # is matched to the state data forcing_times = self.da_forcing.time - forcing_time_step = get_time_step(forcing_times.values) - forcing_time_min = forcing_times.min().values - forcing_time_max = forcing_times.max().values - - # Calculate required bounds for forcing using its time step - forcing_required_time_min = ( - state_time_min - - self.num_past_forcing_steps * forcing_time_step - ) - forcing_required_time_max = ( - state_time_max - + self.num_future_forcing_steps * forcing_time_step - ) - - if forcing_time_min > forcing_required_time_min: - raise ValueError( - f"Forcing data starts too late." - f"Required start: {forcing_required_time_min}, " - f"but forcing starts at {forcing_time_min}." - ) - - if forcing_time_max < forcing_required_time_max: - raise ValueError( - f"Forcing data ends too early." - f"Required end: {forcing_required_time_max}," - f"but forcing ends at {forcing_time_max}." 
- ) + _ = get_time_step(forcing_times.values) if self.da_boundary is not None: + # Boundary data is part of a separate datastore + # The boundary data is allowed to have a different time_step + # Check that the boundary data covers the required time range boundary_times = self.da_boundary.time boundary_time_step = get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values @@ -204,8 +193,8 @@ def get_time_step(times): category="forcing" ) ) - self.da_boundary_mean = self.ds_boundary_stats.boundary_mean - self.da_boundary_std = self.ds_boundary_stats.boundary_std + self.da_boundary_mean = self.ds_boundary_stats.forcing_mean + self.da_boundary_std = self.ds_boundary_stats.forcing_std def __len__(self): if self.datastore.is_forecast: @@ -253,7 +242,7 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_time(self, da_state, da_forcing, idx, n_steps: int): + def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): """ Produce time slices of the given dataarrays `da_state` (state) and `da_forcing` (forcing). For the state data, slicing is done as before @@ -316,8 +305,13 @@ def _slice_time(self, da_state, da_forcing, idx, n_steps: int): ) da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) + if da_forcing is None: + return da_state_sliced, None + # Get the state times for matching state_times = da_state_sliced["time"] + # Calculate time differences in multiples of state time steps + state_time_step = state_times.values[1] - state_times.values[0] # Match forcing data to state times based on nearest neighbor if self.datastore.is_forecast: @@ -371,39 +365,80 @@ def _slice_time(self, da_state, da_forcing, idx, n_steps: int): da_forcing_matched = da_forcing_matched.assign_coords( time_diff=("time", time_diff_steps) ) - else: # For analysis data, match directly using the 'time' coordinate forcing_times = da_forcing["time"] # Compute time differences time_deltas = ( - forcing_times.values[:, np.newaxis] - - state_times.values[np.newaxis, :] + state_times.values[np.newaxis, :] + - forcing_times.values[:, np.newaxis] + ) + idx_min = np.abs(time_deltas).argmin(axis=0) + + time_diff_steps = xr.DataArray( + np.stack( + [ + np.diagonal(time_deltas, offset=offset)[ + -len(state_times) + init_steps : + ] + / state_time_step + for offset in range( + -self.num_past_forcing_steps, + self.num_future_forcing_steps + 1, + ) + ], + axis=1, + ), + dims=["time", "window"], + coords={ + "time": state_times.isel(time=slice(init_steps, None)), + "window": np.arange( + -self.num_past_forcing_steps, + self.num_future_forcing_steps + 1, + ), + }, + name="time_diff_steps", ) - time_diffs = np.abs(time_deltas) - idx_min = time_diffs.argmin(axis=0) - # Slice the forcing data using matched indices - da_forcing_matched = da_forcing.isel(time=idx_min) - da_forcing_matched = da_forcing_matched.assign_coords( - time=state_times + # Create window dimension using rolling + window_size = ( + self.num_past_forcing_steps + self.num_future_forcing_steps + 1 ) - - # Calculate time differences in multiples of state time steps - state_time_step = state_times.values[1] - state_times.values[0] - time_diff_steps = ( - time_deltas[idx_min, np.arange(len(state_times))] - / state_time_step + da_forcing_windowed = da_forcing.rolling( + time=window_size, center=True + ).construct(window_dim="window") + da_forcing_matched = da_forcing_windowed.isel( + time=idx_min[init_steps:] ) # Add time difference as a new coordinate da_forcing_matched = da_forcing_matched.assign_coords( - 
time_diff=("time", time_diff_steps) + time_diff=time_diff_steps ) return da_state_sliced, da_forcing_matched + def _process_windowed_data(self, da_windowed, da_state, da_target_times): + """Helper function to process windowed data after standardization.""" + stacked_dim = "forcing_feature_windowed" + if da_windowed is not None: + # Stack the 'feature' and 'window' dimensions + da_windowed = da_windowed.stack( + {stacked_dim: ("forcing_feature", "window")} + ) + else: + # Create empty DataArray with the correct dimensions and coordinates + return xr.DataArray( + data=np.empty((self.ar_steps, da_state.grid_index.size, 0)), + dims=("time", "grid_index", f"{stacked_dim}"), + coords={ + "time": da_target_times, + "grid_index": da_state.grid_index, + f"{stacked_dim}": [], + }, + ) + def _build_item_dataarrays(self, idx): """ Create the dataarrays for the initial states, target states and forcing @@ -459,18 +494,21 @@ def _build_item_dataarrays(self, idx): else: da_boundary = None - # handle time sampling in a way that is compatible with both analysis - # and forecast data - da_state = self._slice_time( - da_state=da_state, idx=idx, n_steps=self.ar_steps + # if da_forcing is None, the function will return None for + # da_forcing_windowed + da_state, da_forcing_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_forcing, ) - if da_forcing is not None: - da_forcing_windowed = self._slice_time( - da_forcing=da_forcing, idx=idx, n_steps=self.ar_steps - ) + if da_boundary is not None: - da_boundary_windowed = self._slice_time( - da_forcing=da_boundary, idx=idx, n_steps=self.ar_steps + _, da_boundary_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_boundary, ) # load the data into memory @@ -506,83 +544,12 @@ def _build_item_dataarrays(self, idx): da_boundary_windowed - self.da_boundary_mean ) / self.da_boundary_std - if da_forcing is not None: - # Expand 'time_diff' to align with 'forcing_feature' and 'window' - # dimensions 'time_diff' has dimension ('time'), expand to ('time', - # 'forcing_feature', 'window') - time_diff_expanded = da_forcing_windowed["time_diff"].expand_dims( - forcing_feature=da_forcing_windowed["forcing_feature"], - window=da_forcing_windowed["window"], - ) - - # Stack 'forcing_feature' and 'window' into a single - # 'forcing_feature_windowed' dimension - da_forcing_windowed = da_forcing_windowed.stack( - forcing_feature_windowed=("forcing_feature", "window") - ) - time_diff_expanded = time_diff_expanded.stack( - forcing_feature_windowed=("forcing_feature", "window") - ) - - # Assign 'time_diff' as a coordinate to 'forcing_feature_windowed' - da_forcing_windowed = da_forcing_windowed.assign_coords( - time_diff=( - "forcing_feature_windowed", - time_diff_expanded.values, - ) - ) - else: - # Create an empty forcing tensor with the right shape - da_forcing_windowed = xr.DataArray( - data=np.empty( - (self.ar_steps, da_state.grid_index.size, 0), - ), - dims=("time", "grid_index", "forcing_feature"), - coords={ - "time": da_target_times, - "grid_index": da_state.grid_index, - "forcing_feature": [], - }, - ) - - if da_boundary is not None: - # If 'da_boundary_windowed' also has 'time_diff', process similarly - # Expand 'time_diff' to align with 'boundary_feature' and 'window' - # dimensions - time_diff_expanded = da_boundary_windowed["time_diff"].expand_dims( - boundary_feature=da_boundary_windowed["boundary_feature"], - window=da_boundary_windowed["window"], - ) - - # Stack 
'boundary_feature' and 'window' into a single - # 'boundary_feature_windowed' dimension - da_boundary_windowed = da_boundary_windowed.stack( - boundary_feature_windowed=("boundary_feature", "window") - ) - time_diff_expanded = time_diff_expanded.stack( - boundary_feature_windowed=("boundary_feature", "window") - ) - - # Assign 'time_diff' as a coordinate to 'boundary_feature_windowed' - da_boundary_windowed = da_boundary_windowed.assign_coords( - time_diff=( - "boundary_feature_windowed", - time_diff_expanded.values, - ) - ) - else: - # Create an empty boundary tensor with the right shape - da_boundary_windowed = xr.DataArray( - data=np.empty( - (self.ar_steps, da_state.grid_index.size, 0), - ), - dims=("time", "grid_index", "boundary_feature"), - coords={ - "time": da_target_times, - "grid_index": da_state.grid_index, - "boundary_feature": [], - }, - ) + da_forcing_windowed = self._process_windowed_data( + da_forcing_windowed, da_state, da_target_times + ) + da_boundary_windowed = self._process_windowed_data( + da_boundary_windowed, da_state, da_target_times + ) return ( da_init_states, From 2529969b12eb7babdcfd3311d6eae3045fe1fe15 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 21 Nov 2024 07:09:52 +0100 Subject: [PATCH 075/190] cleanup analysis based time-slicing --- neural_lam/weather_dataset.py | 85 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 555f2c35..fd40a2c8 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -245,11 +245,12 @@ def __len__(self): def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): """ Produce time slices of the given dataarrays `da_state` (state) and - `da_forcing` (forcing). For the state data, slicing is done as before - based on `idx`. For the forcing data, nearest neighbor matching is - performed based on the state times. Additionally, the time difference - between the matched forcing times and state times (in multiples of state - time steps) is added to the forcing dataarray. + `da_forcing` (forcing). For the state data, slicing is done based on + `idx`. For the forcing data, nearest neighbor matching is performed + based on the state times. Additionally, the time difference between the + matched forcing times and state times (in multiples of state time steps) + is added to the forcing dataarray. This will be used as an additional + feature in the model (temporal embedding). Parameters ---------- @@ -269,9 +270,8 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). da_forcing_matched : xr.DataArray - The forcing dataarray matched to state times with an added - coordinate 'time_diff', representing the time difference to state - times in multiples of state time steps. + The sliced state dataarray with dims ('time', 'grid_index', + 'forcing_feature_windowed'). 
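As a standalone sketch of the nearest-neighbour matching described above
(made-up 3-hourly state times and 6-hourly forcing times; an illustration
of the idea, not the exact code of this method):

    import numpy as np

    state_times = np.array(
        ["1990-09-03T00", "1990-09-03T03", "1990-09-03T06"],
        dtype="datetime64[ns]",
    )
    forcing_times = np.array(
        ["1990-09-03T00", "1990-09-03T06"], dtype="datetime64[ns]"
    )
    state_step = state_times[1] - state_times[0]

    # pairwise offsets of every forcing time from every state time,
    # expressed in multiples of the state time step
    time_deltas = (forcing_times[:, None] - state_times[None, :]) / state_step
    # index of the nearest forcing time for each state time
    idx_min = np.abs(time_deltas).argmin(axis=0)
    # signed offsets of the matched forcing times; these become the
    # temporal-embedding feature attached to the windowed forcing
    offsets = time_deltas[idx_min, np.arange(state_times.size)]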
""" # Number of initial steps required (e.g., for initializing models) init_steps = 2 @@ -308,9 +308,9 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): if da_forcing is None: return da_state_sliced, None - # Get the state times for matching + # Get the state times and its temporal resolution for matching with + # forcing data state_times = da_state_sliced["time"] - # Calculate time differences in multiples of state time steps state_time_step = state_times.values[1] - state_times.values[0] # Match forcing data to state times based on nearest neighbor @@ -369,39 +369,29 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): # For analysis data, match directly using the 'time' coordinate forcing_times = da_forcing["time"] - # Compute time differences + # Compute time differences between forcing and state times + # (in multiples of state time steps) + # Retrieve the indices of the closest times in the forcing data time_deltas = ( - state_times.values[np.newaxis, :] - - forcing_times.values[:, np.newaxis] - ) + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] + ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) - time_diff_steps = xr.DataArray( - np.stack( - [ - np.diagonal(time_deltas, offset=offset)[ - -len(state_times) + init_steps : - ] - / state_time_step - for offset in range( - -self.num_past_forcing_steps, - self.num_future_forcing_steps + 1, - ) - ], - axis=1, - ), - dims=["time", "window"], - coords={ - "time": state_times.isel(time=slice(init_steps, None)), - "window": np.arange( - -self.num_past_forcing_steps, - self.num_future_forcing_steps + 1, - ), - }, - name="time_diff_steps", + time_diff_steps = np.stack( + [ + time_deltas[ + idx_i + - self.num_past_forcing_steps : idx_i + + self.num_future_forcing_steps + + 1, + init_steps + step_i, + ] + for (step_i, idx_i) in enumerate(idx_min[init_steps:]) + ], ) - # Create window dimension using rolling + # Create window dimension for forcing data to stack later window_size = ( self.num_past_forcing_steps + self.num_future_forcing_steps + 1 ) @@ -412,9 +402,11 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): time=idx_min[init_steps:] ) - # Add time difference as a new coordinate - da_forcing_matched = da_forcing_matched.assign_coords( - time_diff=time_diff_steps + # Add time difference as a new coordinate to concatenate to the + # forcing features later + da_forcing_matched["time_diff_steps"] = ( + ("time", "window"), + time_diff_steps, ) return da_state_sliced, da_forcing_matched @@ -423,13 +415,19 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data after standardization.""" stacked_dim = "forcing_feature_windowed" if da_windowed is not None: - # Stack the 'feature' and 'window' dimensions + # Stack the 'feature' and 'window' dimensions and add the + # time step differences to the existing features as a temporal + # embedding da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) + da_windowed = xr.concat( + [da_windowed, da_windowed.time_diff_steps], + dim="forcing_feature_windowed", + ) else: # Create empty DataArray with the correct dimensions and coordinates - return xr.DataArray( + da_windowed = xr.DataArray( data=np.empty((self.ar_steps, da_state.grid_index.size, 0)), dims=("time", "grid_index", f"{stacked_dim}"), coords={ @@ -438,6 +436,7 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): f"{stacked_dim}": [], }, 
) + return da_windowed def _build_item_dataarrays(self, idx): """ From 179a035ac8b976a74e54ce4f38102addf06ed318 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 19 Nov 2024 16:59:42 +0100 Subject: [PATCH 076/190] implement datastore_boundary in existing tests --- tests/test_datasets.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 419aece0..67eac70e 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -14,12 +14,19 @@ from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataset -from tests.conftest import init_datastore_example +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) from tests.dummy_datastore import DummyDatastore @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_dataset_item_shapes(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_dataset_item_shapes(datastore_name, datastore_boundary_name): """Check that the `datastore.get_dataarray` method is implemented. Validate the shapes of the tensors match between the different @@ -31,6 +38,9 @@ def test_dataset_item_shapes(datastore_name): """ datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) N_gridpoints = datastore.num_grid_points N_pred_steps = 4 @@ -38,6 +48,7 @@ def test_dataset_item_shapes(datastore_name): num_future_forcing_steps = 1 dataset = WeatherDataset( datastore=datastore, + datastore_boundary=datastore_boundary, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -48,7 +59,7 @@ def test_dataset_item_shapes(datastore_name): # unpack the item, this is the current return signature for # WeatherDataset.__getitem__ - init_states, target_states, forcing, target_times = item + init_states, target_states, forcing, boundary, target_times = item # initial states assert init_states.ndim == 3 @@ -81,14 +92,23 @@ def test_dataset_item_shapes(datastore_name): @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_dataset_item_create_dataarray_from_tensor(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_dataset_item_create_dataarray_from_tensor( + datastore_name, datastore_boundary_name +): datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 dataset = WeatherDataset( datastore=datastore, + datastore_boundary=datastore_boundary, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -158,13 +178,19 @@ def test_dataset_item_create_dataarray_from_tensor(datastore_name): @pytest.mark.parametrize("split", ["train", "val", "test"]) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_single_batch(datastore_name, split): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_single_batch(datastore_name, datastore_boundary_name, split): """Check that the `datastore.get_dataarray` method is implemented. And that it returns an xarray DataArray with the correct dimensions. 
""" datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) device_name = ( torch.device("cuda") if torch.cuda.is_available() else "cpu" @@ -210,7 +236,9 @@ def _create_graph(): ) ) - dataset = WeatherDataset(datastore=datastore, split=split, ar_steps=2) + dataset = WeatherDataset( + datastore=datastore, datastore_boundary=datastore_boundary, split=split + ) model = GraphLAM(args=args, datastore=datastore, config=config) # noqa From 2daeb1642d276730496cc7ab183203ed5abba6ce Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 21 Nov 2024 16:39:27 +0100 Subject: [PATCH 077/190] allow for grid shape retrieval from forcing data --- neural_lam/datastore/mdp.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 7b947c20..809bbdb8 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -380,8 +380,17 @@ def grid_shape_state(self): The shape of the cartesian grid for the state variables. """ - ds_state = self.unstack_grid_coords(self._ds["state"]) - da_x, da_y = ds_state.x, ds_state.y + # Boundary data often has no state features + if "state" not in self._ds: + warnings.warn( + "no state data found in datastore" + "returning grid shape from forcing data" + ) + ds_forcing = self.unstack_grid_coords(self._ds["forcing"]) + da_x, da_y = ds_forcing.x, ds_forcing.y + else: + ds_state = self.unstack_grid_coords(self._ds["state"]) + da_x, da_y = ds_state.x, ds_state.y assert da_x.ndim == da_y.ndim == 1 return CartesianGridShape(x=da_x.size, y=da_y.size) From cbcdcaee71039977090a66ec2b8b1116063cf2a4 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 21 Nov 2024 16:40:47 +0100 Subject: [PATCH 078/190] rearrange time slicing, boundary first --- neural_lam/weather_dataset.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index fd40a2c8..f172d47f 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -495,13 +495,6 @@ def _build_item_dataarrays(self, idx): # if da_forcing is None, the function will return None for # da_forcing_windowed - da_state, da_forcing_windowed = self._slice_time( - da_state=da_state, - idx=idx, - n_steps=self.ar_steps, - da_forcing=da_forcing, - ) - if da_boundary is not None: _, da_boundary_windowed = self._slice_time( da_state=da_state, @@ -509,6 +502,12 @@ def _build_item_dataarrays(self, idx): n_steps=self.ar_steps, da_forcing=da_boundary, ) + da_state, da_forcing_windowed = self._slice_time( + da_state=da_state, + idx=idx, + n_steps=self.ar_steps, + da_forcing=da_forcing, + ) # load the data into memory da_state.load() From e6ace2727038d5a472a18e7eab7e6a26b6362fbb Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:42:05 +0100 Subject: [PATCH 079/190] renaming test datastores --- tests/datastore_examples/.gitignore | 3 +- .../.gitignore | 0 .../era5_1000hPa_danra_100m_winds/config.yaml | 12 +++ .../danra.datastore.yaml | 99 +++++++++++++++++++ .../era5.datastore.yaml | 23 ++--- .../mdp/era5_1000hPa_winds/config.yaml | 3 - 6 files changed, 122 insertions(+), 18 deletions(-) rename tests/datastore_examples/mdp/{era5_1000hPa_winds => era5_1000hPa_danra_100m_winds}/.gitignore (100%) create mode 100644 tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml create mode 100644 
tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml rename tests/datastore_examples/mdp/{era5_1000hPa_winds => era5_1000hPa_danra_100m_winds}/era5.datastore.yaml (80%) delete mode 100644 tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml diff --git a/tests/datastore_examples/.gitignore b/tests/datastore_examples/.gitignore index e84e6493..4fbd2326 100644 --- a/tests/datastore_examples/.gitignore +++ b/tests/datastore_examples/.gitignore @@ -1,2 +1,3 @@ npyfilesmeps/*.zip -npyfilesmeps/meps_example_reduced/ +npyfilesmeps/meps_example_reduced +npyfilesmeps/era5_1000hPa_temp_meps_example_reduced diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/.gitignore similarity index 100% rename from tests/datastore_examples/mdp/era5_1000hPa_winds/.gitignore rename to tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/.gitignore diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml new file mode 100644 index 00000000..a158bee3 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/config.yaml @@ -0,0 +1,12 @@ +datastore: + kind: mdp + config_path: danra.datastore.yaml +datastore_boundary: + kind: mdp + config_path: era5.datastore.yaml +training: + state_feature_weighting: + __config_class__: ManualStateFeatureWeighting + weights: + u100m: 1.0 + v100m: 1.0 diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml new file mode 100644 index 00000000..3edf1267 --- /dev/null +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/danra.datastore.yaml @@ -0,0 +1,99 @@ +schema_version: v0.5.0 +dataset_version: v0.1.0 + +output: + variables: + static: [grid_index, static_feature] + state: [time, grid_index, state_feature] + forcing: [time, grid_index, forcing_feature] + coord_ranges: + time: + start: 1990-09-03T00:00 + end: 1990-09-09T00:00 + step: PT3H + chunking: + time: 1 + splitting: + dim: time + splits: + train: + start: 1990-09-03T00:00 + end: 1990-09-06T00:00 + compute_statistics: + ops: [mean, std, diff_mean, diff_std] + dims: [grid_index, time] + val: + start: 1990-09-06T00:00 + end: 1990-09-07T00:00 + test: + start: 1990-09-07T00:00 + end: 1990-09-09T00:00 + +inputs: + danra_height_levels: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/height_levels.zarr + dims: [time, x, y, altitude] + variables: + u: + altitude: + values: [100,] + units: m + v: + altitude: + values: [100, ] + units: m + dim_mapping: + time: + method: rename + dim: time + state_feature: + method: stack_variables_by_var_name + dims: [altitude] + name_format: "{var_name}{altitude}m" + grid_index: + method: stack + dims: [x, y] + target_output_variable: state + + danra_surface: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr + dims: [time, x, y] + variables: + # use surface incoming shortwave radiation as forcing + - swavr0m + dim_mapping: + time: + method: rename + dim: time + grid_index: + method: stack + dims: [x, y] + forcing_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: forcing + + danra_lsm: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/lsm.zarr + dims: [x, y] + variables: + - lsm + dim_mapping: + grid_index: + method: stack + dims: [x, y] + 
static_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: static + +extra: + projection: + class_name: LambertConformal + kwargs: + central_longitude: 25.0 + central_latitude: 56.7 + standard_parallels: [56.7, 56.7] + globe: + semimajor_axis: 6367470.0 + semiminor_axis: 6367470.0 diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml similarity index 80% rename from tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml rename to tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml index 36b39501..c97da4bc 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -1,5 +1,4 @@ -#TODO: What do these versions mean? Should they be updated? -schema_version: v0.2.0+dev +schema_version: v0.5.0 dataset_version: v1.0.0 output: @@ -7,8 +6,8 @@ output: forcing: [time, grid_index, forcing_feature] coord_ranges: time: - start: 1990-09-02T00:00 - end: 1990-09-10T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 step: PT6H chunking: time: 1 @@ -16,17 +15,17 @@ output: dim: time splits: train: - start: 1990-09-02T00:00 - end: 1990-09-07T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 compute_statistics: ops: [mean, std, diff_mean, diff_std] dims: [grid_index, time] val: - start: 1990-09-05T00:00 - end: 1990-09-08T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 test: - start: 1990-09-06T00:00 - end: 1990-09-10T00:00 + start: 1990-09-01T00:00 + end: 2022-09-30T00:00 inputs: era_height_levels: @@ -37,10 +36,6 @@ inputs: level: values: [1000,] units: hPa - v_component_of_wind: - level: - values: [1000, ] - units: hPa dim_mapping: time: method: rename diff --git a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml b/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml deleted file mode 100644 index 5d1e05f2..00000000 --- a/tests/datastore_examples/mdp/era5_1000hPa_winds/config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -datastore: - kind: mdp - config_path: era5.datastore.yaml From 42818f0e91ccebb03c506b00f42e05e7d8d6fdfa Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:44:15 +0100 Subject: [PATCH 080/190] adding num_past/future_boundary_step args --- neural_lam/train_model.py | 37 +++++++++++++++------------------ tests/test_datasets.py | 43 +++++++++++++++++++++++++++++++++------ tests/test_training.py | 24 ++++++++++++++++++++-- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 37bf6db7..2a61e86c 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -34,11 +34,6 @@ def main(input_args=None): type=str, help="Path to the configuration for neural-lam", ) - parser.add_argument( - "--config_path_boundary", - type=str, - help="Path to the configuration for boundary conditions", - ) parser.add_argument( "--model", type=str, @@ -208,6 +203,18 @@ def main(input_args=None): default=1, help="Number of future time steps to use as input for forcing data", ) + parser.add_argument( + "--num_past_boundary_steps", + type=int, + default=1, + help="Number of past time steps to use as input for boundary data", + ) + parser.add_argument( + "--num_future_boundary_steps", + type=int, + default=1, + help="Number of future time steps to use as input for boundary data", + ) args = 
parser.parse_args(input_args) args.var_leads_metrics_watch = { int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items() @@ -217,9 +224,6 @@ def main(input_args=None): assert ( args.config_path is not None ), "Specify your config with --config_path" - assert ( - args.config_path_boundary is not None - ), "Specify your config with --config_path_boundary" assert args.model in MODELS, f"Unknown model: {args.model}" assert args.eval in ( None, @@ -234,21 +238,10 @@ def main(input_args=None): seed.seed_everything(args.seed) # Load neural-lam configuration and datastore to use - config, datastore = load_config_and_datastore(config_path=args.config_path) - config_boundary, datastore_boundary = load_config_and_datastore( - config_path=args.config_path_boundary + config, datastore, datastore_boundary = load_config_and_datastore( + config_path=args.config_path ) - # TODO this should not be required, make more flexible - assert ( - datastore.num_past_forcing_steps - == datastore_boundary.num_past_forcing_steps - ), "Mismatch in num_past_forcing_steps" - assert ( - datastore.num_future_forcing_steps - == datastore_boundary.num_future_forcing_steps - ), "Mismatch in num_future_forcing_steps" - # Create datamodule data_module = WeatherDataModule( datastore=datastore, @@ -258,6 +251,8 @@ def main(input_args=None): standardize=True, num_past_forcing_steps=args.num_past_forcing_steps, num_future_forcing_steps=args.num_future_forcing_steps, + num_past_boundary_steps=args.num_past_boundary_steps, + num_future_boundary_steps=args.num_future_boundary_steps, batch_size=args.batch_size, num_workers=args.num_workers, ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 67eac70e..5fbe4a5d 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -42,10 +42,13 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): datastore_boundary_name ) N_gridpoints = datastore.num_grid_points + N_gridpoints_boundary = datastore_boundary.num_grid_points N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 dataset = WeatherDataset( datastore=datastore, datastore_boundary=datastore_boundary, @@ -53,6 +56,8 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, + num_past_boundary_steps=num_past_boundary_steps, + num_future_boundary_steps=num_future_boundary_steps, ) item = dataset[0] @@ -77,8 +82,23 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): assert forcing.ndim == 3 assert forcing.shape[0] == N_pred_steps assert forcing.shape[1] == N_gridpoints - assert forcing.shape[2] == datastore.get_num_data_vars("forcing") * ( - num_past_forcing_steps + num_future_forcing_steps + 1 + # each stacked forcing feature has one corresponding temporal embedding + assert ( + forcing.shape[2] + == datastore.get_num_data_vars("forcing") + * (num_past_forcing_steps + num_future_forcing_steps + 1) + * 2 + ) + + # boundary + assert boundary.ndim == 3 + assert boundary.shape[0] == N_pred_steps + assert boundary.shape[1] == N_gridpoints_boundary + assert ( + boundary.shape[2] + == datastore_boundary.get_num_data_vars("forcing") + * (num_past_boundary_steps + num_future_boundary_steps + 1) + * 2 ) # batch times @@ -88,6 +108,7 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): # try to get the last item of the dataset to ensure 
slicing and stacking # operations are working as expected and are consistent with the dataset # length + dataset[len(dataset) - 1] @@ -106,6 +127,9 @@ def test_dataset_item_create_dataarray_from_tensor( N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 + dataset = WeatherDataset( datastore=datastore, datastore_boundary=datastore_boundary, @@ -113,16 +137,22 @@ def test_dataset_item_create_dataarray_from_tensor( ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, + num_past_boundary_steps=num_past_boundary_steps, + num_future_boundary_steps=num_future_boundary_steps, ) idx = 0 # unpack the item, this is the current return signature for # WeatherDataset.__getitem__ - _, target_states, _, target_times_arr = dataset[idx] - _, da_target_true, _, da_target_times_true = dataset._build_item_dataarrays( - idx=idx - ) + _, target_states, _, _, target_times_arr = dataset[idx] + ( + _, + da_target_true, + _, + _, + da_target_times_true, + ) = dataset._build_item_dataarrays(idx=idx) target_times = np.array(target_times_arr, dtype="datetime64[ns]") np.testing.assert_equal(target_times, da_target_times_true.values) @@ -272,6 +302,7 @@ def test_dataset_length(dataset_config): dataset = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=dataset_config["ar_steps"], num_past_forcing_steps=dataset_config["past"], diff --git a/tests/test_training.py b/tests/test_training.py index 1ed1847d..28566a4b 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -14,18 +14,33 @@ from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataModule -from tests.conftest import init_datastore_example +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_training(datastore_name): +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( f"Skipping test for {datastore_name} as it is not a regular " "grid datastore." ) + if not isinstance(datastore_boundary, BaseRegularGridDatastore): + pytest.skip( + f"Skipping test for {datastore_boundary_name} as it is not a regular " + "grid datastore." 
+ ) if torch.cuda.is_available(): device_name = "cuda" @@ -59,6 +74,7 @@ def test_training(datastore_name): data_module = WeatherDataModule( datastore=datastore, + datastore_boundary=datastore_boundary, ar_steps_train=3, ar_steps_eval=5, standardize=True, @@ -66,6 +82,8 @@ def test_training(datastore_name): num_workers=1, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, ) class ModelArgs: @@ -85,6 +103,8 @@ class ModelArgs: metrics_watch = [] num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 model_args = ModelArgs() From 0103b6e70927cb53e59b77c30245d3fa8139f8ed Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:44:51 +0100 Subject: [PATCH 081/190] using combined config file --- neural_lam/config.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/neural_lam/config.py b/neural_lam/config.py index d3e09697..914ebb38 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -168,4 +168,15 @@ def load_config_and_datastore( datastore_kind=config.datastore.kind, config_path=datastore_config_path ) - return config, datastore + if config.datastore_boundary is not None: + datastore_boundary_config_path = ( + Path(config_path).parent / config.datastore_boundary.config_path + ) + datastore_boundary = init_datastore( + datastore_kind=config.datastore_boundary.kind, + config_path=datastore_boundary_config_path, + ) + else: + datastore_boundary = None + + return config, datastore, datastore_boundary From 089634447df0c2704670df900fc4733a727fce38 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:45:12 +0100 Subject: [PATCH 082/190] proper handling of state/forcing/boundary in dataset --- neural_lam/weather_dataset.py | 304 +++++++++++++++++++--------------- 1 file changed, 167 insertions(+), 137 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index f172d47f..7dbe0567 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -38,6 +38,16 @@ class WeatherDataset(torch.utils.data.Dataset): forcing from times t, t+1, ..., t+j-1, t+j (and potentially times before t, given num_past_forcing_steps) are included as forcing inputs at time t. Default is 1. + num_past_boundary_steps: int, optional + Number of past time steps to include in boundary input. If set to i, + boundary from times t-i, t-i+1, ..., t-1, t (and potentially beyond, + given num_future_forcing_steps) are included as boundary inputs at time t + Default is 1. + num_future_boundary_steps: int, optional + Number of future time steps to include in boundary input. If set to j, + boundary from times t, t+1, ..., t+j-1, t+j (and potentially times before + t, given num_past_forcing_steps) are included as boundary inputs at time + t. Default is 1. standardize : bool, optional Whether to standardize the data. Default is True. 
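For orientation, a minimal construction sketch mirroring how the tests in
this series build the dataset (`datastore` and `datastore_boundary` are
assumed to be already-initialised datastore objects; the step counts are
example values only):

    dataset = WeatherDataset(
        datastore=datastore,
        datastore_boundary=datastore_boundary,
        split="train",
        ar_steps=4,
        num_past_forcing_steps=1,
        num_future_forcing_steps=1,
        num_past_boundary_steps=1,
        num_future_boundary_steps=1,
    )
    init_states, target_states, forcing, boundary, target_times = dataset[0]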
""" @@ -50,6 +60,8 @@ def __init__( ar_steps=3, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, standardize=True, ): super().__init__() @@ -60,10 +72,10 @@ def __init__( self.datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps + self.num_past_boundary_steps = num_past_boundary_steps + self.num_future_boundary_steps = num_future_boundary_steps - self.da_state = self.datastore.get_dataarray( - category="state", split=self.split - ) + self.da_state = self.datastore.get_dataarray(category="state", split=self.split) if self.da_state is None: raise ValueError( "A non-empty state dataarray must be provided. " @@ -74,9 +86,12 @@ def __init__( category="forcing", split=self.split ) # XXX For now boundary data is always considered mdp-forcing data - self.da_boundary = self.datastore_boundary.get_dataarray( - category="forcing", split=self.split - ) + if self.datastore_boundary is not None: + self.da_boundary = self.datastore_boundary.get_dataarray( + category="forcing", split=self.split + ) + else: + self.da_boundary = None # check that with the provided data-arrays and ar_steps that we have a # non-zero amount of samples @@ -97,9 +112,7 @@ def __init__( parts["forcing"] = self.da_forcing for part, da in parts.items(): - expected_dim_order = self.datastore.expected_dim_order( - category=part - ) + expected_dim_order = self.datastore.expected_dim_order(category=part) if da.dims != expected_dim_order: raise ValueError( f"The dimension order of the `{part}` data ({da.dims}) " @@ -108,6 +121,23 @@ def __init__( "the data in `BaseDatastore.get_dataarray`?" ) + # handling ensemble data + if self.datastore.is_ensemble: + # for the now the strategy is to only include the first ensemble + # member + # XXX: this could be changed to include all ensemble members by + # splitting `idx` into two parts, one for the analysis time and one + # for the ensemble member and then increasing self.__len__ to + # include all ensemble members + warnings.warn( + "only use of ensemble member 0 (the first member) is " + "implemented for ensemble data" + ) + i_ensemble = 0 + self.da_state = self.da_state.isel(ensemble_member=i_ensemble) + else: + self.da_state = self.da_state + def get_time_step(times): """Calculate the time step from the data""" time_diffs = np.diff(times) @@ -119,11 +149,18 @@ def get_time_step(times): return time_diffs[0] # Check time step consistency in state data - _ = get_time_step(self.da_state.time.values) + if self.datastore.is_forecast: + state_times = self.da_state.analysis_time + else: + state_times = self.da_state.time + _ = get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: - state_times = self.da_state.time + if self.datastore.is_forecast: + state_times = self.da_state.analysis_time + else: + state_times = self.da_state.time state_time_min = state_times.min().values state_time_max = state_times.max().values @@ -131,26 +168,30 @@ def get_time_step(times): # Forcing data is part of the same datastore as state data # During creation the time dimension of the forcing data # is matched to the state data - forcing_times = self.da_forcing.time - _ = get_time_step(forcing_times.values) + if self.datastore.is_forecast: + forcing_times = self.da_forcing.analysis_time + else: + forcing_times = self.da_forcing.time + get_time_step(forcing_times.values) if 
self.da_boundary is not None: # Boundary data is part of a separate datastore # The boundary data is allowed to have a different time_step # Check that the boundary data covers the required time range - boundary_times = self.da_boundary.time + if self.datastore_boundary.is_forecast: + boundary_times = self.da_boundary.analysis_time + else: + boundary_times = self.da_boundary.time boundary_time_step = get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values # Calculate required bounds for boundary using its time step boundary_required_time_min = ( - state_time_min - - self.num_past_forcing_steps * boundary_time_step + state_time_min - self.num_past_forcing_steps * boundary_time_step ) boundary_required_time_max = ( - state_time_max - + self.num_future_forcing_steps * boundary_time_step + state_time_max + self.num_future_forcing_steps * boundary_time_step ) if boundary_time_min > boundary_required_time_min: @@ -179,10 +220,8 @@ def get_time_step(times): self.da_state_std = self.ds_state_stats.state_std if self.da_forcing is not None: - self.ds_forcing_stats = ( - self.datastore.get_standardization_dataarray( - category="forcing" - ) + self.ds_forcing_stats = self.datastore.get_standardization_dataarray( + category="forcing" ) self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std @@ -208,7 +247,7 @@ def __len__(self): warnings.warn( "only using first ensemble member, so dataset size is " " effectively reduced by the number of ensemble members " - f"({self.da_state.ensemble_member.size})", + f"({self.datastore._num_ensemble_members})", UserWarning, ) @@ -242,36 +281,50 @@ def __len__(self): - self.num_future_forcing_steps ) - def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): + def _slice_time( + self, + da_state, + idx, + n_steps: int, + da_forcing_boundary=None, + num_past_steps=None, + num_future_steps=None, + ): """ Produce time slices of the given dataarrays `da_state` (state) and - `da_forcing` (forcing). For the state data, slicing is done based on - `idx`. For the forcing data, nearest neighbor matching is performed - based on the state times. Additionally, the time difference between the - matched forcing times and state times (in multiples of state time steps) - is added to the forcing dataarray. This will be used as an additional - feature in the model (temporal embedding). + `da_forcing_boundary`. For the state data, slicing is done + based on `idx`. For the forcing/boundary data, nearest neighbor matching + is performed based on the state times. Additionally, the time difference + between the matched forcing/boundary times and state times (in multiples + of state time steps) is added to the forcing dataarray. This will be + used as an additional feature in the model (temporal embedding). Parameters ---------- da_state : xr.DataArray The state dataarray to slice. - da_forcing : xr.DataArray - The forcing dataarray to slice. idx : int The index of the time step to start the sample from in the state data. n_steps : int The number of time steps to include in the sample. + da_forcing_boundary : xr.DataArray + The forcing/boundary dataarray to slice. + num_past_steps : int, optional + The number of past time steps to include in the forcing/boundary + data. Default is `None`. + num_future_steps : int, optional + The number of future time steps to include in the forcing/boundary + data. Default is `None`. 
Returns ------- da_state_sliced : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). - da_forcing_matched : xr.DataArray + da_forcing_boundary_matched : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', - 'forcing_feature_windowed'). + 'forcing/boundary_feature_windowed'). """ # Number of initial steps required (e.g., for initializing models) init_steps = 2 @@ -279,8 +332,8 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): # Slice the state data as before if self.datastore.is_forecast: # Calculate start and end indices for slicing - start_idx = max(0, self.num_past_forcing_steps - init_steps) - end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps + start_idx = max(0, num_past_steps - init_steps) + end_idx = max(init_steps, num_past_steps) + n_steps # Slice the state data over the elapsed forecast duration da_state_sliced = da_state.isel( @@ -299,13 +352,11 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): else: # For analysis data, slice the time dimension directly - start_idx = idx + max(0, self.num_past_forcing_steps - init_steps) - end_idx = ( - idx + max(init_steps, self.num_past_forcing_steps) + n_steps - ) + start_idx = idx + max(0, num_past_steps - init_steps) + end_idx = idx + max(init_steps, num_past_steps) + n_steps da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) - if da_forcing is None: + if da_forcing_boundary is None: return da_state_sliced, None # Get the state times and its temporal resolution for matching with @@ -313,78 +364,66 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): state_times = da_state_sliced["time"] state_time_step = state_times.values[1] - state_times.values[0] - # Match forcing data to state times based on nearest neighbor - if self.datastore.is_forecast: - # Calculate all possible forcing times - forcing_times = ( - da_forcing.analysis_time + da_forcing.elapsed_forecast_duration - ) - forcing_times_flat = forcing_times.stack( - forecast_time=("analysis_time", "elapsed_forecast_duration") - ) + if "analysis_time" in da_forcing_boundary.dims: + idx = np.abs( + da_forcing_boundary.analysis_time.values + - self.da_state.analysis_time.values[idx] + ).argmin() + # Add a 'time' dimension using the actual forecast times + offset = max(init_steps, num_past_steps) + da_list = [] + for step in range(n_steps): + start_idx = offset + step - num_past_steps + end_idx = offset + step + num_future_steps + + current_time = ( + da_forcing_boundary.analysis_time[idx] + + da_forcing_boundary.elapsed_forecast_duration[offset + step] + ) - # Compute time differences - time_deltas = ( - forcing_times_flat.values[:, np.newaxis] - - state_times.values[np.newaxis, :] - ) - time_diffs = np.abs(time_deltas) - idx_min = time_diffs.argmin(axis=0) - - # Retrieve corresponding indices for analysis_time and - # elapsed_forecast_duration - forecast_time_index = forcing_times_flat["forecast_time"][idx_min] - analysis_time_indices = forecast_time_index["analysis_time"] - elapsed_forecast_duration_indices = forecast_time_index[ - "elapsed_forecast_duration" - ] - - # Slice the forcing data using matched indices - da_forcing_matched = da_forcing.isel( - analysis_time=("time", analysis_time_indices), - elapsed_forecast_duration=( - "time", - elapsed_forecast_duration_indices, - ), - ) + da_sliced = da_forcing_boundary.isel( + analysis_time=idx, + elapsed_forecast_duration=slice(start_idx, end_idx + 1), + ) - # Assign matched state times to 
the forcing data - da_forcing_matched["time"] = state_times - da_forcing_matched = da_forcing_matched.swap_dims( - {"elapsed_forecast_duration": "time"} - ) + da_sliced = da_sliced.rename({"elapsed_forecast_duration": "window"}) + da_sliced = da_sliced.assign_coords( + window=np.arange(-num_past_steps, num_future_steps + 1) + ) - # Calculate time differences in multiples of state time steps - state_time_step = state_times.values[1] - state_times.values[0] - time_diff_steps = ( - time_deltas[idx_min, np.arange(len(state_times))] - / state_time_step - ) + da_sliced = da_sliced.expand_dims(dim={"time": [current_time.values]}) + + da_list.append(da_sliced) - # Add time difference as a new coordinate - da_forcing_matched = da_forcing_matched.assign_coords( - time_diff=("time", time_diff_steps) + # Concatenate the list of DataArrays along the 'time' dimension + da_forcing_boundary_matched = xr.concat(da_list, dim="time") + forcing_time_step = ( + da_forcing_boundary_matched.time.values[1] + - da_forcing_boundary_matched.time.values[0] ) + da_forcing_boundary_matched["window"] = da_forcing_boundary_matched["window"] * ( + forcing_time_step / state_time_step + ) + time_diff_steps = da_forcing_boundary_matched.isel( + grid_index=0, forcing_feature=0 + ).data + else: # For analysis data, match directly using the 'time' coordinate - forcing_times = da_forcing["time"] + forcing_times = da_forcing_boundary["time"] # Compute time differences between forcing and state times # (in multiples of state time steps) # Retrieve the indices of the closest times in the forcing data time_deltas = ( - forcing_times.values[:, np.newaxis] - - state_times.values[np.newaxis, :] + forcing_times.values[:, np.newaxis] - state_times.values[np.newaxis, :] ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) time_diff_steps = np.stack( [ time_deltas[ - idx_i - - self.num_past_forcing_steps : idx_i - + self.num_future_forcing_steps - + 1, + idx_i - num_past_steps : idx_i + num_future_steps + 1, init_steps + step_i, ] for (step_i, idx_i) in enumerate(idx_min[init_steps:]) @@ -392,24 +431,22 @@ def _slice_time(self, da_state, idx, n_steps: int, da_forcing=None): ) # Create window dimension for forcing data to stack later - window_size = ( - self.num_past_forcing_steps + self.num_future_forcing_steps + 1 - ) - da_forcing_windowed = da_forcing.rolling( - time=window_size, center=True + window_size = num_past_steps + num_future_steps + 1 + da_forcing_boundary_windowed = da_forcing_boundary.rolling( + time=window_size, center=False ).construct(window_dim="window") - da_forcing_matched = da_forcing_windowed.isel( + da_forcing_boundary_matched = da_forcing_boundary_windowed.isel( time=idx_min[init_steps:] ) - # Add time difference as a new coordinate to concatenate to the - # forcing features later - da_forcing_matched["time_diff_steps"] = ( - ("time", "window"), - time_diff_steps, - ) + # Add time difference as a new coordinate to concatenate to the + # forcing features later + da_forcing_boundary_matched["time_diff_steps"] = ( + ("time", "window"), + time_diff_steps, + ) - return da_state_sliced, da_forcing_matched + return da_state_sliced, da_forcing_boundary_matched def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data after standardization.""" @@ -462,23 +499,7 @@ def _build_item_dataarrays(self, idx): da_target_times : xr.DataArray The dataarray for the target times. 
""" - # handling ensemble data - if self.datastore.is_ensemble: - # for the now the strategy is to only include the first ensemble - # member - # XXX: this could be changed to include all ensemble members by - # splitting `idx` into two parts, one for the analysis time and one - # for the ensemble member and then increasing self.__len__ to - # include all ensemble members - warnings.warn( - "only use of ensemble member 0 (the first member) is " - "implemented for ensemble data" - ) - i_ensemble = 0 - da_state = self.da_state.isel(ensemble_member=i_ensemble) - else: - da_state = self.da_state - + da_state = self.da_state if self.da_forcing is not None: if "ensemble_member" in self.da_forcing.dims: raise NotImplementedError( @@ -500,13 +521,19 @@ def _build_item_dataarrays(self, idx): da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing=da_boundary, + da_forcing_boundary=da_boundary, + num_future_steps=self.num_future_boundary_steps, + num_past_steps=self.num_past_boundary_steps, ) + else: + da_boundary_windowed = None da_state, da_forcing_windowed = self._slice_time( da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing=da_forcing, + da_forcing_boundary=da_forcing, + num_future_steps=self.num_future_forcing_steps, + num_past_steps=self.num_past_forcing_steps, ) # load the data into memory @@ -521,9 +548,7 @@ def _build_item_dataarrays(self, idx): da_target_times = da_target_states.time if self.standardize: - da_init_states = ( - da_init_states - self.da_state_mean - ) / self.da_state_std + da_init_states = (da_init_states - self.da_state_mean) / self.da_state_std da_target_states = ( da_target_states - self.da_state_mean ) / self.da_state_std @@ -595,9 +620,7 @@ def __getitem__(self, idx): tensor_dtype = torch.float32 init_states = torch.tensor(da_init_states.values, dtype=tensor_dtype) - target_states = torch.tensor( - da_target_states.values, dtype=tensor_dtype - ) + target_states = torch.tensor(da_target_states.values, dtype=tensor_dtype) target_times = torch.tensor( da_target_times.astype("datetime64[ns]").astype("int64").values, @@ -708,10 +731,7 @@ def _is_listlike(obj): ) for grid_coord in ["x", "y"]: - if ( - grid_coord in da_datastore_state.coords - and grid_coord not in da.coords - ): + if grid_coord in da_datastore_state.coords and grid_coord not in da.coords: da.coords[grid_coord] = da_datastore_state[grid_coord] if not add_time_as_dim: @@ -732,6 +752,8 @@ def __init__( standardize=True, num_past_forcing_steps=1, num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, batch_size=4, num_workers=16, ): @@ -740,6 +762,8 @@ def __init__( self._datastore_boundary = datastore_boundary self.num_past_forcing_steps = num_past_forcing_steps self.num_future_forcing_steps = num_future_forcing_steps + self.num_past_boundary_steps = num_past_boundary_steps + self.num_future_boundary_steps = num_future_boundary_steps self.ar_steps_train = ar_steps_train self.ar_steps_eval = ar_steps_eval self.standardize = standardize @@ -766,6 +790,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) self.val_dataset = WeatherDataset( datastore=self._datastore, @@ -775,6 +801,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, 
num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) if stage == "test" or stage is None: @@ -786,6 +814,8 @@ def setup(self, stage=None): standardize=self.standardize, num_past_forcing_steps=self.num_past_forcing_steps, num_future_forcing_steps=self.num_future_forcing_steps, + num_past_boundary_steps=self.num_past_boundary_steps, + num_future_boundary_steps=self.num_future_boundary_steps, ) def train_dataloader(self): From 355423c8412677823db63d34ad4b2649abcf1478 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:45:35 +0100 Subject: [PATCH 083/190] datastore_boundars=None introduced --- .../datastore/npyfilesmeps/compute_standardization_stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py index f2c80e8a..4207812f 100644 --- a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py +++ b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py @@ -172,6 +172,7 @@ def main( ar_steps = 63 ds = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=ar_steps, standardize=False, From 121d460930fd24ae0ff90dd0d07279c75a15b1d5 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:02 +0100 Subject: [PATCH 084/190] bug fix for file retrieval per member --- neural_lam/datastore/npyfilesmeps/store.py | 51 +++++++++------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 146b0627..7ee583be 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -244,9 +244,7 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # them separately features = ["toa_downwelling_shortwave_flux", "open_water_fraction"] das = [ - self._get_single_timeseries_dataarray( - features=[feature], split=split - ) + self._get_single_timeseries_dataarray(features=[feature], split=split) for feature in features ] da = xr.concat(das, dim="feature") @@ -259,9 +257,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # variable is turned into a dask array and so execution of the # calculation is delayed until the feature values are actually # used. 
- da_forecast_time = ( - da.analysis_time + da.elapsed_forecast_duration - ).chunk({"elapsed_forecast_duration": 1}) + da_forecast_time = (da.analysis_time + da.elapsed_forecast_duration).chunk( + {"elapsed_forecast_duration": 1} + ) da_datetime_forcing_features = self._calc_datetime_forcing_features( da_time=da_forecast_time ) @@ -339,10 +337,7 @@ def _get_single_timeseries_dataarray( for all categories of data """ - if ( - set(features).difference(self.get_vars_names(category="static")) - == set() - ): + if set(features).difference(self.get_vars_names(category="static")) == set(): assert split in ( "train", "val", @@ -356,12 +351,8 @@ def _get_single_timeseries_dataarray( "test", ), f"Unknown dataset split {split} for features {features}" - if member is not None and features != self.get_vars_names( - category="state" - ): - raise ValueError( - "Member can only be specified for the 'state' category" - ) + if member is not None and features != self.get_vars_names(category="state"): + raise ValueError("Member can only be specified for the 'state' category") concat_axis = 0 @@ -377,9 +368,7 @@ def _get_single_timeseries_dataarray( fp_samples = self.root_path / "samples" / split if self._remove_state_features_with_index: n_to_drop = len(self._remove_state_features_with_index) - feature_dim_mask = np.ones( - len(features) + n_to_drop, dtype=bool - ) + feature_dim_mask = np.ones(len(features) + n_to_drop, dtype=bool) feature_dim_mask[self._remove_state_features_with_index] = False elif features == ["toa_downwelling_shortwave_flux"]: filename_format = TOA_SW_DOWN_FLUX_FILENAME_FORMAT @@ -445,7 +434,7 @@ def _get_single_timeseries_dataarray( * np.timedelta64(1, "h") ) elif d == "analysis_time": - coord_values = self._get_analysis_times(split=split) + coord_values = self._get_analysis_times(split=split, member_id=member) elif d == "y": coord_values = y elif d == "x": @@ -464,9 +453,7 @@ def _get_single_timeseries_dataarray( if features_vary_with_analysis_time: filepaths = [ fp_samples - / filename_format.format( - analysis_time=analysis_time, **file_params - ) + / filename_format.format(analysis_time=analysis_time, **file_params) for analysis_time in coords["analysis_time"] ] else: @@ -505,7 +492,7 @@ def _get_single_timeseries_dataarray( return da - def _get_analysis_times(self, split) -> List[np.datetime64]: + def _get_analysis_times(self, split, member_id) -> List[np.datetime64]: """Get the analysis times for the given split by parsing the filenames of all the files found for the given split. @@ -513,6 +500,8 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: ---------- split : str The dataset split to get the analysis times for. + member_id : int + The ensemble member to get the analysis times for. Returns ------- @@ -520,8 +509,12 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: The analysis times for the given split. 
""" + if member_id is None: + # Only interior state data files have member_id, to avoid duplicates + # we only look at the first member for all other categories + member_id = 0 pattern = re.sub(r"{analysis_time:[^}]*}", "*", STATE_FILENAME_FORMAT) - pattern = re.sub(r"{member_id:[^}]*}", "*", pattern) + pattern = re.sub(r"{member_id:[^}]*}", f"{member_id:03d}", pattern) sample_dir = self.root_path / "samples" / split sample_files = sample_dir.glob(pattern) @@ -531,9 +524,7 @@ def _get_analysis_times(self, split) -> List[np.datetime64]: times.append(name_parts["analysis_time"]) if len(times) == 0: - raise ValueError( - f"No files found in {sample_dir} with pattern {pattern}" - ) + raise ValueError(f"No files found in {sample_dir} with pattern {pattern}") return times @@ -690,9 +681,7 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ def load_pickled_tensor(fn): - return torch.load( - self.root_path / "static" / fn, weights_only=True - ).numpy() + return torch.load(self.root_path / "static" / fn, weights_only=True).numpy() mean_diff_values = None std_diff_values = None From 7e82eef5d797c76a7667271603e5ea94a3485ac2 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:17 +0100 Subject: [PATCH 085/190] rename datastore for tests --- tests/conftest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index be5cf3e7..90a86d0d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,14 +94,14 @@ def download_meps_example_reduced_dataset(): dummydata=None, ) -DATASTORES_BOUNDARY_EXAMPLES = dict( - mdp=( +DATASTORES_BOUNDARY_EXAMPLES = { + "mdp": ( DATASTORE_EXAMPLES_ROOT_PATH / "mdp" - / "era5_1000hPa_winds" + / "era5_1000hPa_danra_100m_winds" / "era5.datastore.yaml" - ) -) + ), +} DATASTORES[DummyDatastore.SHORT_NAME] = DummyDatastore From 320d7c4826e4055fef0edfa748c3e7b6704c589a Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:31 +0100 Subject: [PATCH 086/190] aligned time with danra for easier boundary testing --- tests/dummy_datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index d62c7356..a958b8f5 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -28,7 +28,7 @@ class DummyDatastore(BaseRegularGridDatastore): """ SHORT_NAME = "dummydata" - T0 = isodate.parse_datetime("2021-01-01T00:00:00") + T0 = isodate.parse_datetime("1990-09-02T00:00:00") N_FEATURES = dict(state=5, forcing=2, static=1) CARTESIAN_COORDS = ["x", "y"] From f18dcc2340434ce96f709ba987af482d063de4e5 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Sat, 30 Nov 2024 20:46:50 +0100 Subject: [PATCH 087/190] Fixed test for temporal embedding --- tests/test_time_slicing.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 29161505..2f5ed96c 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -40,9 +40,7 @@ def get_dataarray(self, category, split): if self.is_forecast: raise NotImplementedError() else: - da = xr.DataArray( - values, dims=["time"], coords={"time": self._time_values} - ) + da = xr.DataArray(values, dims=["time"], coords={"time": self._time_values}) # add `{category}_feature` and `grid_index` dimensions da = da.expand_dims("grid_index") @@ -78,10 +76,8 @@ def get_vars_long_names(self, category): def test_time_slicing_analysis( ar_steps, 
num_past_forcing_steps, num_future_forcing_steps ): - # state and forcing variables have only on dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange( - len(ANALYSIS_STATE_VALUES) - ) + # state and forcing variables have only one dimension, `time` + time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -93,6 +89,7 @@ def test_time_slicing_analysis( dataset = WeatherDataset( datastore=datastore, + datastore_boundary=None, ar_steps=ar_steps, num_future_forcing_steps=num_future_forcing_steps, num_past_forcing_steps=num_past_forcing_steps, @@ -101,9 +98,7 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _ = [ - tensor.numpy() for tensor in sample - ] + init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] expected_init_states = [0, 1] if ar_steps == 3: @@ -130,7 +125,7 @@ def test_time_slicing_analysis( # init_states: (2, N_grid, d_features) # target_states: (ar_steps, N_grid, d_features) - # forcing: (ar_steps, N_grid, d_windowed_forcing) + # forcing: (ar_steps, N_grid, d_windowed_forcing * 2) # target_times: (ar_steps,) assert init_states.shape == (2, 1, 1) assert init_states[:, 0, 0].tolist() == expected_init_states @@ -141,6 +136,10 @@ def test_time_slicing_analysis( assert forcing.shape == ( 3, 1, - 1 + num_past_forcing_steps + num_future_forcing_steps, + # Factor 2 because each window step has a temporal embedding + (1 + num_past_forcing_steps + num_future_forcing_steps) * 2, + ) + np.testing.assert_equal( + forcing[:, 0, : num_past_forcing_steps + num_future_forcing_steps + 1], + np.array(expected_forcing_values), ) - np.testing.assert_equal(forcing[:, 0, :], np.array(expected_forcing_values)) From e6327d88373bb2708733f6331aebe407facc1f67 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 10:40:48 +0100 Subject: [PATCH 088/190] allow boundary as input to ar_model.common_step --- neural_lam/models/ar_model.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 710efcec..331966e4 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -110,7 +110,9 @@ def __init__( self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim - + num_forcing_vars + # Factor 2 because of temporal embedding or windowed features + + 2 + * num_forcing_vars * (num_past_forcing_steps + num_future_forcing_steps + 1) ) @@ -241,19 +243,20 @@ def unroll_prediction(self, init_states, forcing_features, true_states): def common_step(self, batch): """ - Predict on single batch batch consists of: init_states: (B, 2, - num_grid_nodes, d_features) target_states: (B, pred_steps, - num_grid_nodes, d_features) forcing_features: (B, pred_steps, - num_grid_nodes, d_forcing), - where index 0 corresponds to index 1 of init_states + Predict on single batch batch consists of: + init_states: (B, 2,num_grid_nodes, d_features) + target_states: (B, pred_steps,num_grid_nodes, d_features) + forcing_features: (B, pred_steps,num_grid_nodes, d_forcing) + boundary_features: (B, pred_steps,num_grid_nodes, d_boundaries) + batch_times: (B, pred_steps) """ - (init_states, target_states, forcing_features, batch_times) = batch + (init_states, target_states, forcing_features, _, batch_times) = batch prediction, pred_std = self.unroll_prediction( init_states, 
forcing_features, target_states - ) # (B, pred_steps, num_grid_nodes, d_f) - # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, - # pred_steps, num_grid_nodes, d_f) or (d_f,) + ) + # prediction: (B, pred_steps, num_grid_nodes, d_f) + # pred_std: (B, pred_steps, num_grid_nodes, d_f) or (d_f,) return prediction, target_states, pred_std, batch_times From 1374a1976f002ffba86c7c203c6fbb2bea83fb0e Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 10:40:56 +0100 Subject: [PATCH 089/190] linting --- neural_lam/datastore/npyfilesmeps/store.py | 43 ++++++++---- neural_lam/weather_dataset.py | 66 ++++++++++++------- .../era5.datastore.yaml | 2 +- tests/test_time_slicing.py | 12 +++- tests/test_training.py | 17 ++--- 5 files changed, 91 insertions(+), 49 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 7ee583be..24349e7e 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -244,7 +244,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # them separately features = ["toa_downwelling_shortwave_flux", "open_water_fraction"] das = [ - self._get_single_timeseries_dataarray(features=[feature], split=split) + self._get_single_timeseries_dataarray( + features=[feature], split=split + ) for feature in features ] da = xr.concat(das, dim="feature") @@ -257,9 +259,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: # variable is turned into a dask array and so execution of the # calculation is delayed until the feature values are actually # used. - da_forecast_time = (da.analysis_time + da.elapsed_forecast_duration).chunk( - {"elapsed_forecast_duration": 1} - ) + da_forecast_time = ( + da.analysis_time + da.elapsed_forecast_duration + ).chunk({"elapsed_forecast_duration": 1}) da_datetime_forcing_features = self._calc_datetime_forcing_features( da_time=da_forecast_time ) @@ -337,7 +339,10 @@ def _get_single_timeseries_dataarray( for all categories of data """ - if set(features).difference(self.get_vars_names(category="static")) == set(): + if ( + set(features).difference(self.get_vars_names(category="static")) + == set() + ): assert split in ( "train", "val", @@ -351,8 +356,12 @@ def _get_single_timeseries_dataarray( "test", ), f"Unknown dataset split {split} for features {features}" - if member is not None and features != self.get_vars_names(category="state"): - raise ValueError("Member can only be specified for the 'state' category") + if member is not None and features != self.get_vars_names( + category="state" + ): + raise ValueError( + "Member can only be specified for the 'state' category" + ) concat_axis = 0 @@ -368,7 +377,9 @@ def _get_single_timeseries_dataarray( fp_samples = self.root_path / "samples" / split if self._remove_state_features_with_index: n_to_drop = len(self._remove_state_features_with_index) - feature_dim_mask = np.ones(len(features) + n_to_drop, dtype=bool) + feature_dim_mask = np.ones( + len(features) + n_to_drop, dtype=bool + ) feature_dim_mask[self._remove_state_features_with_index] = False elif features == ["toa_downwelling_shortwave_flux"]: filename_format = TOA_SW_DOWN_FLUX_FILENAME_FORMAT @@ -434,7 +445,9 @@ def _get_single_timeseries_dataarray( * np.timedelta64(1, "h") ) elif d == "analysis_time": - coord_values = self._get_analysis_times(split=split, member_id=member) + coord_values = self._get_analysis_times( + split=split, member_id=member + ) elif d == "y": coord_values = y elif d == "x": 
@@ -453,7 +466,9 @@ def _get_single_timeseries_dataarray( if features_vary_with_analysis_time: filepaths = [ fp_samples - / filename_format.format(analysis_time=analysis_time, **file_params) + / filename_format.format( + analysis_time=analysis_time, **file_params + ) for analysis_time in coords["analysis_time"] ] else: @@ -524,7 +539,9 @@ def _get_analysis_times(self, split, member_id) -> List[np.datetime64]: times.append(name_parts["analysis_time"]) if len(times) == 0: - raise ValueError(f"No files found in {sample_dir} with pattern {pattern}") + raise ValueError( + f"No files found in {sample_dir} with pattern {pattern}" + ) return times @@ -681,7 +698,9 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ def load_pickled_tensor(fn): - return torch.load(self.root_path / "static" / fn, weights_only=True).numpy() + return torch.load( + self.root_path / "static" / fn, weights_only=True + ).numpy() mean_diff_values = None std_diff_values = None diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 7dbe0567..60f8d316 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -41,13 +41,13 @@ class WeatherDataset(torch.utils.data.Dataset): num_past_boundary_steps: int, optional Number of past time steps to include in boundary input. If set to i, boundary from times t-i, t-i+1, ..., t-1, t (and potentially beyond, - given num_future_forcing_steps) are included as boundary inputs at time t - Default is 1. + given num_future_forcing_steps) are included as boundary inputs at time + t Default is 1. num_future_boundary_steps: int, optional Number of future time steps to include in boundary input. If set to j, - boundary from times t, t+1, ..., t+j-1, t+j (and potentially times before - t, given num_past_forcing_steps) are included as boundary inputs at time - t. Default is 1. + boundary from times t, t+1, ..., t+j-1, t+j (and potentially times + before t, given num_past_forcing_steps) are included as boundary inputs + at time t. Default is 1. standardize : bool, optional Whether to standardize the data. Default is True. """ @@ -75,7 +75,9 @@ def __init__( self.num_past_boundary_steps = num_past_boundary_steps self.num_future_boundary_steps = num_future_boundary_steps - self.da_state = self.datastore.get_dataarray(category="state", split=self.split) + self.da_state = self.datastore.get_dataarray( + category="state", split=self.split + ) if self.da_state is None: raise ValueError( "A non-empty state dataarray must be provided. 
" @@ -112,7 +114,9 @@ def __init__( parts["forcing"] = self.da_forcing for part, da in parts.items(): - expected_dim_order = self.datastore.expected_dim_order(category=part) + expected_dim_order = self.datastore.expected_dim_order( + category=part + ) if da.dims != expected_dim_order: raise ValueError( f"The dimension order of the `{part}` data ({da.dims}) " @@ -188,10 +192,12 @@ def get_time_step(times): # Calculate required bounds for boundary using its time step boundary_required_time_min = ( - state_time_min - self.num_past_forcing_steps * boundary_time_step + state_time_min + - self.num_past_forcing_steps * boundary_time_step ) boundary_required_time_max = ( - state_time_max + self.num_future_forcing_steps * boundary_time_step + state_time_max + + self.num_future_forcing_steps * boundary_time_step ) if boundary_time_min > boundary_required_time_min: @@ -220,8 +226,10 @@ def get_time_step(times): self.da_state_std = self.ds_state_stats.state_std if self.da_forcing is not None: - self.ds_forcing_stats = self.datastore.get_standardization_dataarray( - category="forcing" + self.ds_forcing_stats = ( + self.datastore.get_standardization_dataarray( + category="forcing" + ) ) self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std @@ -378,7 +386,9 @@ def _slice_time( current_time = ( da_forcing_boundary.analysis_time[idx] - + da_forcing_boundary.elapsed_forecast_duration[offset + step] + + da_forcing_boundary.elapsed_forecast_duration[ + offset + step + ] ) da_sliced = da_forcing_boundary.isel( @@ -386,12 +396,16 @@ def _slice_time( elapsed_forecast_duration=slice(start_idx, end_idx + 1), ) - da_sliced = da_sliced.rename({"elapsed_forecast_duration": "window"}) + da_sliced = da_sliced.rename( + {"elapsed_forecast_duration": "window"} + ) da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) - da_sliced = da_sliced.expand_dims(dim={"time": [current_time.values]}) + da_sliced = da_sliced.expand_dims( + dim={"time": [current_time.values]} + ) da_list.append(da_sliced) @@ -401,13 +415,13 @@ def _slice_time( da_forcing_boundary_matched.time.values[1] - da_forcing_boundary_matched.time.values[0] ) - da_forcing_boundary_matched["window"] = da_forcing_boundary_matched["window"] * ( - forcing_time_step / state_time_step - ) + da_forcing_boundary_matched["window"] = da_forcing_boundary_matched[ + "window" + ] * (forcing_time_step / state_time_step) time_diff_steps = da_forcing_boundary_matched.isel( grid_index=0, forcing_feature=0 ).data - + else: # For analysis data, match directly using the 'time' coordinate forcing_times = da_forcing_boundary["time"] @@ -416,7 +430,8 @@ def _slice_time( # (in multiples of state time steps) # Retrieve the indices of the closest times in the forcing data time_deltas = ( - forcing_times.values[:, np.newaxis] - state_times.values[np.newaxis, :] + forcing_times.values[:, np.newaxis] + - state_times.values[np.newaxis, :] ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) @@ -548,7 +563,9 @@ def _build_item_dataarrays(self, idx): da_target_times = da_target_states.time if self.standardize: - da_init_states = (da_init_states - self.da_state_mean) / self.da_state_std + da_init_states = ( + da_init_states - self.da_state_mean + ) / self.da_state_std da_target_states = ( da_target_states - self.da_state_mean ) / self.da_state_std @@ -620,7 +637,9 @@ def __getitem__(self, idx): tensor_dtype = torch.float32 init_states = torch.tensor(da_init_states.values, 
dtype=tensor_dtype) - target_states = torch.tensor(da_target_states.values, dtype=tensor_dtype) + target_states = torch.tensor( + da_target_states.values, dtype=tensor_dtype + ) target_times = torch.tensor( da_target_times.astype("datetime64[ns]").astype("int64").values, @@ -731,7 +750,10 @@ def _is_listlike(obj): ) for grid_coord in ["x", "y"]: - if grid_coord in da_datastore_state.coords and grid_coord not in da.coords: + if ( + grid_coord in da_datastore_state.coords + and grid_coord not in da.coords + ): da.coords[grid_coord] = da_datastore_state[grid_coord] if not add_time_as_dim: diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml index c97da4bc..7c5ffb3b 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -25,7 +25,7 @@ output: end: 2022-09-30T00:00 test: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 inputs: era_height_levels: diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 2f5ed96c..4a59c81e 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -40,7 +40,9 @@ def get_dataarray(self, category, split): if self.is_forecast: raise NotImplementedError() else: - da = xr.DataArray(values, dims=["time"], coords={"time": self._time_values}) + da = xr.DataArray( + values, dims=["time"], coords={"time": self._time_values} + ) # add `{category}_feature` and `grid_index` dimensions da = da.expand_dims("grid_index") @@ -77,7 +79,9 @@ def test_time_slicing_analysis( ar_steps, num_past_forcing_steps, num_future_forcing_steps ): # state and forcing variables have only one dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) + time_values = np.datetime64("2020-01-01") + np.arange( + len(ANALYSIS_STATE_VALUES) + ) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -98,7 +102,9 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] + init_states, target_states, forcing, _, _ = [ + tensor.numpy() for tensor in sample + ] expected_init_states = [0, 1] if ar_steps == 3: diff --git a/tests/test_training.py b/tests/test_training.py index 28566a4b..7a1b4717 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -5,6 +5,7 @@ import pytest import pytorch_lightning as pl import torch + import wandb # First-party @@ -22,14 +23,10 @@ @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize( - "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() -) +@pytest.mark.parametrize("datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys()) def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example( - datastore_boundary_name - ) + datastore_boundary = init_datastore_boundary_example(datastore_boundary_name) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( @@ -38,15 +35,13 @@ def test_training(datastore_name, datastore_boundary_name): ) if not isinstance(datastore_boundary, BaseRegularGridDatastore): pytest.skip( - f"Skipping test for {datastore_boundary_name} as it is not a regular " - "grid datastore." 
+ f"Skipping test for {datastore_boundary_name} as it is not a " + "regular grid datastore." ) if torch.cuda.is_available(): device_name = "cuda" - torch.set_float32_matmul_precision( - "high" - ) # Allows using Tensor Cores on A100s + torch.set_float32_matmul_precision("high") # Allows using Tensor Cores on A100s else: device_name = "cpu" From 779f3e9ed31d9525851793fae409cc145a30e15a Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:10:29 +0100 Subject: [PATCH 090/190] improved docstrings and added some assertions --- neural_lam/weather_dataset.py | 105 ++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 23 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 60f8d316..c65ec468 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -143,7 +143,13 @@ def __init__( self.da_state = self.da_state def get_time_step(times): - """Calculate the time step from the data""" + """Calculate the time step from the data + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. + """ time_diffs = np.diff(times) if not np.all(time_diffs == time_diffs[0]): raise ValueError( @@ -234,6 +240,7 @@ def get_time_step(times): self.da_forcing_mean = self.ds_forcing_stats.forcing_mean self.da_forcing_std = self.ds_forcing_stats.forcing_std + # XXX: Again, the boundary data is considered forcing data for now if self.da_boundary is not None: self.ds_boundary_stats = ( self.datastore_boundary.get_standardization_dataarray( @@ -305,7 +312,7 @@ def _slice_time( is performed based on the state times. Additionally, the time difference between the matched forcing/boundary times and state times (in multiples of state time steps) is added to the forcing dataarray. This will be - used as an additional feature in the model (temporal embedding). + used as an additional input feature in the model (temporal embedding). Parameters ---------- @@ -333,23 +340,26 @@ def _slice_time( da_forcing_boundary_matched : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'forcing/boundary_feature_windowed'). + If no forcing/boundary data is provided, this will be `None`. """ - # Number of initial steps required (e.g., for initializing models) + # The current implementation requires at least 2 time steps for the + # initial state (see GraphCast). init_steps = 2 - - # Slice the state data as before + # slice the dataarray to include the required number of time steps if self.datastore.is_forecast: - # Calculate start and end indices for slicing - start_idx = max(0, num_past_steps - init_steps) - end_idx = max(init_steps, num_past_steps) + n_steps - - # Slice the state data over the elapsed forecast duration + start_idx = max(0, self.num_past_forcing_steps - init_steps) + end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps + # this implies that the data will have both `analysis_time` and + # `elapsed_forecast_duration` dimensions for forecasts. We for now + # simply select a analysis time and the first `n_steps` forecast + # times (given no offset). Note that this means that we get one + # sample per forecast, always starting at forecast time 2. 
da_state_sliced = da_state.isel( analysis_time=idx, elapsed_forecast_duration=slice(start_idx, end_idx), ) - - # Create a new 'time' dimension + # create a new time dimension so that the produced sample has a + # `time` dimension, similarly to the analysis only data da_state_sliced["time"] = ( da_state_sliced.analysis_time + da_state_sliced.elapsed_forecast_duration @@ -357,9 +367,13 @@ def _slice_time( da_state_sliced = da_state_sliced.swap_dims( {"elapsed_forecast_duration": "time"} ) + # Asserting that the forecast time step is consistent + self.get_time_step(da_state_sliced.time) else: - # For analysis data, slice the time dimension directly + # For analysis data we slice the time dimension directly. The offset + # is only relevant for the very first (and last) samples in the + # dataset. start_idx = idx + max(0, num_past_steps - init_steps) end_idx = idx + max(init_steps, num_past_steps) + n_steps da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) @@ -372,7 +386,13 @@ def _slice_time( state_times = da_state_sliced["time"] state_time_step = state_times.values[1] - state_times.values[0] + # Here we cannot check 'self.datastore.is_forecast' directly because we + # might be dealing with a datastore_boundary if "analysis_time" in da_forcing_boundary.dims: + # Select the closest analysis time in the forcing/boundary data + # This is mostly relevant for boundary data where the time steps + # are not necessarily the same as the state data. But still fast + # enough for forcing data where the time steps are the same. idx = np.abs( da_forcing_boundary.analysis_time.values - self.da_state.analysis_time.values[idx] @@ -399,6 +419,8 @@ def _slice_time( da_sliced = da_sliced.rename( {"elapsed_forecast_duration": "window"} ) + + # Assign the 'window' coordinate to be relative positions da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) @@ -409,7 +431,10 @@ def _slice_time( da_list.append(da_sliced) - # Concatenate the list of DataArrays along the 'time' dimension + # Generate temporal embedding `time_diff_steps` for the + # forcing/boundary data. This is the time difference in multiples + # of state time steps between the forcing/boundary time and the + # state time. da_forcing_boundary_matched = xr.concat(da_list, dim="time") forcing_time_step = ( da_forcing_boundary_matched.time.values[1] @@ -423,7 +448,9 @@ def _slice_time( ).data else: - # For analysis data, match directly using the 'time' coordinate + # For analysis data, we slice the time dimension directly. The + # offset is only relevant for the very first (and last) samples in + # the dataset. forcing_times = da_forcing_boundary["time"] # Compute time differences between forcing and state times @@ -455,7 +482,7 @@ def _slice_time( ) # Add time difference as a new coordinate to concatenate to the - # forcing features later + # forcing features later as temporal embedding da_forcing_boundary_matched["time_diff_steps"] = ( ("time", "window"), time_diff_steps, @@ -464,7 +491,26 @@ def _slice_time( return da_state_sliced, da_forcing_boundary_matched def _process_windowed_data(self, da_windowed, da_state, da_target_times): - """Helper function to process windowed data after standardization.""" + """Helper function to process windowed data. This function stacks the + 'forcing_feature' and 'window' dimensions and adds the time step + differences to the existing features as a temporal embedding. + + Parameters + ---------- + da_windowed : xr.DataArray + The windowed data to process. 
Can be `None` if no data is provided. + da_state : xr.DataArray + The state dataarray. + da_target_times : xr.DataArray + The target times. + + Returns + ------- + da_windowed : xr.DataArray + The processed windowed data. If `da_windowed` is `None`, an empty + DataArray with the correct dimensions and coordinates is returned. + + """ stacked_dim = "forcing_feature_windowed" if da_windowed is not None: # Stack the 'feature' and 'window' dimensions and add the @@ -492,8 +538,8 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): def _build_item_dataarrays(self, idx): """ - Create the dataarrays for the initial states, target states and forcing - data for the sample at index `idx`. + Create the dataarrays for the initial states, target states, forcing + and boundary data for the sample at index `idx`. Parameters ---------- @@ -529,7 +575,7 @@ def _build_item_dataarrays(self, idx): else: da_boundary = None - # if da_forcing is None, the function will return None for + # if da_forcing_boundary is None, the function will return None for # da_forcing_windowed if da_boundary is not None: _, da_boundary_windowed = self._slice_time( @@ -542,6 +588,9 @@ def _build_item_dataarrays(self, idx): ) else: da_boundary_windowed = None + # XXX: Currently, the order of the `slice_time` calls is important + # as `da_state` is modified in the second call. This should be + # refactored to be more robust. da_state, da_forcing_windowed = self._slice_time( da_state=da_state, idx=idx, @@ -584,6 +633,10 @@ def _build_item_dataarrays(self, idx): da_boundary_windowed - self.da_boundary_mean ) / self.da_boundary_std + # This function handles the stacking of the forcing and boundary data + # and adds the time step differences as a temporal embedding. + # It can handle `None` inputs for the forcing and boundary data + # (and simlpy return an empty DataArray in that case). da_forcing_windowed = self._process_windowed_data( da_forcing_windowed, da_state, da_target_times ) @@ -655,6 +708,11 @@ def __getitem__(self, idx): # boundary: (ar_steps, N_grid, d_windowed_boundary) # target_times: (ar_steps,) + # Assert that the boundary data is an empty tensor if the corresponding + # datastore_boundary is `None` + if self.datastore_boundary is None: + assert boundary.numel() == 0 + return init_states, target_states, forcing, boundary, target_times def __iter__(self): @@ -795,9 +853,10 @@ def __init__( self.val_dataset = None self.test_dataset = None if num_workers > 0: - # BUG: There also seem to be issues with "spawn", to be investigated - # default to spawn for now, as the default on linux "fork" hangs - # when using dask (which the npyfilesmeps datastore uses) + # BUG: There also seem to be issues with "spawn" and `gloo`, to be + # investigated. 
Defaults to spawn for now, as the default on linux + # "fork" hangs when using dask (which the npyfilesmeps datastore + # uses) self.multiprocessing_context = "spawn" else: self.multiprocessing_context = None From f126ec27b6c7d8534893850f07427e3737418216 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:11:32 +0100 Subject: [PATCH 091/190] remove boundary datastore from tests that don't need it --- tests/test_datasets.py | 17 ++--------------- tests/test_training.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 5fbe4a5d..063ec147 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -108,37 +108,24 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): # try to get the last item of the dataset to ensure slicing and stacking # operations are working as expected and are consistent with the dataset # length - dataset[len(dataset) - 1] @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize( - "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() -) -def test_dataset_item_create_dataarray_from_tensor( - datastore_name, datastore_boundary_name -): +def test_dataset_item_create_dataarray_from_tensor(datastore_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example( - datastore_boundary_name - ) N_pred_steps = 4 num_past_forcing_steps = 1 num_future_forcing_steps = 1 - num_past_boundary_steps = 1 - num_future_boundary_steps = 1 dataset = WeatherDataset( datastore=datastore, - datastore_boundary=datastore_boundary, + datastore_boundary=None, split="train", ar_steps=N_pred_steps, num_past_forcing_steps=num_past_forcing_steps, num_future_forcing_steps=num_future_forcing_steps, - num_past_boundary_steps=num_past_boundary_steps, - num_future_boundary_steps=num_future_boundary_steps, ) idx = 0 diff --git a/tests/test_training.py b/tests/test_training.py index 7a1b4717..ca0ebf41 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -5,7 +5,6 @@ import pytest import pytorch_lightning as pl import torch - import wandb # First-party @@ -23,10 +22,14 @@ @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -@pytest.mark.parametrize("datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys()) +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) def test_training(datastore_name, datastore_boundary_name): datastore = init_datastore_example(datastore_name) - datastore_boundary = init_datastore_boundary_example(datastore_boundary_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) if not isinstance(datastore, BaseRegularGridDatastore): pytest.skip( @@ -41,7 +44,9 @@ def test_training(datastore_name, datastore_boundary_name): if torch.cuda.is_available(): device_name = "cuda" - torch.set_float32_matmul_precision("high") # Allows using Tensor Cores on A100s + torch.set_float32_matmul_precision( + "high" + ) # Allows using Tensor Cores on A100s else: device_name = "cpu" From 4b656da04526d3d38d71881deab18ee69519b29d Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 12:43:01 +0100 Subject: [PATCH 092/190] fix scope of _get_time_step --- neural_lam/weather_dataset.py | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index c65ec468..3685e227 100644 
--- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -142,28 +142,14 @@ def __init__( else: self.da_state = self.da_state - def get_time_step(times): - """Calculate the time step from the data - - Parameters - ---------- - times : xr.DataArray - The time dataarray to calculate the time step from. - """ - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] + # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time else: state_times = self.da_state.time - _ = get_time_step(state_times) + _ = self._get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: @@ -182,7 +168,7 @@ def get_time_step(times): forcing_times = self.da_forcing.analysis_time else: forcing_times = self.da_forcing.time - get_time_step(forcing_times.values) + self._get_time_step(forcing_times.values) if self.da_boundary is not None: # Boundary data is part of a separate datastore @@ -192,7 +178,7 @@ def get_time_step(times): boundary_times = self.da_boundary.analysis_time else: boundary_times = self.da_boundary.time - boundary_time_step = get_time_step(boundary_times.values) + boundary_time_step = self._get_time_step(boundary_times.values) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values @@ -296,6 +282,22 @@ def __len__(self): - self.num_future_forcing_steps ) + def _get_time_step(self, times): + """Calculate the time step from the data + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. + """ + time_diffs = np.diff(times) + if not np.all(time_diffs == time_diffs[0]): + raise ValueError( + "Inconsistent time steps in data. " + f"Found different time steps: {np.unique(time_diffs)}" + ) + return time_diffs[0] + def _slice_time( self, da_state, @@ -368,7 +370,7 @@ def _slice_time( {"elapsed_forecast_duration": "time"} ) # Asserting that the forecast time step is consistent - self.get_time_step(da_state_sliced.time) + self._get_time_step(da_state_sliced.time) else: # For analysis data we slice the time dimension directly. The offset From 75db4b8a5ac0769dab7be8837e707b734c62ff92 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 2 Dec 2024 16:58:46 +0100 Subject: [PATCH 093/190] added information about optional boundary datastore --- README.md | 22 +++++++++++++--------- neural_lam/weather_dataset.py | 2 -- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e21b7c24..7a5e5caf 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,9 @@ Once `neural-lam` is installed you will be able to train/evaluate models. For th interface that provides the data in a data-structure that can be used within neural-lam. A datastore is used to create a `pytorch.Dataset`-derived class that samples the data in time to create individual samples for - training, validation and testing. + training, validation and testing. A secondary datastore can be provided + for the boundary data. Currently, boundary datastore must be of type `mdp` + and only contain forcing features. This can easily be expanded in the future. 2. **The graph structure** is used to define message-passing GNN layers, that are trained to emulate fluid flow in the atmosphere over time. 
The @@ -121,7 +123,7 @@ different aspects about the training and evaluation of the model. The path you provide to the neural-lam config (`config.yaml`) also sets the root directory relative to which all other paths are resolved, as in the parent -directory of the config becomes the root directory. Both the datastore and +directory of the config becomes the root directory. Both the datastores and graphs you generate are then stored in subdirectories of this root directory. Exactly how and where a specific datastore expects its source data to be stored and where it stores its derived data is up to the implementation of the @@ -134,6 +136,7 @@ assume you placed `config.yaml` in a folder called `data`): data/ ├── config.yaml - Configuration file for neural-lam ├── danra.datastore.yaml - Configuration file for the datastore, referred to from config.yaml +├── era5.datastore.zarr/ - Optional configuration file for the boundary datastore, referred to from config.yaml └── graphs/ - Directory containing graphs for training ``` @@ -142,18 +145,20 @@ And the content of `config.yaml` could in this case look like: datastore: kind: mdp config_path: danra.datastore.yaml +datastore_boundary: + kind: mdp + config_path: era5.datastore.yaml training: state_feature_weighting: __config_class__: ManualStateFeatureWeighting - values: + weights: u100m: 1.0 v100m: 1.0 ``` -For now the neural-lam config only defines two things: 1) the kind of data -store and the path to its config, and 2) the weighting of different features in -the loss function. If you don't define the state feature weighting it will default -to weighting all features equally. +For now the neural-lam config only defines two things: +1) the kind of datastores and the path to their config +2) the weighting of different features in the loss function. If you don't define the state feature weighting it will default to weighting all features equally. (This example is taken from the `tests/datastore_examples/mdp` directory.) @@ -525,5 +530,4 @@ Furthermore, all tests in the ```tests``` directory will be run upon pushing cha # Contact If you are interested in machine learning models for LAM, have questions about the implementation or ideas for extending it, feel free to get in touch. -There is an open [mllam slack channel](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) that anyone can join (after following the link you have to request to join, this is to avoid spam bots). -You can also open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). +There is an open [mllam slack channel](https://join.slack.com/t/ml-lam/shared_invite/zt-2t112zvm8-Vt6aBvhX7nYa6Kbj_LkCBQ) that anyone can join. You can also open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). 
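To make the `training.state_feature_weighting` block shown in the README changes above concrete: the listed per-feature weights scale how much each state variable contributes to the training loss. The snippet below is a hedged sketch of that idea using a plain weighted MSE, not the actual `ManualStateFeatureWeighting` implementation.

```python
# Hedged sketch of per-feature loss weighting; not the project's loss code.
import torch

feature_names = ["u100m", "v100m"]           # state features, as in the example config
weights_cfg = {"u100m": 1.0, "v100m": 1.0}   # training.state_feature_weighting.weights
w = torch.tensor([weights_cfg[name] for name in feature_names])  # (d_features,)


def weighted_mse(prediction, target):
    # prediction, target: (B, pred_steps, num_grid_nodes, d_features);
    # w broadcasts over the trailing feature dimension
    return torch.mean(((prediction - target) ** 2) * w)


prediction = torch.randn(2, 3, 10, len(feature_names))
target = torch.randn_like(prediction)
print(weighted_mse(prediction, target))
```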
diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 3685e227..f02cfbd4 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -142,8 +142,6 @@ def __init__( else: self.da_state = self.da_state - - # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time From 4c175452af54fa4833fd9ac67bb4b1b36cdaa777 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 05:14:38 +0100 Subject: [PATCH 094/190] moved gcsfs to dev group --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 38e7cb0e..f556ef6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,13 +26,12 @@ dependencies = [ "torch-geometric==2.3.1", "parse>=1.20.2", "dataclass-wizard<0.31.0", - "gcsfs>=2021.10.0", "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" [project.optional-dependencies] -dev = ["pre-commit>=3.8.0", "pytest>=8.3.2", "pooch>=1.8.2"] +dev = ["pre-commit>=3.8.0", "pytest>=8.3.2", "pooch>=1.8.2", "gcsfs>=2021.10.0"] [tool.setuptools] py-modules = ["neural_lam"] From a700350f9c0b6161ffefa06b7fa7fc7151e51f23 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 05:14:44 +0100 Subject: [PATCH 095/190] linting --- .../era5.datastore.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml index 600a1845..7c5ffb3b 100644 --- a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml @@ -7,7 +7,7 @@ output: coord_ranges: time: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 step: PT6H chunking: time: 1 @@ -16,16 +16,16 @@ output: splits: train: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 compute_statistics: ops: [mean, std, diff_mean, diff_std] dims: [grid_index, time] val: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 test: start: 1990-09-01T00:00 - end: 2022-09-30T00:00 + end: 2022-09-30T00:00 inputs: era_height_levels: From 315aa0fbbb4d551b4ffb30b761743dbd95a14382 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 28 Oct 2024 11:20:41 +0100 Subject: [PATCH 096/190] Propagate separation of state and boundary change through training loop --- neural_lam/models/ar_model.py | 81 ++++++++++++++++++++------- neural_lam/models/base_graph_model.py | 38 ++++++++++++- neural_lam/vis.py | 52 ++++++++++++----- neural_lam/weather_dataset.py | 1 + 4 files changed, 135 insertions(+), 37 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 331966e4..95bd1154 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -48,14 +48,33 @@ def __init__( num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps + # Set up boundary mask + boundary_mask = torch.tensor( + da_boundary_mask.values, dtype=torch.float32 + ).unsqueeze( + 1 + ) # add feature dim + + self.register_buffer("boundary_mask", boundary_mask, persistent=False) + # Pre-compute interior mask for use in loss function + self.register_buffer( + "interior_mask", 1.0 - self.boundary_mask, persistent=False + ) # (num_grid_nodes, 
1), 1 for non-border + # Load static features for grid/data, NB: self.predict_step assumes # dimension order to be (grid_index, static_feature) arr_static = da_static_features.transpose( "grid_index", "static_feature" ).values + static_features_torch = torch.tensor(arr_static, dtype=torch.float32) self.register_buffer( "grid_static_features", - torch.tensor(arr_static, dtype=torch.float32), + static_features_torch[self.boundary_mask.to(torch.bool), + persistent=False, + ) + self.register_buffer( + "boundary_static_features", + static_features_torch[self.interior_mask.to(torch.bool), persistent=False, ) @@ -107,6 +126,11 @@ def __init__( grid_static_dim, ) = self.grid_static_features.shape + ( + self.num_boundary_nodes, + boundary_static_dim, # TODO Need for computation below + ) = self.boundary_static_features.shape + self.num_input_nodes = self.num_grid_nodes + self.num_boundary_nodes self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim @@ -115,6 +139,7 @@ def __init__( * num_forcing_vars * (num_past_forcing_steps + num_future_forcing_steps + 1) ) + self.boundary_dim = self.grid_dim # TODO Compute separately # Instantiate loss function self.loss = metrics.get_metric(args.loss) @@ -190,7 +215,9 @@ def expand_to_batch(x, batch_size): """ return x.unsqueeze(0).expand(batch_size, -1, -1) - def predict_step(self, prev_state, prev_prev_state, forcing): + def predict_step( + self, prev_state, prev_prev_state, forcing, boundary_forcing + ): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 prev_state: (B, num_grid_nodes, feature_dim), X_t prev_prev_state: (B, @@ -199,29 +226,31 @@ def predict_step(self, prev_state, prev_prev_state, forcing): """ raise NotImplementedError("No prediction step implemented") - def unroll_prediction(self, init_states, forcing_features, true_states): + def unroll_prediction(self, init_states, forcing, boundary_forcing): """ Roll out prediction taking multiple autoregressive steps with model - init_states: (B, 2, num_grid_nodes, d_f) forcing_features: (B, - pred_steps, num_grid_nodes, d_static_f) true_states: (B, pred_steps, - num_grid_nodes, d_f) + init_states: (B, 2, num_grid_nodes, d_f) + forcing: (B, pred_steps, num_grid_nodes, d_static_f) + boundary_forcing: (B, pred_steps, num_boundary_nodes, d_boundary_f) """ prev_prev_state = init_states[:, 0] prev_state = init_states[:, 1] prediction_list = [] pred_std_list = [] - pred_steps = forcing_features.shape[1] + pred_steps = forcing.shape[1] for i in range(pred_steps): - forcing = forcing_features[:, i] + forcing_step = forcing[:, i] + boundary_forcing_step = boundary_forcing[:, i] pred_state, pred_std = self.predict_step( - prev_state, prev_prev_state, forcing + prev_state, prev_prev_state, forcing_step, boundary_forcing_step ) # state: (B, num_grid_nodes, d_f) pred_std: (B, num_grid_nodes, # d_f) or None prediction_list.append(pred_state) + if self.output_std: pred_std_list.append(pred_std) @@ -243,20 +272,22 @@ def unroll_prediction(self, init_states, forcing_features, true_states): def common_step(self, batch): """ - Predict on single batch batch consists of: - init_states: (B, 2,num_grid_nodes, d_features) - target_states: (B, pred_steps,num_grid_nodes, d_features) - forcing_features: (B, pred_steps,num_grid_nodes, d_forcing) - boundary_features: (B, pred_steps,num_grid_nodes, d_boundaries) - batch_times: (B, pred_steps) + Predict on single batch + batch consists of: + init_states: (B, 2, num_grid_nodes, d_features) + target_states: (B, pred_steps, num_grid_nodes, d_features) + 
forcing: (B, pred_steps, num_grid_nodes, d_forcing), + boundary_forcing: + (B, pred_steps, num_boundary_nodes, d_boundary_forcing), + where index 0 corresponds to index 1 of init_states """ (init_states, target_states, forcing_features, _, batch_times) = batch prediction, pred_std = self.unroll_prediction( - init_states, forcing_features, target_states - ) - # prediction: (B, pred_steps, num_grid_nodes, d_f) - # pred_std: (B, pred_steps, num_grid_nodes, d_f) or (d_f,) + init_states, forcing, boundary_forcing + ) # (B, pred_steps, num_grid_nodes, d_f) + # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, + # pred_steps, num_grid_nodes, d_f) or (d_f,) return prediction, target_states, pred_std, batch_times @@ -306,7 +337,11 @@ def validation_step(self, batch, batch_idx): prediction, target, pred_std, _ = self.common_step(batch) time_step_loss = torch.mean( - self.loss(prediction, target, pred_std), + self.loss( + prediction, + target, + pred_std, + ), dim=0, ) # (time_steps-1) mean_loss = torch.mean(time_step_loss) @@ -357,7 +392,11 @@ def test_step(self, batch, batch_idx): # pred_steps, num_grid_nodes, d_f) or (d_f,) time_step_loss = torch.mean( - self.loss(prediction, target, pred_std), + self.loss( + prediction, + target, + pred_std, + ), dim=0, ) # (time_steps-1,) mean_loss = torch.mean(time_step_loss) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 6233b4d1..246cd93e 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -46,6 +46,12 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): # Define sub-models # Feature embedders for grid self.mlp_blueprint_end = [args.hidden_dim] * (args.hidden_layers + 1) + # TODO Optional separate embedder for boundary nodes + assert self.grid_dim == self.boundary_dim, ( + "Grid and boundary input dimension must be the same when using " + f"the same encoder, got grid_dim={self.grid_dim}, " + f"boundary_dim={self.boundary_dim}" + ) self.grid_embedder = utils.make_mlp( [self.grid_dim] + self.mlp_blueprint_end ) @@ -103,12 +109,15 @@ def process_step(self, mesh_rep): """ raise NotImplementedError("process_step not implemented") - def predict_step(self, prev_state, prev_prev_state, forcing): + def predict_step( + self, prev_state, prev_prev_state, forcing, boundary_forcing + ): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 prev_state: (B, num_grid_nodes, feature_dim), X_t prev_prev_state: (B, num_grid_nodes, feature_dim), X_{t-1} forcing: (B, num_grid_nodes, forcing_dim) + boundary_forcing: (B, num_boundary_nodes, boundary_forcing_dim) """ batch_size = prev_state.shape[0] @@ -122,22 +131,45 @@ def predict_step(self, prev_state, prev_prev_state, forcing): ), dim=-1, ) + # Create full boundary node features of shape + # (B, num_boundary_nodes, boundary_dim) + boundary_features = torch.cat( + ( + boundary_forcing, + self.expand_to_batch(self.boundary_static_features, batch_size), + ), + dim=-1, + ) # Embed all features grid_emb = self.grid_embedder(grid_features) # (B, num_grid_nodes, d_h) + boundary_emb = self.grid_embedder(boundary_features) + # (B, num_boundary_nodes, d_h) g2m_emb = self.g2m_embedder(self.g2m_features) # (M_g2m, d_h) m2g_emb = self.m2g_embedder(self.m2g_features) # (M_m2g, d_h) mesh_emb = self.embedd_mesh_nodes() + # Merge interior and boundary emb into input embedding + # TODO Can we enforce ordering in the graph creation process to make + # this just a concat instead? 
+ input_emb = torch.zeros( + batch_size, + self.num_input_nodes, + grid_emb.shape[2], + device=grid_emb.device, + ) + input_emb[:, self.interior_mask] = grid_emb + input_emb[:, self.boundary_mask] = boundary_emb + # Map from grid to mesh mesh_emb_expanded = self.expand_to_batch( mesh_emb, batch_size ) # (B, num_mesh_nodes, d_h) g2m_emb_expanded = self.expand_to_batch(g2m_emb, batch_size) - # This also splits representation into grid and mesh + # Encode to mesh mesh_rep = self.g2m_gnn( - grid_emb, mesh_emb_expanded, g2m_emb_expanded + input_emb, mesh_emb_expanded, g2m_emb_expanded ) # (B, num_mesh_nodes, d_h) # Also MLP with residual for grid representation grid_rep = grid_emb + self.encoding_grid_mlp( diff --git a/neural_lam/vis.py b/neural_lam/vis.py index efab20bf..f2775328 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -64,6 +64,40 @@ def plot_error_map(errors, datastore: BaseRegularGridDatastore, title=None): return fig +def plot_on_axis( + ax, + da, + datastore, + obs_mask=None, + vmin=None, + vmax=None, + ax_title=None, + cmap="plasma", +): + """ + Plot weather state on given axis + """ + ax.set_global() + ax.coastlines() # Add coastline outlines + + extent = datastore.get_xy_extent("state") + + da.plot.imshow( + ax=ax, + origin="lower", + x="x", + extent=extent, + vmin=vmin, + vmax=vmax, + cmap=cmap, + transform=datastore.coords_projection, + ) + + if ax_title: + ax.set_title(ax_title, size=15) + return im + + @matplotlib.rc_context(utils.fractional_plot_bundle(1)) def plot_prediction( datastore: BaseRegularGridDatastore, @@ -85,8 +119,6 @@ def plot_prediction( else: vmin, vmax = vrange - extent = datastore.get_xy_extent("state") - fig, axes = plt.subplots( 1, 2, @@ -96,16 +128,12 @@ def plot_prediction( # Plot pred and target for ax, da in zip(axes, (da_target, da_prediction)): - ax.coastlines() # Add coastline outlines - da.plot.imshow( - ax=ax, - origin="lower", - x="x", - extent=extent, + im = plot_on_axis( + ax, + da, + datastore, vmin=vmin, vmax=vmax, - cmap="plasma", - transform=datastore.coords_projection, ) # Ticks and labels @@ -133,14 +161,11 @@ def plot_spatial_error( else: vmin, vmax = vrange - extent = datastore.get_xy_extent("state") - fig, ax = plt.subplots( figsize=(5, 4.8), subplot_kw={"projection": datastore.coords_projection}, ) - ax.coastlines() # Add coastline outlines error_grid = ( error.reshape( [datastore.grid_shape_state.x, datastore.grid_shape_state.y] @@ -149,6 +174,7 @@ def plot_spatial_error( .numpy() ) + # TODO: This needs to be converted to DA and use plot_on_axis im = ax.imshow( error_grid, origin="lower", diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index f02cfbd4..ed67b6f7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -234,6 +234,7 @@ def __init__( self.da_boundary_mean = self.ds_boundary_stats.forcing_mean self.da_boundary_std = self.ds_boundary_stats.forcing_std + def __len__(self): if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time From 19672210c761805e1ef5b8ff63e1ca3c4458ef19 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 4 Nov 2024 18:18:34 +0100 Subject: [PATCH 097/190] Start building graphs with wmg --- neural_lam/build_graph.py | 153 ++++++++++++++++++++++++++++++++++ neural_lam/models/ar_model.py | 4 +- neural_lam/plot_graph.py | 6 +- neural_lam/utils.py | 25 +++++- pyproject.toml | 1 + 5 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 neural_lam/build_graph.py diff --git 
a/neural_lam/build_graph.py b/neural_lam/build_graph.py new file mode 100644 index 00000000..034f82cd --- /dev/null +++ b/neural_lam/build_graph.py @@ -0,0 +1,153 @@ +# Standard library +import argparse +import os + +# Third-party +import numpy as np +import weather_model_graphs as wmg + +# Local +from . import config, utils + +WMG_ARCHETYPES = { + "keisler": wmg.create.archetype.create_keisler_graph, + "graphcast": wmg.create.archetype.create_graphcast_graph, + "hierarchical": wmg.create.archetype.create_oskarsson_hierarchical_graph, +} + + +def main(): + parser = argparse.ArgumentParser( + description="Graph generation using WMG", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + # Inputs and outputs + parser.add_argument( + "--data_config", + type=str, + default="neural_lam/data_config.yaml", + help="Path to data config file", + ) + parser.add_argument( + "--output_dir", + type=str, + default="graphs", + help="Directory to save graph to", + ) + + # Graph structure + parser.add_argument( + "--archetype", + type=str, + default="keisler", + help="Archetype to use to create graph (keisler/graphcast/hierarchical)", + ) + parser.add_argument( + "--mesh_node_distance", + type=float, + default=3.0, + help="Distance between created mesh nodes", + ) + parser.add_argument( + "--level_refinement_factor", + type=float, + default=3, + help="Refinement factor between grid points and bottom level of mesh hierarchy", + ) + parser.add_argument( + "--max_num_levels", + type=int, + help="Limit multi-scale mesh to given number of levels, " + "from bottom up", + ) + parser.add_argument( + "--hierarchical", + action="store_true", + help="Generate hierarchical mesh graph (default: False)", + ) + args = parser.parse_args() + + # Load grid positions + config_loader = config.Config.from_file(args.data_config) + + coords = utils.get_reordered_grid_pos(config_loader.dataset.name).numpy() + # (num_nodes_full, 2) + + # Construct mask + static_data = utils.load_static_data(config_loader.dataset.name) + decode_mask = np.concatenate( + ( + np.ones(static_data["grid_static_features"].shape[0], dtype=bool), + np.zeros( + static_data["boundary_static_features"].shape[0], dtype=bool + ), + ), + axis=0, + ) + + # Build graph + assert ( + args.archetype in WMG_ARCHETYPES + ), f"Unknown archetype: {args.archetype}" + archetype_create_func = WMG_ARCHETYPES[args.archetype] + + create_kwargs = { + "coords": coords, + "mesh_node_distance": args.mesh_node_distance, + "projection": None, + "decode_mask": decode_mask, + } + if args.archetype != "keisler": + # Add additional multi-level kwargs + create_kwargs.update( + { + "level_refinement_factor": args.level_refinement_factor, + "max_num_levels": args.max_num_levels, + } + ) + + graph = archetype_create_func(**create_kwargs) + graph_comp = wmg.split_graph_by_edge_attribute(graph, attr="component") + + print("Created graph:") + for name, subgraph in graph_comp.items(): + print(f"{name}: {subgraph}") + + # Save graph + os.makedirs(args.output_dir, exist_ok=True) + for component, graph in graph_comp.items(): + # TODO This is all hack, saving in wmg needs to be consistent with nl + if component == "m2m": + if args.archetype == "hierarchical": + # Split by direction + m2m_direction_comp = wmg.split_graph_by_edge_attribute( + graph, attr="direction" + ) + for direction, graph in m2m_direction_comp.items(): + wmg.save.to_pyg( + graph=graph, + name=f"mesh_{direction}", + list_from_attribute="level", + edge_features=["len", "vdiff"], + output_directory=args.output_dir, + ) + 
else: + wmg.save.to_pyg( + graph=graph, + name=component, + list_from_attribute="dummy", + edge_features=["len", "vdiff"], + output_directory=args.output_dir, + ) + else: + wmg.save.to_pyg( + graph=graph, + name=component, + edge_features=["len", "vdiff"], + output_directory=args.output_dir, + ) + + +if __name__ == "__main__": + main() diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 95bd1154..0311e542 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -69,12 +69,12 @@ def __init__( static_features_torch = torch.tensor(arr_static, dtype=torch.float32) self.register_buffer( "grid_static_features", - static_features_torch[self.boundary_mask.to(torch.bool), + static_features_torch[self.boundary_mask.to(torch.bool)], persistent=False, ) self.register_buffer( "boundary_static_features", - static_features_torch[self.interior_mask.to(torch.bool), + static_features_torch[self.interior_mask.to(torch.bool)], persistent=False, ) diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index 999c8e53..9c1fc0ef 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -47,10 +47,6 @@ def main(): config_path=args.datastore_config_path ) - xy = datastore.get_xy("state", stacked=True) # (N_grid, 2) - pos_max = np.max(np.abs(xy)) - grid_pos = xy / pos_max # Divide by maximum coordinate - # Load graph data graph_dir_path = os.path.join(datastore.root_path, "graph", args.graph) hierarchical, graph_ldict = utils.load_graph(graph_dir_path=graph_dir_path) @@ -65,6 +61,8 @@ def main(): ) mesh_static_features = graph_ldict["mesh_static_features"] + # Extract values needed, turn to numpy + grid_pos = utils.get_reordered_grid_pos(datastore).numpy() # Add in z-dimension z_grid = GRID_HEIGHT * np.ones((grid_pos.shape[0],)) grid_pos = np.concatenate( diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 4a0752e4..baa55610 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -114,7 +114,7 @@ def loads_file(fn): # Load static node features mesh_static_features = loads_file( - "mesh_features.pt" + "m2m_node_features.pt" ) # List of (N_mesh[l], d_mesh_static) # Some checks for consistency @@ -241,3 +241,26 @@ def init_wandb_metrics(wandb_logger, val_steps): experiment.define_metric("val_mean_loss", summary="min") for step in val_steps: experiment.define_metric(f"val_loss_unroll{step}", summary="min") + + +def get_reordered_grid_pos(datastore): + """ + Interior nodes first, then boundary + """ + xy_np = datastore.get_xy() # np, (num_grid, 2) + xy_torch = torch.tensor(xy_np, dtype=torch.float32) + + da_boundary_mask = datastore.boundary_mask + boundary_mask = torch.tensor( + da_boundary_mask.values, dtype=torch.bool + ) + interior_mask = torch.logical_not(boundary_mask) + + return torch.cat( + ( + xy_torch[interior_mask], + xy_torch[boundary_mask], + ), + dim=0, + ) + # (num_total_grid_nodes, 2) diff --git a/pyproject.toml b/pyproject.toml index f556ef6b..9607d1da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "parse>=1.20.2", "dataclass-wizard<0.31.0", "mllam-data-prep>=0.5.0", + "weather-model-graphs>=0.2.0" ] requires-python = ">=3.9" From cb74e3f05d808608a56be1b1de927aec5c73a848 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 11 Nov 2024 14:28:06 +0100 Subject: [PATCH 098/190] Change forward pass to concat according to enforced node ordering --- neural_lam/build_graph.py | 5 +++-- neural_lam/models/base_graph_model.py | 12 ++---------- 2 files changed, 5 insertions(+), 12 
deletions(-) diff --git a/neural_lam/build_graph.py b/neural_lam/build_graph.py index 034f82cd..a0c675ac 100644 --- a/neural_lam/build_graph.py +++ b/neural_lam/build_graph.py @@ -117,7 +117,8 @@ def main(): # Save graph os.makedirs(args.output_dir, exist_ok=True) for component, graph in graph_comp.items(): - # TODO This is all hack, saving in wmg needs to be consistent with nl + # This seems like a bit of a hack, maybe better if saving in wmg + # was made consistent with nl if component == "m2m": if args.archetype == "hierarchical": # Split by direction @@ -136,7 +137,7 @@ def main(): wmg.save.to_pyg( graph=graph, name=component, - list_from_attribute="dummy", + list_from_attribute="dummy", # Note: Needed to output list edge_features=["len", "vdiff"], output_directory=args.output_dir, ) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 246cd93e..481353b4 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -150,16 +150,8 @@ def predict_step( mesh_emb = self.embedd_mesh_nodes() # Merge interior and boundary emb into input embedding - # TODO Can we enforce ordering in the graph creation process to make - # this just a concat instead? - input_emb = torch.zeros( - batch_size, - self.num_input_nodes, - grid_emb.shape[2], - device=grid_emb.device, - ) - input_emb[:, self.interior_mask] = grid_emb - input_emb[:, self.boundary_mask] = boundary_emb + # We enforce ordering (interior, boundary) of nodes + input_emb = torch.cat((grid_emb, boundary_emb), dim=1) # Map from grid to mesh mesh_emb_expanded = self.expand_to_batch( From 9715ed8eb855254e4e628f3e38ab982a5878faf9 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 11 Nov 2024 18:13:40 +0100 Subject: [PATCH 099/190] wip to make tests pass --- neural_lam/build_graph.py | 35 ++++++----- neural_lam/interaction_net.py | 6 +- neural_lam/plot_graph.py | 109 +++++++++++++++++++++++++++------- neural_lam/utils.py | 30 ++++++++-- 4 files changed, 136 insertions(+), 44 deletions(-) diff --git a/neural_lam/build_graph.py b/neural_lam/build_graph.py index a0c675ac..dcbff49d 100644 --- a/neural_lam/build_graph.py +++ b/neural_lam/build_graph.py @@ -16,7 +16,7 @@ } -def main(): +def main(input_args=None): parser = argparse.ArgumentParser( description="Graph generation using WMG", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -61,16 +61,12 @@ def main(): help="Limit multi-scale mesh to given number of levels, " "from bottom up", ) - parser.add_argument( - "--hierarchical", - action="store_true", - help="Generate hierarchical mesh graph (default: False)", - ) - args = parser.parse_args() + args = parser.parse_args(input_args) # Load grid positions config_loader = config.Config.from_file(args.data_config) + # TODO Do not get normalised positions coords = utils.get_reordered_grid_pos(config_loader.dataset.name).numpy() # (num_nodes_full, 2) @@ -126,13 +122,24 @@ def main(): graph, attr="direction" ) for direction, graph in m2m_direction_comp.items(): - wmg.save.to_pyg( - graph=graph, - name=f"mesh_{direction}", - list_from_attribute="level", - edge_features=["len", "vdiff"], - output_directory=args.output_dir, - ) + if direction == "same": + # Name just m2m to be consistent with non-hierarchical + wmg.save.to_pyg( + graph=graph, + name="m2m", + list_from_attribute="level", + edge_features=["len", "vdiff"], + output_directory=args.output_dir, + ) + else: + # up and down directions + wmg.save.to_pyg( + graph=graph, + name=f"mesh_{direction}", + 
list_from_attribute="levels", + edge_features=["len", "vdiff"], + output_directory=args.output_dir, + ) else: wmg.save.to_pyg( graph=graph, diff --git a/neural_lam/interaction_net.py b/neural_lam/interaction_net.py index 2f45b03f..8b8c5c85 100644 --- a/neural_lam/interaction_net.py +++ b/neural_lam/interaction_net.py @@ -30,7 +30,8 @@ def __init__( """ Create a new InteractionNet - edge_index: (2,M), Edges in pyg format + edge_index: (2,M), Edges in pyg format, with boeth sender and receiver + node indices starting at 0 input_dim: Dimensionality of input representations, for both nodes and edges update_edges: If new edge representations should be computed @@ -52,8 +53,7 @@ def __init__( # Default to input dim if not explicitly given hidden_dim = input_dim - # Make both sender and receiver indices of edge_index start at 0 - edge_index = edge_index - edge_index.min(dim=1, keepdim=True)[0] + # any edge_index used here must start sender and rec. nodes at index 0 # Store number of receiver nodes according to edge_index self.num_rec = edge_index[1].max() + 1 edge_index[0] = ( diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index 9c1fc0ef..f621d201 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -69,11 +69,9 @@ def main(): (grid_pos, np.expand_dims(z_grid, axis=1)), axis=1 ) - # List of edges to plot, (edge_index, color, line_width, label) - edge_plot_list = [ - (m2g_edge_index.numpy(), "black", 0.4, "M2G"), - (g2m_edge_index.numpy(), "black", 0.4, "G2M"), - ] + # List of edges to plot, (edge_index, from_pos, to_pos, color, + # line_width, label) + edge_plot_list = [] # Mesh positioning and edges to plot differ if we have a hierarchical graph if hierarchical: @@ -92,24 +90,80 @@ def main(): mesh_static_features, start=1 ) ] - mesh_pos = np.concatenate(mesh_level_pos, axis=0) + all_mesh_pos = np.concatenate(mesh_level_pos, axis=0) + grid_con_mesh_pos = mesh_level_pos[0] # Add inter-level mesh edges edge_plot_list += [ - (level_ei.numpy(), "blue", 1, f"M2M Level {level}") - for level, level_ei in enumerate(m2m_edge_index) + ( + level_ei.numpy(), + level_pos, + level_pos, + "blue", + 1, + f"M2M Level {level}", + ) + for level, (level_ei, level_pos) in enumerate( + zip(m2m_edge_index, mesh_level_pos) + ) ] # Add intra-level mesh edges - up_edges_ei = np.concatenate( - [level_up_ei.numpy() for level_up_ei in mesh_up_edge_index], axis=1 + up_edges_ei = [ + level_up_ei.numpy() for level_up_ei in mesh_up_edge_index + ] + down_edges_ei = [ + level_down_ei.numpy() for level_down_ei in mesh_down_edge_index + ] + # Add up edges + for level_i, (up_ei, from_pos, to_pos) in enumerate( + zip(up_edges_ei, mesh_level_pos[:-1], mesh_level_pos[1:]) + ): + edge_plot_list.append( + ( + up_ei, + from_pos, + to_pos, + "green", + 1, + f"Mesh up {level_i}-{level_i+1}", + ) + ) + # Add down edges + for level_i, (down_ei, from_pos, to_pos) in enumerate( + zip(down_edges_ei, mesh_level_pos[1:], mesh_level_pos[:-1]) + ): + edge_plot_list.append( + ( + down_ei, + from_pos, + to_pos, + "green", + 1, + f"Mesh down {level_i+1}-{level_i}", + ) + ) + + edge_plot_list.append( + ( + m2g_edge_index.numpy(), + grid_con_mesh_pos, + grid_pos, + "black", + 0.4, + "M2G", + ) ) - down_edges_ei = np.concatenate( - [level_down_ei.numpy() for level_down_ei in mesh_down_edge_index], - axis=1, + edge_plot_list.append( + ( + g2m_edge_index.numpy(), + grid_pos, + grid_con_mesh_pos, + "black", + 0.4, + "G2M", + ) ) - edge_plot_list.append((up_edges_ei, "green", 1, "Mesh up")) - 
edge_plot_list.append((down_edges_ei, "green", 1, "Mesh down")) mesh_node_size = 2.5 else: @@ -123,21 +177,30 @@ def main(): (mesh_pos, np.expand_dims(z_mesh, axis=1)), axis=1 ) - edge_plot_list.append((m2m_edge_index.numpy(), "blue", 1, "M2M")) + edge_plot_list.append( + (m2m_edge_index.numpy(), mesh_pos, mesh_pos, "blue", 1, "M2M") + ) + edge_plot_list.append( + (m2g_edge_index.numpy(), mesh_pos, grid_pos, "black", 0.4, "M2G") + ) + edge_plot_list.append( + (g2m_edge_index.numpy(), grid_pos, mesh_pos, "black", 0.4, "G2M") + ) - # All node positions in one array - node_pos = np.concatenate((mesh_pos, grid_pos), axis=0) + all_mesh_pos = mesh_pos # Add edges data_objs = [] for ( ei, + from_pos, + to_pos, col, width, label, ) in edge_plot_list: - edge_start = node_pos[ei[0]] # (M, 2) - edge_end = node_pos[ei[1]] # (M, 2) + edge_start = from_pos[ei[0]] # (M, 2) + edge_end = to_pos[ei[1]] # (M, 2) n_edges = edge_start.shape[0] x_edges = np.stack( @@ -174,9 +237,9 @@ def main(): ) data_objs.append( go.Scatter3d( - x=mesh_pos[:, 0], - y=mesh_pos[:, 1], - z=mesh_pos[:, 2], + x=all_mesh_pos[:, 0], + y=all_mesh_pos[:, 1], + z=all_mesh_pos[:, 2], mode="markers", marker={"color": "blue", "size": mesh_node_size}, name="Mesh nodes", diff --git a/neural_lam/utils.py b/neural_lam/utils.py index baa55610..c0207123 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -33,6 +33,13 @@ def __iter__(self): return (self[i] for i in range(len(self))) +def zero_index_edge_index(edge_index): + """ + Make both sender and receiver indices of edge_index start at 0 + """ + return edge_index - edge_index.min(dim=1, keepdim=True)[0] + + def load_graph(graph_dir_path, device="cpu"): """Load all tensors representing the graph from `graph_dir_path`. @@ -71,11 +78,13 @@ def load_graph(graph_dir_path, device="cpu"): - mesh_down_edge_index - g2m_features - m2g_features - - m2m_features + - m2m_node_features - mesh_up_features - mesh_down_features - mesh_static_features + + Load all tensors representing the graph """ def loads_file(fn): @@ -87,11 +96,16 @@ def loads_file(fn): # Load edges (edge_index) m2m_edge_index = BufferList( - loads_file("m2m_edge_index.pt"), persistent=False + [zero_index_edge_index(ei) for ei in loads_file("m2m_edge_index.pt")], + persistent=False, ) # List of (2, M_m2m[l]) g2m_edge_index = loads_file("g2m_edge_index.pt") # (2, M_g2m) m2g_edge_index = loads_file("m2g_edge_index.pt") # (2, M_m2g) + # Change first indices to 0 + g2m_edge_index = zero_index_edge_index(g2m_edge_index) + m2g_edge_index = zero_index_edge_index(m2g_edge_index) + n_levels = len(m2m_edge_index) hierarchical = n_levels > 1 # Nor just single level mesh graph @@ -128,10 +142,18 @@ def loads_file(fn): if hierarchical: # Load up and down edges and features mesh_up_edge_index = BufferList( - loads_file("mesh_up_edge_index.pt"), persistent=False + [ + zero_index_edge_index(ei) + for ei in loads_file("mesh_up_edge_index.pt") + ], + persistent=False, ) # List of (2, M_up[l]) mesh_down_edge_index = BufferList( - loads_file("mesh_down_edge_index.pt"), persistent=False + [ + zero_index_edge_index(ei) + for ei in loads_file("mesh_down_edge_index.pt") + ], + persistent=False, ) # List of (2, M_down[l]) mesh_up_features = loads_file( From 336fba9c6843838533222f7ea0618d59f44ff427 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 12 Nov 2024 14:02:22 +0100 Subject: [PATCH 100/190] Fix edge index manipulation to make training work again --- neural_lam/interaction_net.py | 4 ++-- neural_lam/models/base_graph_model.py | 2 -- 2 files 
changed, 2 insertions(+), 4 deletions(-) diff --git a/neural_lam/interaction_net.py b/neural_lam/interaction_net.py index 8b8c5c85..417aae1a 100644 --- a/neural_lam/interaction_net.py +++ b/neural_lam/interaction_net.py @@ -56,8 +56,8 @@ def __init__( # any edge_index used here must start sender and rec. nodes at index 0 # Store number of receiver nodes according to edge_index self.num_rec = edge_index[1].max() + 1 - edge_index[0] = ( - edge_index[0] + self.num_rec + edge_index = torch.stack( + (edge_index[0] + self.num_rec, edge_index[1]), dim=0 ) # Make sender indices after rec self.register_buffer("edge_index", edge_index, persistent=False) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 481353b4..c0b21a75 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -19,8 +19,6 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): super().__init__(args, config=config, datastore=datastore) # Load graph with static features - # NOTE: (IMPORTANT!) mesh nodes MUST have the first - # num_mesh_nodes indices, graph_dir_path = datastore.root_path / "graph" / args.graph self.hierarchical, graph_ldict = utils.load_graph( graph_dir_path=graph_dir_path From ce3ea6d7d44f5126ec088daec6665a65f04fe83b Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 12 Nov 2024 16:06:28 +0100 Subject: [PATCH 101/190] Work on fixing plotting functionality --- neural_lam/vis.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index f2775328..d744f542 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -73,6 +73,7 @@ def plot_on_axis( vmax=None, ax_title=None, cmap="plasma", + grid_limits=None ): """ Plot weather state on given axis @@ -82,7 +83,7 @@ def plot_on_axis( extent = datastore.get_xy_extent("state") - da.plot.imshow( + im = da.plot.imshow( ax=ax, origin="lower", x="x", @@ -95,6 +96,7 @@ def plot_on_axis( if ax_title: ax.set_title(ax_title, size=15) + return im @@ -173,6 +175,7 @@ def plot_spatial_error( .T.cpu() .numpy() ) + extent = datastore.get_xy_extent("state") # TODO: This needs to be converted to DA and use plot_on_axis im = ax.imshow( From a520505ceac8a584c9c7e6698c0a2c3911cb8fa2 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 13 Nov 2024 13:32:16 +0100 Subject: [PATCH 102/190] Linting --- neural_lam/build_graph.py | 8 +++++--- neural_lam/vis.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/neural_lam/build_graph.py b/neural_lam/build_graph.py index dcbff49d..c13dc629 100644 --- a/neural_lam/build_graph.py +++ b/neural_lam/build_graph.py @@ -41,7 +41,8 @@ def main(input_args=None): "--archetype", type=str, default="keisler", - help="Archetype to use to create graph (keisler/graphcast/hierarchical)", + help="Archetype to use to create graph " + "(keisler/graphcast/hierarchical)", ) parser.add_argument( "--mesh_node_distance", @@ -53,7 +54,8 @@ def main(input_args=None): "--level_refinement_factor", type=float, default=3, - help="Refinement factor between grid points and bottom level of mesh hierarchy", + help="Refinement factor between grid points and bottom level of " + "mesh hierarchy", ) parser.add_argument( "--max_num_levels", @@ -144,7 +146,7 @@ def main(input_args=None): wmg.save.to_pyg( graph=graph, name=component, - list_from_attribute="dummy", # Note: Needed to output list + list_from_attribute="dummy", # Note: Needed to output list edge_features=["len", "vdiff"], 
output_directory=args.output_dir, ) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index d744f542..7e7bbf42 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -73,7 +73,7 @@ def plot_on_axis( vmax=None, ax_title=None, cmap="plasma", - grid_limits=None + grid_limits=None, ): """ Plot weather state on given axis From 793e6c04436a3afd842bd051bde8705d405829e5 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 13 Nov 2024 13:54:53 +0100 Subject: [PATCH 103/190] Add optional separate grid embedder for boundary --- neural_lam/models/ar_model.py | 2 +- neural_lam/models/base_graph_model.py | 22 +++++++++++++++------- neural_lam/train_model.py | 7 +++++++ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 0311e542..c3870fbc 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -128,7 +128,7 @@ def __init__( ( self.num_boundary_nodes, - boundary_static_dim, # TODO Need for computation below + boundary_static_dim, # TODO Will need for computation below ) = self.boundary_static_features.shape self.num_input_nodes = self.num_grid_nodes + self.num_boundary_nodes self.grid_dim = ( diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index c0b21a75..de8d87db 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -44,15 +44,23 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): # Define sub-models # Feature embedders for grid self.mlp_blueprint_end = [args.hidden_dim] * (args.hidden_layers + 1) - # TODO Optional separate embedder for boundary nodes - assert self.grid_dim == self.boundary_dim, ( - "Grid and boundary input dimension must be the same when using " - f"the same encoder, got grid_dim={self.grid_dim}, " - f"boundary_dim={self.boundary_dim}" - ) self.grid_embedder = utils.make_mlp( [self.grid_dim] + self.mlp_blueprint_end ) + # Optional separate embedder for boundary nodes + print(args.shared_grid_embedder) + if args.shared_grid_embedder: + assert self.grid_dim == self.boundary_dim, ( + "Grid and boundary input dimension must be the same when using " + f"the same embedder, got grid_dim={self.grid_dim}, " + f"boundary_dim={self.boundary_dim}" + ) + self.boundary_embedder = self.grid_embedder + else: + self.boundary_embedder = utils.make_mlp( + [self.boundary_dim] + self.mlp_blueprint_end + ) + self.g2m_embedder = utils.make_mlp([g2m_dim] + self.mlp_blueprint_end) self.m2g_embedder = utils.make_mlp([m2g_dim] + self.mlp_blueprint_end) @@ -141,7 +149,7 @@ def predict_step( # Embed all features grid_emb = self.grid_embedder(grid_features) # (B, num_grid_nodes, d_h) - boundary_emb = self.grid_embedder(boundary_features) + boundary_emb = self.boundary_embedder(boundary_features) # (B, num_boundary_nodes, d_h) g2m_emb = self.g2m_embedder(self.g2m_features) # (M_g2m, d_h) m2g_emb = self.m2g_embedder(self.m2g_features) # (M_m2g, d_h) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 2a61e86c..7e0b47c6 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -116,6 +116,13 @@ def main(input_args=None): "output dimensions " "(default: False (no))", ) + parser.add_argument( + "--shared_grid_embedder", + action="store_true", # Default to separate embedders + help="If the same embedder MLP should be used for interior and boundary" + " grid nodes. Note that this requires the same dimensionality for " + "both kinds of grid inputs. 
(default: False (no))", + ) # Training options parser.add_argument( From 3515460cfc6959a26f2de47b9770c2d738c94d73 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 13 Nov 2024 13:58:39 +0100 Subject: [PATCH 104/190] Make new graph creation script main and only one --- ...ld_graph.py => build_rectangular_graph.py} | 2 +- neural_lam/create_graph.py | 610 ------------------ 2 files changed, 1 insertion(+), 611 deletions(-) rename neural_lam/{build_graph.py => build_rectangular_graph.py} (98%) delete mode 100644 neural_lam/create_graph.py diff --git a/neural_lam/build_graph.py b/neural_lam/build_rectangular_graph.py similarity index 98% rename from neural_lam/build_graph.py rename to neural_lam/build_rectangular_graph.py index c13dc629..84585540 100644 --- a/neural_lam/build_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -18,7 +18,7 @@ def main(input_args=None): parser = argparse.ArgumentParser( - description="Graph generation using WMG", + description="Rectangular graph generation using weather-models-graph", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) diff --git a/neural_lam/create_graph.py b/neural_lam/create_graph.py deleted file mode 100644 index ef979be3..00000000 --- a/neural_lam/create_graph.py +++ /dev/null @@ -1,610 +0,0 @@ -# Standard library -import os -from argparse import ArgumentParser - -# Third-party -import matplotlib -import matplotlib.pyplot as plt -import networkx -import numpy as np -import scipy.spatial -import torch -import torch_geometric as pyg -from torch_geometric.utils.convert import from_networkx - -# Local -from .config import load_config_and_datastore -from .datastore.base import BaseRegularGridDatastore - - -def plot_graph(graph, title=None): - fig, axis = plt.subplots(figsize=(8, 8), dpi=200) # W,H - edge_index = graph.edge_index - pos = graph.pos - - # Fix for re-indexed edge indices only containing mesh nodes at - # higher levels in hierarchy - edge_index = edge_index - edge_index.min() - - if pyg.utils.is_undirected(edge_index): - # Keep only 1 direction of edge_index - edge_index = edge_index[:, edge_index[0] < edge_index[1]] # (2, M/2) - # TODO: indicate direction of directed edges - - # Move all to cpu and numpy, compute (in)-degrees - degrees = ( - pyg.utils.degree(edge_index[1], num_nodes=pos.shape[0]).cpu().numpy() - ) - edge_index = edge_index.cpu().numpy() - pos = pos.cpu().numpy() - - # Plot edges - from_pos = pos[edge_index[0]] # (M/2, 2) - to_pos = pos[edge_index[1]] # (M/2, 2) - edge_lines = np.stack((from_pos, to_pos), axis=1) - axis.add_collection( - matplotlib.collections.LineCollection( - edge_lines, lw=0.4, colors="black", zorder=1 - ) - ) - - # Plot nodes - node_scatter = axis.scatter( - pos[:, 0], - pos[:, 1], - c=degrees, - s=3, - marker="o", - zorder=2, - cmap="viridis", - clim=None, - ) - - plt.colorbar(node_scatter, aspect=50) - - if title is not None: - axis.set_title(title) - - return fig, axis - - -def sort_nodes_internally(nx_graph): - # For some reason the networkx .nodes() return list can not be sorted, - # but this is the ordering used by pyg when converting. - # This function fixes this. 
- H = networkx.DiGraph() - H.add_nodes_from(sorted(nx_graph.nodes(data=True))) - H.add_edges_from(nx_graph.edges(data=True)) - return H - - -def save_edges(graph, name, base_path): - torch.save( - graph.edge_index, os.path.join(base_path, f"{name}_edge_index.pt") - ) - edge_features = torch.cat((graph.len.unsqueeze(1), graph.vdiff), dim=1).to( - torch.float32 - ) # Save as float32 - torch.save(edge_features, os.path.join(base_path, f"{name}_features.pt")) - - -def save_edges_list(graphs, name, base_path): - torch.save( - [graph.edge_index for graph in graphs], - os.path.join(base_path, f"{name}_edge_index.pt"), - ) - edge_features = [ - torch.cat((graph.len.unsqueeze(1), graph.vdiff), dim=1).to( - torch.float32 - ) - for graph in graphs - ] # Save as float32 - torch.save(edge_features, os.path.join(base_path, f"{name}_features.pt")) - - -def from_networkx_with_start_index(nx_graph, start_index): - pyg_graph = from_networkx(nx_graph) - pyg_graph.edge_index += start_index - return pyg_graph - - -def mk_2d_graph(xy, nx, ny): - xm, xM = np.amin(xy[:, :, 0][:, 0]), np.amax(xy[:, :, 0][:, 0]) - ym, yM = np.amin(xy[:, :, 1][0, :]), np.amax(xy[:, :, 1][0, :]) - - # avoid nodes on border - dx = (xM - xm) / nx - dy = (yM - ym) / ny - lx = np.linspace(xm + dx / 2, xM - dx / 2, nx) - ly = np.linspace(ym + dy / 2, yM - dy / 2, ny) - - mg = np.meshgrid(lx, ly, indexing="ij") # Use 'ij' indexing for (Nx,Ny) - g = networkx.grid_2d_graph(len(lx), len(ly)) - - for node in g.nodes: - g.nodes[node]["pos"] = np.array([mg[0][node], mg[1][node]]) - - # add diagonal edges - g.add_edges_from( - [((x, y), (x + 1, y + 1)) for y in range(ny - 1) for x in range(nx - 1)] - + [ - ((x + 1, y), (x, y + 1)) - for y in range(ny - 1) - for x in range(nx - 1) - ] - ) - - # turn into directed graph - dg = networkx.DiGraph(g) - for u, v in g.edges(): - d = np.sqrt(np.sum((g.nodes[u]["pos"] - g.nodes[v]["pos"]) ** 2)) - dg.edges[u, v]["len"] = d - dg.edges[u, v]["vdiff"] = g.nodes[u]["pos"] - g.nodes[v]["pos"] - dg.add_edge(v, u) - dg.edges[v, u]["len"] = d - dg.edges[v, u]["vdiff"] = g.nodes[v]["pos"] - g.nodes[u]["pos"] - - return dg - - -def prepend_node_index(graph, new_index): - # Relabel node indices in graph, insert (graph_level, i, j) - ijk = [tuple((new_index,) + x) for x in graph.nodes] - to_mapping = dict(zip(graph.nodes, ijk)) - return networkx.relabel_nodes(graph, to_mapping, copy=True) - - -def create_graph( - graph_dir_path: str, - xy: np.ndarray, - n_max_levels: int, - hierarchical: bool, - create_plot: bool, -): - """ - Create graph components from `xy` grid coordinates and store in - `graph_dir_path`. 
- - Creates the following files for all graphs: - - g2m_edge_index.pt [2, N_g2m_edges] - - g2m_features.pt [N_g2m_edges, d_features] - - m2g_edge_index.pt [2, N_m2m_edges] - - m2g_features.pt [N_m2m_edges, d_features] - - m2m_edge_index.pt list of [2, N_m2m_edges_level], length==n_levels - - m2m_features.pt list of [N_m2m_edges_level, d_features], - length==n_levels - - mesh_features.pt list of [N_mesh_nodes_level, d_mesh_static], - length==n_levels - - where - d_features: - number of features per edge (currently d_features==3, for - edge-length, x and y) - N_g2m_edges: - number of edges in the graph from grid-to-mesh - N_m2g_edges: - number of edges in the graph from mesh-to-grid - N_m2m_edges_level: - number of edges in the graph from mesh-to-mesh at a given level - (list index corresponds to the level) - d_mesh_static: - number of static features per mesh node (currently - d_mesh_static==2, for x and y) - N_mesh_nodes_level: - number of nodes in the mesh at a given level - - And in addition for hierarchical graphs: - - mesh_up_edge_index.pt - list of [2, N_mesh_updown_edges_level], length==n_levels-1 - - mesh_up_features.pt - list of [N_mesh_updown_edges_level, d_features], length==n_levels-1 - - mesh_down_edge_index.pt - list of [2, N_mesh_updown_edges_level], length==n_levels-1 - - mesh_down_features.pt - list of [N_mesh_updown_edges_level, d_features], length==n_levels-1 - - where N_mesh_updown_edges_level is the number of edges in the graph from - mesh-to-mesh between two consecutive levels (list index corresponds index - of lower level) - - - Parameters - ---------- - graph_dir_path : str - Path to store the graph components. - xy : np.ndarray - Grid coordinates, expected to be of shape (Nx, Ny, 2). - n_max_levels : int - Limit multi-scale mesh to given number of levels, from bottom up - (default: None (no limit)). - hierarchical : bool - Generate hierarchical mesh graph (default: False). - create_plot : bool - If graphs should be plotted during generation (default: False). 
- - Returns - ------- - None - - """ - os.makedirs(graph_dir_path, exist_ok=True) - - print(f"Writing graph components to {graph_dir_path}") - - grid_xy = torch.tensor(xy) - pos_max = torch.max(torch.abs(grid_xy)) - - # - # Mesh - # - - # graph geometry - nx = 3 # number of children =nx**2 - nlev = int(np.log(max(xy.shape[:2])) / np.log(nx)) - nleaf = nx**nlev # leaves at the bottom = nleaf**2 - - mesh_levels = nlev - 1 - if n_max_levels: - # Limit the levels in mesh graph - mesh_levels = min(mesh_levels, n_max_levels) - - # print(f"nlev: {nlev}, nleaf: {nleaf}, mesh_levels: {mesh_levels}") - - # multi resolution tree levels - G = [] - for lev in range(1, mesh_levels + 1): - n = int(nleaf / (nx**lev)) - g = mk_2d_graph(xy, n, n) - if create_plot: - plot_graph(from_networkx(g), title=f"Mesh graph, level {lev}") - plt.show() - - G.append(g) - - if hierarchical: - # Relabel nodes of each level with level index first - G = [ - prepend_node_index(graph, level_i) - for level_i, graph in enumerate(G) - ] - - num_nodes_level = np.array([len(g_level.nodes) for g_level in G]) - # First node index in each level in the hierarchical graph - first_index_level = np.concatenate( - (np.zeros(1, dtype=int), np.cumsum(num_nodes_level[:-1])) - ) - - # Create inter-level mesh edges - up_graphs = [] - down_graphs = [] - for from_level, to_level, G_from, G_to, start_index in zip( - range(1, mesh_levels), - range(0, mesh_levels - 1), - G[1:], - G[:-1], - first_index_level[: mesh_levels - 1], - ): - # start out from graph at from level - G_down = G_from.copy() - G_down.clear_edges() - G_down = networkx.DiGraph(G_down) - - # Add nodes of to level - G_down.add_nodes_from(G_to.nodes(data=True)) - - # build kd tree for mesh point pos - # order in vm should be same as in vm_xy - v_to_list = list(G_to.nodes) - v_from_list = list(G_from.nodes) - v_from_xy = np.array([xy for _, xy in G_from.nodes.data("pos")]) - kdt_m = scipy.spatial.KDTree(v_from_xy) - - # add edges from mesh to grid - for v in v_to_list: - # find 1(?) 
nearest neighbours (index to vm_xy) - neigh_idx = kdt_m.query(G_down.nodes[v]["pos"], 1)[1] - u = v_from_list[neigh_idx] - - # add edge from mesh to grid - G_down.add_edge(u, v) - d = np.sqrt( - np.sum( - (G_down.nodes[u]["pos"] - G_down.nodes[v]["pos"]) ** 2 - ) - ) - G_down.edges[u, v]["len"] = d - G_down.edges[u, v]["vdiff"] = ( - G_down.nodes[u]["pos"] - G_down.nodes[v]["pos"] - ) - - # relabel nodes to integers (sorted) - G_down_int = networkx.convert_node_labels_to_integers( - G_down, first_label=start_index, ordering="sorted" - ) # Issue with sorting here - G_down_int = sort_nodes_internally(G_down_int) - pyg_down = from_networkx_with_start_index(G_down_int, start_index) - - # Create up graph, invert downwards edges - up_edges = torch.stack( - (pyg_down.edge_index[1], pyg_down.edge_index[0]), dim=0 - ) - pyg_up = pyg_down.clone() - pyg_up.edge_index = up_edges - - up_graphs.append(pyg_up) - down_graphs.append(pyg_down) - - if create_plot: - plot_graph( - pyg_down, title=f"Down graph, {from_level} -> {to_level}" - ) - plt.show() - - plot_graph( - pyg_down, title=f"Up graph, {to_level} -> {from_level}" - ) - plt.show() - - # Save up and down edges - save_edges_list(up_graphs, "mesh_up", graph_dir_path) - save_edges_list(down_graphs, "mesh_down", graph_dir_path) - - # Extract intra-level edges for m2m - m2m_graphs = [ - from_networkx_with_start_index( - networkx.convert_node_labels_to_integers( - level_graph, first_label=start_index, ordering="sorted" - ), - start_index, - ) - for level_graph, start_index in zip(G, first_index_level) - ] - - mesh_pos = [graph.pos.to(torch.float32) for graph in m2m_graphs] - - # For use in g2m and m2g - G_bottom_mesh = G[0] - - joint_mesh_graph = networkx.union_all([graph for graph in G]) - all_mesh_nodes = joint_mesh_graph.nodes(data=True) - - else: - # combine all levels to one graph - G_tot = G[0] - for lev in range(1, len(G)): - nodes = list(G[lev - 1].nodes) - n = int(np.sqrt(len(nodes))) - ij = ( - np.array(nodes) - .reshape((n, n, 2))[1::nx, 1::nx, :] - .reshape(int(n / nx) ** 2, 2) - ) - ij = [tuple(x) for x in ij] - G[lev] = networkx.relabel_nodes(G[lev], dict(zip(G[lev].nodes, ij))) - G_tot = networkx.compose(G_tot, G[lev]) - - # Relabel mesh nodes to start with 0 - G_tot = prepend_node_index(G_tot, 0) - - # relabel nodes to integers (sorted) - G_int = networkx.convert_node_labels_to_integers( - G_tot, first_label=0, ordering="sorted" - ) - - # Graph to use in g2m and m2g - G_bottom_mesh = G_tot - all_mesh_nodes = G_tot.nodes(data=True) - - # export the nx graph to PyTorch geometric - pyg_m2m = from_networkx(G_int) - m2m_graphs = [pyg_m2m] - mesh_pos = [pyg_m2m.pos.to(torch.float32)] - - if create_plot: - plot_graph(pyg_m2m, title="Mesh-to-mesh") - plt.show() - - # Save m2m edges - save_edges_list(m2m_graphs, "m2m", graph_dir_path) - - # Divide mesh node pos by max coordinate of grid cell - mesh_pos = [pos / pos_max for pos in mesh_pos] - - # Save mesh positions - torch.save( - mesh_pos, os.path.join(graph_dir_path, "mesh_features.pt") - ) # mesh pos, in float32 - - # - # Grid2Mesh - # - - # radius within which grid nodes are associated with a mesh node - # (in terms of mesh distance) - DM_SCALE = 0.67 - - # mesh nodes on lowest level - vm = G_bottom_mesh.nodes - vm_xy = np.array([xy for _, xy in vm.data("pos")]) - # distance between mesh nodes - dm = np.sqrt( - np.sum((vm.data("pos")[(0, 1, 0)] - vm.data("pos")[(0, 0, 0)]) ** 2) - ) - - # grid nodes - Nx, Ny = xy.shape[:2] - - G_grid = networkx.grid_2d_graph(Ny, Nx) - G_grid.clear_edges() - - 
# vg features (only pos introduced here) - for node in G_grid.nodes: - # pos is in feature but here explicit for convenience - G_grid.nodes[node]["pos"] = xy[ - node[1], node[0] - ] # xy is already (Nx,Ny,2) - - # add 1000 to node key to separate grid nodes (1000,i,j) from mesh nodes - # (i,j) and impose sorting order such that vm are the first nodes - G_grid = prepend_node_index(G_grid, 1000) - - # build kd tree for grid point pos - # order in vg_list should be same as in vg_xy - vg_list = list(G_grid.nodes) - vg_xy = np.array( - [xy[node[2], node[1]] for node in vg_list] - ) # xy is already (Nx,Ny,2) - kdt_g = scipy.spatial.KDTree(vg_xy) - - # now add (all) mesh nodes, include features (pos) - G_grid.add_nodes_from(all_mesh_nodes) - - # Re-create graph with sorted node indices - # Need to do sorting of nodes this way for indices to map correctly to pyg - G_g2m = networkx.Graph() - G_g2m.add_nodes_from(sorted(G_grid.nodes(data=True))) - - # turn into directed graph - G_g2m = networkx.DiGraph(G_g2m) - - # add edges - for v in vm: - # find neighbours (index to vg_xy) - neigh_idxs = kdt_g.query_ball_point(vm[v]["pos"], dm * DM_SCALE) - for i in neigh_idxs: - u = vg_list[i] - # add edge from grid to mesh - G_g2m.add_edge(u, v) - d = np.sqrt( - np.sum((G_g2m.nodes[u]["pos"] - G_g2m.nodes[v]["pos"]) ** 2) - ) - G_g2m.edges[u, v]["len"] = d - G_g2m.edges[u, v]["vdiff"] = ( - G_g2m.nodes[u]["pos"] - G_g2m.nodes[v]["pos"] - ) - - pyg_g2m = from_networkx(G_g2m) - - if create_plot: - plot_graph(pyg_g2m, title="Grid-to-mesh") - plt.show() - - # - # Mesh2Grid - # - - # start out from Grid2Mesh and then replace edges - G_m2g = G_g2m.copy() - G_m2g.clear_edges() - - # build kd tree for mesh point pos - # order in vm should be same as in vm_xy - vm_list = list(vm) - kdt_m = scipy.spatial.KDTree(vm_xy) - - # add edges from mesh to grid - for v in vg_list: - # find 4 nearest neighbours (index to vm_xy) - neigh_idxs = kdt_m.query(G_m2g.nodes[v]["pos"], 4)[1] - for i in neigh_idxs: - u = vm_list[i] - # add edge from mesh to grid - G_m2g.add_edge(u, v) - d = np.sqrt( - np.sum((G_m2g.nodes[u]["pos"] - G_m2g.nodes[v]["pos"]) ** 2) - ) - G_m2g.edges[u, v]["len"] = d - G_m2g.edges[u, v]["vdiff"] = ( - G_m2g.nodes[u]["pos"] - G_m2g.nodes[v]["pos"] - ) - - # relabel nodes to integers (sorted) - G_m2g_int = networkx.convert_node_labels_to_integers( - G_m2g, first_label=0, ordering="sorted" - ) - pyg_m2g = from_networkx(G_m2g_int) - - if create_plot: - plot_graph(pyg_m2g, title="Mesh-to-grid") - plt.show() - - # Save g2m and m2g everything - # g2m - save_edges(pyg_g2m, "g2m", graph_dir_path) - # m2g - save_edges(pyg_m2g, "m2g", graph_dir_path) - - -def create_graph_from_datastore( - datastore: BaseRegularGridDatastore, - output_root_path: str, - n_max_levels: int = None, - hierarchical: bool = False, - create_plot: bool = False, -): - if isinstance(datastore, BaseRegularGridDatastore): - xy = datastore.get_xy(category="state", stacked=False) - else: - raise NotImplementedError( - "Only graph creation for BaseRegularGridDatastore is supported" - ) - - create_graph( - graph_dir_path=output_root_path, - xy=xy, - n_max_levels=n_max_levels, - hierarchical=hierarchical, - create_plot=create_plot, - ) - - -def cli(input_args=None): - parser = ArgumentParser(description="Graph generation arguments") - parser.add_argument( - "--config_path", - type=str, - help="Path to neural-lam configuration file", - ) - parser.add_argument( - "--name", - type=str, - default="multiscale", - help="Name to save graph as (default: 
multiscale)", - ) - parser.add_argument( - "--plot", - action="store_true", - help="If graphs should be plotted during generation " - "(default: False)", - ) - parser.add_argument( - "--levels", - type=int, - help="Limit multi-scale mesh to given number of levels, " - "from bottom up (default: None (no limit))", - ) - parser.add_argument( - "--hierarchical", - action="store_true", - help="Generate hierarchical mesh graph (default: False)", - ) - args = parser.parse_args(input_args) - - assert ( - args.config_path is not None - ), "Specify your config with --config_path" - - # Load neural-lam configuration and datastore to use - _, datastore = load_config_and_datastore(config_path=args.config_path) - - create_graph_from_datastore( - datastore=datastore, - output_root_path=os.path.join(datastore.root_path, "graph", args.name), - n_max_levels=args.levels, - hierarchical=args.hierarchical, - create_plot=args.plot, - ) - - -if __name__ == "__main__": - cli() From 05d91f1c0065428ae2e572b6c59cffd61a2c5e1d Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 13 Nov 2024 14:06:48 +0100 Subject: [PATCH 105/190] Fix some typos and forgot code --- neural_lam/interaction_net.py | 2 +- neural_lam/models/base_graph_model.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/neural_lam/interaction_net.py b/neural_lam/interaction_net.py index 417aae1a..46223b88 100644 --- a/neural_lam/interaction_net.py +++ b/neural_lam/interaction_net.py @@ -30,7 +30,7 @@ def __init__( """ Create a new InteractionNet - edge_index: (2,M), Edges in pyg format, with boeth sender and receiver + edge_index: (2,M), Edges in pyg format, with both sender and receiver node indices starting at 0 input_dim: Dimensionality of input representations, for both nodes and edges diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index de8d87db..d5b39bf7 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -48,7 +48,6 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): [self.grid_dim] + self.mlp_blueprint_end ) # Optional separate embedder for boundary nodes - print(args.shared_grid_embedder) if args.shared_grid_embedder: assert self.grid_dim == self.boundary_dim, ( "Grid and boundary input dimension must be the same when using " From 3eba43c2f8072e755a39ac9cb8de9a50320bd578 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 27 Nov 2024 12:23:37 +0100 Subject: [PATCH 106/190] Correct handling of node indices for m2g when using decode_mask --- neural_lam/build_rectangular_graph.py | 5 ++-- neural_lam/utils.py | 38 ++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 84585540..7c3151f4 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -93,8 +93,8 @@ def main(input_args=None): create_kwargs = { "coords": coords, "mesh_node_distance": args.mesh_node_distance, - "projection": None, "decode_mask": decode_mask, + "return_components": True, } if args.archetype != "keisler": # Add additional multi-level kwargs @@ -105,8 +105,7 @@ def main(input_args=None): } ) - graph = archetype_create_func(**create_kwargs) - graph_comp = wmg.split_graph_by_edge_attribute(graph, attr="component") + graph_comp = archetype_create_func(**create_kwargs) print("Created graph:") for name, subgraph in graph_comp.items(): diff --git a/neural_lam/utils.py 
b/neural_lam/utils.py index c0207123..6241c1ca 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -94,6 +94,11 @@ def loads_file(fn): weights_only=True, ) + # Load static node features + mesh_static_features = loads_file( + "m2m_node_features.pt" + ) # List of (N_mesh[l], d_mesh_static) + # Load edges (edge_index) m2m_edge_index = BufferList( [zero_index_edge_index(ei) for ei in loads_file("m2m_edge_index.pt")], @@ -104,7 +109,33 @@ def loads_file(fn): # Change first indices to 0 g2m_edge_index = zero_index_edge_index(g2m_edge_index) - m2g_edge_index = zero_index_edge_index(m2g_edge_index) + # m2g has to be handled specially as not all mesh nodes might be indexed in + # m2g_edge_index + m2g_min_indices = m2g_edge_index.min(dim=1, keepdim=True)[0] + if m2g_min_indices[0] < m2g_min_indices[1]: + # mesh has the first indices + # Number of mesh nodes at level that connects to grid + num_mesh_nodes = mesh_static_features[0].shape[0] + + m2g_edge_index = torch.stack( + ( + m2g_edge_index[0], + m2g_edge_index[1] - num_mesh_nodes, + ), + dim=0, + ) + else: + # grid (interior) has the first indices + # NOTE: Below works, but would be good with a better way to get this + num_interior_nodes = m2g_edge_index[1].max() + 1 + + m2g_edge_index = torch.stack( + ( + m2g_edge_index[0] - num_interior_nodes, + m2g_edge_index[1], + ), + dim=0, + ) n_levels = len(m2m_edge_index) hierarchical = n_levels > 1 # Nor just single level mesh graph @@ -126,11 +157,6 @@ def loads_file(fn): g2m_features = g2m_features / longest_edge m2g_features = m2g_features / longest_edge - # Load static node features - mesh_static_features = loads_file( - "m2m_node_features.pt" - ) # List of (N_mesh[l], d_mesh_static) - # Some checks for consistency assert ( len(m2m_features) == n_levels From f1b73592f09db5e177f57f3f44a188ccaa129250 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 28 Nov 2024 10:53:17 +0100 Subject: [PATCH 107/190] Linting and bugfixes --- neural_lam/models/ar_model.py | 8 +++++++- neural_lam/utils.py | 6 ++---- neural_lam/vis.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index c3870fbc..f8eef057 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -281,7 +281,13 @@ def common_step(self, batch): (B, pred_steps, num_boundary_nodes, d_boundary_forcing), where index 0 corresponds to index 1 of init_states """ - (init_states, target_states, forcing_features, _, batch_times) = batch + ( + init_states, + target_states, + forcing, + boundary_forcing, + batch_times, + ) = batch prediction, pred_std = self.unroll_prediction( init_states, forcing, boundary_forcing diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 6241c1ca..8e43fa40 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -295,13 +295,11 @@ def get_reordered_grid_pos(datastore): """ Interior nodes first, then boundary """ - xy_np = datastore.get_xy() # np, (num_grid, 2) + xy_np = datastore.get_xy() # np, (num_grid, 2) xy_torch = torch.tensor(xy_np, dtype=torch.float32) da_boundary_mask = datastore.boundary_mask - boundary_mask = torch.tensor( - da_boundary_mask.values, dtype=torch.bool - ) + boundary_mask = torch.tensor(da_boundary_mask.values, dtype=torch.bool) interior_mask = torch.logical_not(boundary_mask) return torch.cat( diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 7e7bbf42..10b84fb7 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -130,7 +130,7 @@ def plot_prediction( # Plot pred and 
target for ax, da in zip(axes, (da_target, da_prediction)): - im = plot_on_axis( + plot_on_axis( ax, da, datastore, From fa6c9e3071627769112ed2fb3f872e4f019ea62f Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 2 Dec 2024 14:31:01 +0100 Subject: [PATCH 108/190] Make graph creation and plotting work with datastores --- neural_lam/build_rectangular_graph.py | 47 ++++++++++++++-------- neural_lam/datastore/mdp.py | 2 +- neural_lam/datastore/npyfilesmeps/store.py | 2 +- neural_lam/models/base_graph_model.py | 2 +- neural_lam/plot_graph.py | 41 ++++++++++--------- neural_lam/utils.py | 2 +- 6 files changed, 57 insertions(+), 39 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 7c3151f4..e4570397 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -7,7 +7,8 @@ import weather_model_graphs as wmg # Local -from . import config, utils +from . import utils +from .config import load_config_and_datastore WMG_ARCHETYPES = { "keisler": wmg.create.archetype.create_keisler_graph, @@ -24,10 +25,14 @@ def main(input_args=None): # Inputs and outputs parser.add_argument( - "--data_config", + "--config_path", type=str, - default="neural_lam/data_config.yaml", - help="Path to data config file", + help="Path to the configuration for neural-lam", + ) + parser.add_argument( + "--name", + type=str, + help="Name to save graph as (default: multiscale)", ) parser.add_argument( "--output_dir", @@ -65,21 +70,28 @@ def main(input_args=None): ) args = parser.parse_args(input_args) - # Load grid positions - config_loader = config.Config.from_file(args.data_config) + assert ( + args.config_path is not None + ), "Specify your config with --config_path" + assert ( + args.name is not None + ), "Specify the name to save graph as with --name" + _, datastore = load_config_and_datastore(config_path=args.config_path) + + # Load grid positions # TODO Do not get normalised positions - coords = utils.get_reordered_grid_pos(config_loader.dataset.name).numpy() + coords = utils.get_reordered_grid_pos(datastore).numpy() # (num_nodes_full, 2) # Construct mask - static_data = utils.load_static_data(config_loader.dataset.name) + num_full_grid = coords.shape[0] + num_boundary = datastore.boundary_mask.to_numpy().sum() + num_interior = num_full_grid - num_boundary decode_mask = np.concatenate( ( - np.ones(static_data["grid_static_features"].shape[0], dtype=bool), - np.zeros( - static_data["boundary_static_features"].shape[0], dtype=bool - ), + np.ones(num_interior, dtype=bool), + np.zeros(num_boundary, dtype=bool), ), axis=0, ) @@ -112,7 +124,8 @@ def main(input_args=None): print(f"{name}: {subgraph}") # Save graph - os.makedirs(args.output_dir, exist_ok=True) + graph_dir_path = os.path.join(datastore.root_path, "graphs", args.name) + os.makedirs(graph_dir_path, exist_ok=True) for component, graph in graph_comp.items(): # This seems like a bit of a hack, maybe better if saving in wmg # was made consistent with nl @@ -130,7 +143,7 @@ def main(input_args=None): name="m2m", list_from_attribute="level", edge_features=["len", "vdiff"], - output_directory=args.output_dir, + output_directory=graph_dir_path, ) else: # up and down directions @@ -139,7 +152,7 @@ def main(input_args=None): name=f"mesh_{direction}", list_from_attribute="levels", edge_features=["len", "vdiff"], - output_directory=args.output_dir, + output_directory=graph_dir_path, ) else: wmg.save.to_pyg( @@ -147,14 +160,14 @@ def main(input_args=None): name=component, 
list_from_attribute="dummy", # Note: Needed to output list edge_features=["len", "vdiff"], - output_directory=args.output_dir, + output_directory=graph_dir_path, ) else: wmg.save.to_pyg( graph=graph, name=component, edge_features=["len", "vdiff"], - output_directory=args.output_dir, + output_directory=graph_dir_path, ) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 809bbdb8..0ed92129 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -394,7 +394,7 @@ def grid_shape_state(self): assert da_x.ndim == da_y.ndim == 1 return CartesianGridShape(x=da_x.size, y=da_y.size) - def get_xy(self, category: str, stacked: bool) -> ndarray: + def get_xy(self, category: str, stacked: bool = True) -> ndarray: """Return the x, y coordinates of the dataset. Parameters diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 24349e7e..dfb0b9c9 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -614,7 +614,7 @@ def get_vars_long_names(self, category: str) -> List[str]: def get_num_data_vars(self, category: str) -> int: return len(self.get_vars_names(category=category)) - def get_xy(self, category: str, stacked: bool) -> np.ndarray: + def get_xy(self, category: str, stacked: bool = True) -> np.ndarray: """Return the x, y coordinates of the dataset. Parameters diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index d5b39bf7..1feec63d 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -19,7 +19,7 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): super().__init__(args, config=config, datastore=datastore) # Load graph with static features - graph_dir_path = datastore.root_path / "graph" / args.graph + graph_dir_path = datastore.root_path / "graphs" / args.graph self.hierarchical, graph_ldict = utils.load_graph( graph_dir_path=graph_dir_path ) diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index f621d201..9d04f3e3 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -11,25 +11,20 @@ from . 
import utils from .config import load_config_and_datastore -MESH_HEIGHT = 0.1 -MESH_LEVEL_DIST = 0.2 -GRID_HEIGHT = 0 - def main(): """Plot graph structure in 3D using plotly.""" parser = ArgumentParser(description="Plot graph") parser.add_argument( - "--datastore_config_path", + "--config_path", type=str, - default="tests/datastore_examples/mdp/config.yaml", - help="Path for the datastore config", + help="Path to the configuration for neural-lam", ) parser.add_argument( - "--graph", + "--name", type=str, default="multiscale", - help="Graph to plot (default: multiscale)", + help="Name of saved graph to plot (default: multiscale)", ) parser.add_argument( "--save", @@ -43,12 +38,15 @@ def main(): ) args = parser.parse_args() - _, datastore = load_config_and_datastore( - config_path=args.datastore_config_path - ) + + assert ( + args.config_path is not None + ), "Specify your config with --config_path" + + _, datastore = load_config_and_datastore(config_path=args.config_path) # Load graph data - graph_dir_path = os.path.join(datastore.root_path, "graph", args.graph) + graph_dir_path = os.path.join(datastore.root_path, "graphs", args.name) hierarchical, graph_ldict = utils.load_graph(graph_dir_path=graph_dir_path) (g2m_edge_index, m2g_edge_index, m2m_edge_index,) = ( graph_ldict["g2m_edge_index"], @@ -63,12 +61,18 @@ def main(): # Extract values needed, turn to numpy grid_pos = utils.get_reordered_grid_pos(datastore).numpy() - # Add in z-dimension - z_grid = GRID_HEIGHT * np.ones((grid_pos.shape[0],)) + grid_scale = np.ptp(grid_pos) + + # Add in z-dimension for grid + z_grid = np.zeros((grid_pos.shape[0],)) # Grid sits at z=0 grid_pos = np.concatenate( (grid_pos, np.expand_dims(z_grid, axis=1)), axis=1 ) + # Compute z-coordinate height of mesh nodes + mesh_base_height = 0.05 * grid_scale + mesh_level_height_diff = 0.1 * grid_scale + # List of edges to plot, (edge_index, from_pos, to_pos, color, # line_width, label) edge_plot_list = [] @@ -79,8 +83,8 @@ def main(): np.concatenate( ( level_static_features.numpy(), - MESH_HEIGHT - + MESH_LEVEL_DIST + mesh_base_height + + mesh_level_height_diff * height_level * np.ones((level_static_features.shape[0], 1)), ), @@ -170,7 +174,8 @@ def main(): mesh_pos = mesh_static_features.numpy() mesh_degrees = pyg.utils.degree(m2m_edge_index[1]).numpy() - z_mesh = MESH_HEIGHT + 0.01 * mesh_degrees + # 1% higher per neighbor + z_mesh = (1 + 0.01 * mesh_degrees) * mesh_base_height mesh_node_size = mesh_degrees / 2 mesh_pos = np.concatenate( diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 8e43fa40..6f910cee 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -295,7 +295,7 @@ def get_reordered_grid_pos(datastore): """ Interior nodes first, then boundary """ - xy_np = datastore.get_xy() # np, (num_grid, 2) + xy_np = datastore.get_xy("state") # np, (num_grid, 2) xy_torch = torch.tensor(xy_np, dtype=torch.float32) da_boundary_mask = datastore.boundary_mask From 4d853843e61dc64614a7415aa72c8f0ecbc63441 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 2 Dec 2024 16:25:01 +0100 Subject: [PATCH 109/190] Fix graph loading and boundary mask --- neural_lam/build_rectangular_graph.py | 10 ++++++---- neural_lam/models/ar_model.py | 4 ++-- neural_lam/models/base_graph_model.py | 2 +- neural_lam/plot_graph.py | 6 ++++-- neural_lam/train_model.py | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index e4570397..df7f8ba8 100644 --- 
a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -30,7 +30,7 @@ def main(input_args=None): help="Path to the configuration for neural-lam", ) parser.add_argument( - "--name", + "--graph_name", type=str, help="Name to save graph as (default: multiscale)", ) @@ -74,8 +74,8 @@ def main(input_args=None): args.config_path is not None ), "Specify your config with --config_path" assert ( - args.name is not None - ), "Specify the name to save graph as with --name" + args.graph_name is not None + ), "Specify the name to save graph as with --graph_name" _, datastore = load_config_and_datastore(config_path=args.config_path) @@ -124,7 +124,9 @@ def main(input_args=None): print(f"{name}: {subgraph}") # Save graph - graph_dir_path = os.path.join(datastore.root_path, "graphs", args.name) + graph_dir_path = os.path.join( + datastore.root_path, "graphs", args.graph_name + ) os.makedirs(graph_dir_path, exist_ok=True) for component, graph in graph_comp.items(): # This seems like a bit of a hack, maybe better if saving in wmg diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index f8eef057..1a24136f 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -69,12 +69,12 @@ def __init__( static_features_torch = torch.tensor(arr_static, dtype=torch.float32) self.register_buffer( "grid_static_features", - static_features_torch[self.boundary_mask.to(torch.bool)], + static_features_torch[self.boundary_mask[:, 0].to(torch.bool)], persistent=False, ) self.register_buffer( "boundary_static_features", - static_features_torch[self.interior_mask.to(torch.bool)], + static_features_torch[self.interior_mask[:, 0].to(torch.bool)], persistent=False, ) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 1feec63d..52f2d7a3 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -19,7 +19,7 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): super().__init__(args, config=config, datastore=datastore) # Load graph with static features - graph_dir_path = datastore.root_path / "graphs" / args.graph + graph_dir_path = datastore.root_path / "graphs" / args.graph_name self.hierarchical, graph_ldict = utils.load_graph( graph_dir_path=graph_dir_path ) diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index 9d04f3e3..11bd795a 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -21,7 +21,7 @@ def main(): help="Path to the configuration for neural-lam", ) parser.add_argument( - "--name", + "--graph_name", type=str, default="multiscale", help="Name of saved graph to plot (default: multiscale)", @@ -46,7 +46,9 @@ def main(): _, datastore = load_config_and_datastore(config_path=args.config_path) # Load graph data - graph_dir_path = os.path.join(datastore.root_path, "graphs", args.name) + graph_dir_path = os.path.join( + datastore.root_path, "graphs", args.graph_name + ) hierarchical, graph_ldict = utils.load_graph(graph_dir_path=graph_dir_path) (g2m_edge_index, m2g_edge_index, m2m_edge_index,) = ( graph_ldict["g2m_edge_index"], diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 7e0b47c6..3c2dbece 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -78,7 +78,7 @@ def main(input_args=None): # Model architecture parser.add_argument( - "--graph", + "--graph_name", type=str, default="multiscale", help="Graph to load and use in graph-based model " From 
9edfec37af343be4675de402a1b7d11f7731ddd7 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 2 Dec 2024 16:33:41 +0100 Subject: [PATCH 110/190] Fix boundary masking bug for static features --- neural_lam/models/ar_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 1a24136f..ceadb856 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -69,12 +69,12 @@ def __init__( static_features_torch = torch.tensor(arr_static, dtype=torch.float32) self.register_buffer( "grid_static_features", - static_features_torch[self.boundary_mask[:, 0].to(torch.bool)], + static_features_torch[self.interior_mask[:, 0].to(torch.bool)], persistent=False, ) self.register_buffer( "boundary_static_features", - static_features_torch[self.interior_mask[:, 0].to(torch.bool)], + static_features_torch[self.boundary_mask[:, 0].to(torch.bool)], persistent=False, ) From 6e1c53ca70678c559d4a37324221105beb799cea Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 3 Dec 2024 11:35:16 +0100 Subject: [PATCH 111/190] Add flag making boundary forcing optional in models --- neural_lam/models/ar_model.py | 27 +++++++---- neural_lam/models/base_graph_model.py | 66 ++++++++++++++++----------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index ceadb856..ef766113 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -48,6 +48,10 @@ def __init__( num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps + # TODO: Set based on existing of boundary forcing datastore + # TODO: Adjust what is stored here based on self.boundary_forced + self.boundary_forced = False + # Set up boundary mask boundary_mask = torch.tensor( da_boundary_mask.values, dtype=torch.float32 @@ -125,12 +129,6 @@ def __init__( self.num_grid_nodes, grid_static_dim, ) = self.grid_static_features.shape - - ( - self.num_boundary_nodes, - boundary_static_dim, # TODO Will need for computation below - ) = self.boundary_static_features.shape - self.num_input_nodes = self.num_grid_nodes + self.num_boundary_nodes self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim @@ -139,7 +137,16 @@ def __init__( * num_forcing_vars * (num_past_forcing_steps + num_future_forcing_steps + 1) ) - self.boundary_dim = self.grid_dim # TODO Compute separately + if self.boundary_forced: + self.boundary_dim = self.grid_dim # TODO Compute separately + ( + self.num_boundary_nodes, + boundary_static_dim, # TODO Will need for computation below + ) = self.boundary_static_features.shape + self.num_input_nodes = self.num_grid_nodes + self.num_boundary_nodes + else: + # Only interior grid nodes + self.num_input_nodes = self.num_grid_nodes # Instantiate loss function self.loss = metrics.get_metric(args.loss) @@ -241,7 +248,11 @@ def unroll_prediction(self, init_states, forcing, boundary_forcing): for i in range(pred_steps): forcing_step = forcing[:, i] - boundary_forcing_step = boundary_forcing[:, i] + + if self.boundary_forced: + boundary_forcing_step = boundary_forcing[:, i] + else: + boundary_forcing_step = None pred_state, pred_std = self.predict_step( prev_state, prev_prev_state, forcing_step, boundary_forcing_step diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 52f2d7a3..61c1a681 100644 --- a/neural_lam/models/base_graph_model.py +++ 
b/neural_lam/models/base_graph_model.py @@ -47,18 +47,22 @@ def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): self.grid_embedder = utils.make_mlp( [self.grid_dim] + self.mlp_blueprint_end ) - # Optional separate embedder for boundary nodes - if args.shared_grid_embedder: - assert self.grid_dim == self.boundary_dim, ( - "Grid and boundary input dimension must be the same when using " - f"the same embedder, got grid_dim={self.grid_dim}, " - f"boundary_dim={self.boundary_dim}" - ) - self.boundary_embedder = self.grid_embedder - else: - self.boundary_embedder = utils.make_mlp( - [self.boundary_dim] + self.mlp_blueprint_end - ) + + if self.boundary_forced: + # Define embedder for boundary nodes + # Optional separate embedder for boundary nodes + if args.shared_grid_embedder: + assert self.grid_dim == self.boundary_dim, ( + "Grid and boundary input dimension must " + "be the same when using " + f"the same embedder, got grid_dim={self.grid_dim}, " + f"boundary_dim={self.boundary_dim}" + ) + self.boundary_embedder = self.grid_embedder + else: + self.boundary_embedder = utils.make_mlp( + [self.boundary_dim] + self.mlp_blueprint_end + ) self.g2m_embedder = utils.make_mlp([g2m_dim] + self.mlp_blueprint_end) self.m2g_embedder = utils.make_mlp([m2g_dim] + self.mlp_blueprint_end) @@ -136,27 +140,37 @@ def predict_step( ), dim=-1, ) - # Create full boundary node features of shape - # (B, num_boundary_nodes, boundary_dim) - boundary_features = torch.cat( - ( - boundary_forcing, - self.expand_to_batch(self.boundary_static_features, batch_size), - ), - dim=-1, - ) + + if self.boundary_forced: + # Create full boundary node features of shape + # (B, num_boundary_nodes, boundary_dim) + boundary_features = torch.cat( + ( + boundary_forcing, + self.expand_to_batch( + self.boundary_static_features, batch_size + ), + ), + dim=-1, + ) + + # Embed boundary features + boundary_emb = self.boundary_embedder(boundary_features) + # (B, num_boundary_nodes, d_h) # Embed all features grid_emb = self.grid_embedder(grid_features) # (B, num_grid_nodes, d_h) - boundary_emb = self.boundary_embedder(boundary_features) - # (B, num_boundary_nodes, d_h) g2m_emb = self.g2m_embedder(self.g2m_features) # (M_g2m, d_h) m2g_emb = self.m2g_embedder(self.m2g_features) # (M_m2g, d_h) mesh_emb = self.embedd_mesh_nodes() - # Merge interior and boundary emb into input embedding - # We enforce ordering (interior, boundary) of nodes - input_emb = torch.cat((grid_emb, boundary_emb), dim=1) + if self.boundary_forced: + # Merge interior and boundary emb into input embedding + # We enforce ordering (interior, boundary) of nodes + input_emb = torch.cat((grid_emb, boundary_emb), dim=1) + else: + # Only maps from interior to mesh + input_emb = grid_emb # Map from grid to mesh mesh_emb_expanded = self.expand_to_batch( From 4bcaa4b48c9a70a753599423b72dc5cd889cbd52 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 3 Dec 2024 11:58:34 +0100 Subject: [PATCH 112/190] Linting --- neural_lam/weather_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index ed67b6f7..f02cfbd4 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -234,7 +234,6 @@ def __init__( self.da_boundary_mean = self.ds_boundary_stats.forcing_mean self.da_boundary_std = self.ds_boundary_stats.forcing_std - def __len__(self): if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time From 
16d5d04bbd9e49a1fe53e56ff95e22d565326c67 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 12:23:15 +0100 Subject: [PATCH 113/190] Fixed issue with temporal encoding dimensions + some more comments --- neural_lam/weather_dataset.py | 38 +++++++++++++++++++++++------------ tests/test_datasets.py | 19 +++++++----------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index f02cfbd4..93988ed7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -287,6 +287,11 @@ def _get_time_step(self, times): ---------- times : xr.DataArray The time dataarray to calculate the time step from. + + Returns + ------- + time_step : float + The time step in the the format of the times dataarray. """ time_diffs = np.diff(times) if not np.all(time_diffs == time_diffs[0]): @@ -368,6 +373,7 @@ def _slice_time( {"elapsed_forecast_duration": "time"} ) # Asserting that the forecast time step is consistent + # In init this was only done for the analysis_time self._get_time_step(da_state_sliced.time) else: @@ -382,7 +388,8 @@ def _slice_time( return da_state_sliced, None # Get the state times and its temporal resolution for matching with - # forcing data + # forcing data. No need to self._get_time_step as we have already + # checked the time step consistency in the state data. state_times = da_state_sliced["time"] state_time_step = state_times.values[1] - state_times.values[0] @@ -440,12 +447,14 @@ def _slice_time( da_forcing_boundary_matched.time.values[1] - da_forcing_boundary_matched.time.values[0] ) + # Since all time, grid_index and forcing_features share the same + # temporal_embedding we can just use the first one da_forcing_boundary_matched["window"] = da_forcing_boundary_matched[ "window" ] * (forcing_time_step / state_time_step) time_diff_steps = da_forcing_boundary_matched.isel( - grid_index=0, forcing_feature=0 - ).data + grid_index=0, forcing_feature=0, time=0 + ).window.values else: # For analysis data, we slice the time dimension directly. The @@ -462,15 +471,16 @@ def _slice_time( ) / state_time_step idx_min = np.abs(time_deltas).argmin(axis=0) - time_diff_steps = np.stack( - [ - time_deltas[ - idx_i - num_past_steps : idx_i + num_future_steps + 1, - init_steps + step_i, - ] - for (step_i, idx_i) in enumerate(idx_min[init_steps:]) - ], - ) + # Get the time differences for windowed time steps - they are + # used as temporal embeddings and concatenated to the forcing + # features later. 
All features share the same temporal embedding + time_diff_steps = time_deltas[ + idx_min[init_steps] + - num_past_steps : idx_min[init_steps] + + num_future_steps + + 1, + init_steps, + ] # Create window dimension for forcing data to stack later window_size = num_past_steps + num_future_steps + 1 @@ -484,7 +494,7 @@ def _slice_time( # Add time difference as a new coordinate to concatenate to the # forcing features later as temporal embedding da_forcing_boundary_matched["time_diff_steps"] = ( - ("time", "window"), + ("window"), time_diff_steps, ) @@ -519,6 +529,8 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) + # All data variables share the same temporal embedding, hence + # only the first one is used da_windowed = xr.concat( [da_windowed, da_windowed.time_diff_steps], dim="forcing_feature_windowed", diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 063ec147..aa7b645d 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -82,24 +82,19 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): assert forcing.ndim == 3 assert forcing.shape[0] == N_pred_steps assert forcing.shape[1] == N_gridpoints - # each stacked forcing feature has one corresponding temporal embedding - assert ( - forcing.shape[2] - == datastore.get_num_data_vars("forcing") - * (num_past_forcing_steps + num_future_forcing_steps + 1) - * 2 + # each time step in the window has one corresponding temporal embedding + # that is shared across all grid points, times and variables + assert forcing.shape[2] == (datastore.get_num_data_vars("forcing") + 1) * ( + num_past_forcing_steps + num_future_forcing_steps + 1 ) # boundary assert boundary.ndim == 3 assert boundary.shape[0] == N_pred_steps assert boundary.shape[1] == N_gridpoints_boundary - assert ( - boundary.shape[2] - == datastore_boundary.get_num_data_vars("forcing") - * (num_past_boundary_steps + num_future_boundary_steps + 1) - * 2 - ) + assert boundary.shape[2] == ( + datastore_boundary.get_num_data_vars("forcing") + 1 + ) * (num_past_boundary_steps + num_future_boundary_steps + 1) # batch times assert target_times.ndim == 1 From f1f3f73e8269ffe20bc7acb771037fa8b9410d4f Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 12:23:23 +0100 Subject: [PATCH 114/190] format docstrings --- neural_lam/models/ar_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 331966e4..6074a024 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -193,18 +193,18 @@ def expand_to_batch(x, batch_size): def predict_step(self, prev_state, prev_prev_state, forcing): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 - prev_state: (B, num_grid_nodes, feature_dim), X_t prev_prev_state: (B, - num_grid_nodes, feature_dim), X_{t-1} forcing: (B, num_grid_nodes, - forcing_dim) + prev_state: (B, num_grid_nodes, feature_dim), X_t + prev_prev_state: (B, num_grid_nodes, feature_dim), X_{t-1} + forcing: (B, num_grid_nodes, forcing_dim) """ raise NotImplementedError("No prediction step implemented") def unroll_prediction(self, init_states, forcing_features, true_states): """ Roll out prediction taking multiple autoregressive steps with model - init_states: (B, 2, num_grid_nodes, d_f) forcing_features: (B, - pred_steps, num_grid_nodes, d_static_f) true_states: (B, pred_steps, - 
num_grid_nodes, d_f) + init_states: (B, 2, num_grid_nodes, d_f) + forcing_features: (B, pred_steps, num_grid_nodes, d_static_f) + true_states: (B, pred_steps, num_grid_nodes, d_f) """ prev_prev_state = init_states[:, 0] prev_state = init_states[:, 1] From 8fd7a10fc1d01c998641baf228dfa66f8630280c Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 12:23:33 +0100 Subject: [PATCH 115/190] introduced time slicing test for forecast type data --- tests/test_time_slicing.py | 186 +++++++++++++++++++++++++++++++------ 1 file changed, 158 insertions(+), 28 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 4a59c81e..57e468db 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -16,40 +16,76 @@ class SinglePointDummyDatastore(BaseDatastore): root_path = None def __init__(self, time_values, state_data, forcing_data, is_forecast): - self._time_values = np.array(time_values) - self._state_data = np.array(state_data) - self._forcing_data = np.array(forcing_data) self.is_forecast = is_forecast - if is_forecast: - assert self._state_data.ndim == 2 + self._analysis_times, self._forecast_times = time_values + self._state_data = np.array(state_data) + self._forcing_data = np.array(forcing_data) + # state_data and forcing_data should be 2D arrays with shape + # (n_analysis_times, n_forecast_times) else: - assert self._state_data.ndim == 1 + self._time_values = np.array(time_values) + self._state_data = np.array(state_data) + self._forcing_data = np.array(forcing_data) + + if is_forecast: + assert self._state_data.ndim == 2 + else: + assert self._state_data.ndim == 1 def get_num_data_vars(self, category): return 1 def get_dataarray(self, category, split): - if category == "state": - values = self._state_data - elif category == "forcing": - values = self._forcing_data - else: - raise NotImplementedError(category) - if self.is_forecast: - raise NotImplementedError() + if category == "state": + # Create DataArray with dims ('analysis_time', + # 'elapsed_forecast_duration') + da = xr.DataArray( + self._state_data, + dims=["analysis_time", "elapsed_forecast_duration"], + coords={ + "analysis_time": self._analysis_times, + "elapsed_forecast_duration": self._forecast_times, + }, + ) + elif category == "forcing": + da = xr.DataArray( + self._forcing_data, + dims=["analysis_time", "elapsed_forecast_duration"], + coords={ + "analysis_time": self._analysis_times, + "elapsed_forecast_duration": self._forecast_times, + }, + ) + else: + raise NotImplementedError(category) + # Add 'grid_index' and '{category}_feature' dimensions + da = da.expand_dims("grid_index") + da = da.expand_dims(f"{category}_feature") + dim_order = self.expected_dim_order(category=category) + return da.transpose(*dim_order) else: - da = xr.DataArray( - values, dims=["time"], coords={"time": self._time_values} - ) - # add `{category}_feature` and `grid_index` dimensions + if category == "state": + values = self._state_data + elif category == "forcing": + values = self._forcing_data + else: + raise NotImplementedError(category) + + if self.is_forecast: + raise NotImplementedError() + else: + da = xr.DataArray( + values, dims=["time"], coords={"time": self._time_values} + ) + # add `{category}_feature` and `grid_index` dimensions - da = da.expand_dims("grid_index") - da = da.expand_dims(f"{category}_feature") + da = da.expand_dims("grid_index") + da = da.expand_dims(f"{category}_feature") - dim_order = self.expected_dim_order(category=category) - return da.transpose(*dim_order) + 
dim_order = self.expected_dim_order(category=category) + return da.transpose(*dim_order) def get_standardization_dataarray(self, category): raise NotImplementedError() @@ -70,6 +106,32 @@ def get_vars_long_names(self, category): ANALYSIS_STATE_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] FORCING_VALUES = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +# Constants for forecast data +FORECAST_ANALYSIS_TIMES = np.datetime64("2020-01-01") + np.arange(3) +FORECAST_FORECAST_TIMES = np.timedelta64(0, "D") + np.arange(7) + +FORECAST_STATE_VALUES = np.array( + [ + # Analysis time 0 + [0, 1, 2, 3, 4, 5, 6], + # Analysis time 1 + [10, 11, 12, 13, 14, 15, 16], + # Analysis time 2 + [20, 21, 22, 23, 24, 25, 26], + ] +) + +FORECAST_FORCING_VALUES = np.array( + [ + # Analysis time 0 + [100, 101, 102, 103, 104, 105, 106], + # Analysis time 1 + [110, 111, 112, 113, 114, 115, 116], + # Analysis time 2 + [120, 121, 122, 123, 124, 125, 126], + ] +) + @pytest.mark.parametrize( "ar_steps,num_past_forcing_steps,num_future_forcing_steps", @@ -79,9 +141,7 @@ def test_time_slicing_analysis( ar_steps, num_past_forcing_steps, num_future_forcing_steps ): # state and forcing variables have only one dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange( - len(ANALYSIS_STATE_VALUES) - ) + time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -102,9 +162,7 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _, _ = [ - tensor.numpy() for tensor in sample - ] + init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] expected_init_states = [0, 1] if ar_steps == 3: @@ -149,3 +207,75 @@ def test_time_slicing_analysis( forcing[:, 0, : num_past_forcing_steps + num_future_forcing_steps + 1], np.array(expected_forcing_values), ) + + +@pytest.mark.parametrize( + "ar_steps,num_past_forcing_steps,num_future_forcing_steps", + [ + [3, 0, 0], + [3, 1, 0], + [3, 2, 0], + [3, 0, 1], + [3, 0, 2], + ], +) +def test_time_slicing_forecast( + ar_steps, num_past_forcing_steps, num_future_forcing_steps +): + # Create a dummy datastore with forecast data + time_values = (FORECAST_ANALYSIS_TIMES, FORECAST_FORECAST_TIMES) + datastore = SinglePointDummyDatastore( + state_data=FORECAST_STATE_VALUES, + forcing_data=FORECAST_FORCING_VALUES, + time_values=time_values, + is_forecast=True, + ) + + dataset = WeatherDataset( + datastore=datastore, + datastore_boundary=None, + split="train", + ar_steps=ar_steps, + num_past_forcing_steps=num_past_forcing_steps, + num_future_forcing_steps=num_future_forcing_steps, + standardize=False, + ) + + # Test the dataset length + assert len(dataset) == len(FORECAST_ANALYSIS_TIMES) + + sample = dataset[0] + + init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] + + # Expected initial states and target states + expected_init_states = FORECAST_STATE_VALUES[0][:2] + expected_target_states = FORECAST_STATE_VALUES[0][2 : 2 + ar_steps] + + # Expected forcing values + total_forcing_window = num_past_forcing_steps + num_future_forcing_steps + 1 + expected_forcing_values = [] + for i in range(ar_steps): + start_idx = max(0, i + 2 - num_past_forcing_steps) + end_idx = i + 2 + num_future_forcing_steps + 1 + forcing_window = FORECAST_FORCING_VALUES[0][start_idx:end_idx] + expected_forcing_values.append(forcing_window) + + # Assertions + np.testing.assert_array_equal(init_states[:, 0, 
0], expected_init_states) + np.testing.assert_array_equal(target_states[:, 0, 0], expected_target_states) + + # Verify the shape of the forcing data + expected_forcing_shape = ( + ar_steps, + 1, + total_forcing_window * 2, # Each windowed feature includes temporal embedding + ) + assert forcing.shape == expected_forcing_shape + + # Extract the forcing values from the tensor (excluding temporal embeddings) + forcing_values = forcing[:, 0, :total_forcing_window] + + # Compare with expected forcing values + for i in range(ar_steps): + np.testing.assert_array_equal(forcing_values[i], expected_forcing_values[i]) From 252a33cd5903aca793f480e871912cc8b8616df2 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 13:05:05 +0100 Subject: [PATCH 116/190] bugfix temporal embedding dimension --- neural_lam/models/ar_model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 6074a024..81d5a623 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -110,9 +110,8 @@ def __init__( self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim - # Factor 2 because of temporal embedding or windowed features - + 2 - * num_forcing_vars + # Temporal Embedding counts as one additional forcing_feature + + (num_forcing_vars + 1) * (num_past_forcing_steps + num_future_forcing_steps + 1) ) From 8a9114a3629d0840b6c5d4fb9b18f080f61e751d Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 13:05:08 +0100 Subject: [PATCH 117/190] linting --- tests/test_time_slicing.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 57e468db..48860161 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -141,7 +141,9 @@ def test_time_slicing_analysis( ar_steps, num_past_forcing_steps, num_future_forcing_steps ): # state and forcing variables have only one dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange(len(ANALYSIS_STATE_VALUES)) + time_values = np.datetime64("2020-01-01") + np.arange( + len(ANALYSIS_STATE_VALUES) + ) assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( @@ -162,7 +164,9 @@ def test_time_slicing_analysis( sample = dataset[0] - init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] + init_states, target_states, forcing, _, _ = [ + tensor.numpy() for tensor in sample + ] expected_init_states = [0, 1] if ar_steps == 3: @@ -246,7 +250,9 @@ def test_time_slicing_forecast( sample = dataset[0] - init_states, target_states, forcing, _, _ = [tensor.numpy() for tensor in sample] + init_states, target_states, forcing, _, _ = [ + tensor.numpy() for tensor in sample + ] # Expected initial states and target states expected_init_states = FORECAST_STATE_VALUES[0][:2] @@ -263,13 +269,16 @@ def test_time_slicing_forecast( # Assertions np.testing.assert_array_equal(init_states[:, 0, 0], expected_init_states) - np.testing.assert_array_equal(target_states[:, 0, 0], expected_target_states) + np.testing.assert_array_equal( + target_states[:, 0, 0], expected_target_states + ) # Verify the shape of the forcing data expected_forcing_shape = ( ar_steps, 1, - total_forcing_window * 2, # Each windowed feature includes temporal embedding + total_forcing_window + * 2, # Each windowed feature includes temporal embedding ) assert forcing.shape == expected_forcing_shape @@ 
-278,4 +287,6 @@ def test_time_slicing_forecast( # Compare with expected forcing values for i in range(ar_steps): - np.testing.assert_array_equal(forcing_values[i], expected_forcing_values[i]) + np.testing.assert_array_equal( + forcing_values[i], expected_forcing_values[i] + ) From 6afc50c831589148642f9f72beb892cd1ab562b7 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 3 Dec 2024 14:40:01 +0100 Subject: [PATCH 118/190] Get boundary static features from second datastore --- neural_lam/models/ar_model.py | 71 +++++++++++++++-------------------- neural_lam/train_model.py | 7 +++- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index ef766113..c4c4bc62 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -32,6 +32,7 @@ def __init__( args, config: NeuralLAMConfig, datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], ): super().__init__() self.save_hyperparameters(ignore=["datastore"]) @@ -39,49 +40,24 @@ def __init__( self._datastore = datastore num_state_vars = datastore.get_num_data_vars(category="state") num_forcing_vars = datastore.get_num_data_vars(category="forcing") - da_static_features = datastore.get_dataarray( - category="static", split=None - ) - da_state_stats = datastore.get_standardization_dataarray( - category="state" - ) + num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps - # TODO: Set based on existing of boundary forcing datastore - # TODO: Adjust what is stored here based on self.boundary_forced - self.boundary_forced = False - - # Set up boundary mask - boundary_mask = torch.tensor( - da_boundary_mask.values, dtype=torch.float32 - ).unsqueeze( - 1 - ) # add feature dim - - self.register_buffer("boundary_mask", boundary_mask, persistent=False) - # Pre-compute interior mask for use in loss function - self.register_buffer( - "interior_mask", 1.0 - self.boundary_mask, persistent=False - ) # (num_grid_nodes, 1), 1 for non-border - - # Load static features for grid/data, NB: self.predict_step assumes - # dimension order to be (grid_index, static_feature) - arr_static = da_static_features.transpose( - "grid_index", "static_feature" - ).values - static_features_torch = torch.tensor(arr_static, dtype=torch.float32) - self.register_buffer( - "grid_static_features", - static_features_torch[self.interior_mask[:, 0].to(torch.bool)], - persistent=False, + # Load static features for grid + da_static_features = datastore.get_dataarray( + category="static", split=None ) self.register_buffer( - "boundary_static_features", - static_features_torch[self.boundary_mask[:, 0].to(torch.bool)], + "grid_static_features", + torch.tensor(da_static_features.values, dtype=torch.float32), persistent=False, ) + # Load stats for rescaling and weights + da_state_stats = datastore.get_standardization_dataarray( + category="state" + ) state_stats = { "state_mean": torch.tensor( da_state_stats.state_mean.values, dtype=torch.float32 @@ -137,16 +113,29 @@ def __init__( * num_forcing_vars * (num_past_forcing_steps + num_future_forcing_steps + 1) ) + + # If datastore_boundary is given, the model is forced from the boundary + self.boundary_forced = datastore_boundary is not None + if self.boundary_forced: - self.boundary_dim = self.grid_dim # TODO Compute separately + # Load static features for grid + da_boundary_static_features = datastore_boundary.get_dataarray( + category="static", split=None + ) + self.register_buffer( 
+ "boundary_static_features", + torch.tensor( + da_boundary_static_features.values, dtype=torch.float32 + ), + persistent=False, + ) + ( self.num_boundary_nodes, - boundary_static_dim, # TODO Will need for computation below + boundary_static_dim, ) = self.boundary_static_features.shape - self.num_input_nodes = self.num_grid_nodes + self.num_boundary_nodes - else: - # Only interior grid nodes - self.num_input_nodes = self.num_grid_nodes + # TODO Compute boundary input dim separately + self.boundary_dim = self.grid_dim # Instantiate loss function self.loss = metrics.get_metric(args.loss) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 3c2dbece..df590e3e 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -275,7 +275,12 @@ def main(input_args=None): # Load model parameters Use new args for model ModelClass = MODELS[args.model] - model = ModelClass(args, config=config, datastore=datastore) + model = ModelClass( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) if args.eval: prefix = f"eval-{args.eval}-" From deb33389b228013e3dda93716200637fa48d4e3e Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 3 Dec 2024 14:47:42 +0100 Subject: [PATCH 119/190] Compute boundary forcing dimensions separately --- neural_lam/models/ar_model.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 6dc8500b..1edb7931 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -129,12 +129,24 @@ def __init__( persistent=False, ) + # Compute dimensionalities (e.g. to instantiate MLPs) ( self.num_boundary_nodes, boundary_static_dim, ) = self.boundary_static_features.shape - # TODO Compute boundary input dim separately - self.boundary_dim = self.grid_dim + + # Compute boundary input dim separately + num_boundary_forcing_vars = datastore_boundary.get_num_data_vars( + category="forcing" + ) + num_past_boundary_steps = args.num_past_boundary_steps + num_future_boundary_steps = args.num_future_boundary_steps + self.boundary_dim = ( + boundary_static_dim + # Temporal Embedding counts as one additional forcing_feature + + (num_boundary_forcing_vars + 1) + * (num_past_boundary_steps + num_future_boundary_steps + 1) + ) # Instantiate loss function self.loss = metrics.get_metric(args.loss) From 8c7709a3c4b7bbd14c5736713dee6af1cd6e2b80 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 17:36:05 +0100 Subject: [PATCH 120/190] switched to low-res data --- .../mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml | 4 ++-- .../era5.datastore.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml index 7c5ffb3b..587d7879 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -29,7 +29,7 @@ output: inputs: era_height_levels: - path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: u_component_of_wind: @@ -56,7 +56,7 @@ inputs: target_output_variable: forcing era5_surface: - path: 
'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: - mean_surface_net_short_wave_radiation_flux diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml index 7c5ffb3b..587d7879 100644 --- a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml @@ -29,7 +29,7 @@ output: inputs: era_height_levels: - path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: u_component_of_wind: @@ -56,7 +56,7 @@ inputs: target_output_variable: forcing era5_surface: - path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr' + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: - mean_surface_net_short_wave_radiation_flux From 24cbf13b1e51e42adaf7bd4aeab95a77bb479a1e Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Tue, 3 Dec 2024 17:36:27 +0100 Subject: [PATCH 121/190] add datastore_boundary as explicit attribute --- neural_lam/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/neural_lam/config.py b/neural_lam/config.py index 914ebb38..f8879811 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -97,11 +97,15 @@ class NeuralLAMConfig(dataclass_wizard.JSONWizard, dataclass_wizard.YAMLWizard): ---------- datastore : DatastoreSelection The configuration for the datastore to use. + datastore_boundary : Union[DatastoreSelection, None] + The configuration for the boundary datastore to use, if any. If None, + no boundary datastore is used. training : TrainingConfig The configuration for training the model. 
""" datastore: DatastoreSelection + datastore_boundary: Union[DatastoreSelection, None] = None training: TrainingConfig = dataclasses.field(default_factory=TrainingConfig) class _(dataclass_wizard.JSONWizard.Meta): From 556b24b3a14b9f2c11e61131cc0721b0c8ee5eda Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 3 Dec 2024 17:57:38 +0100 Subject: [PATCH 122/190] Make graph creation and plotting work with dual datastore setup --- neural_lam/build_rectangular_graph.py | 32 +++++++++++++----------- neural_lam/plot_graph.py | 7 ++++-- neural_lam/utils.py | 36 ++++++++++++++++----------- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index df7f8ba8..fa0875d0 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -77,24 +77,28 @@ def main(input_args=None): args.graph_name is not None ), "Specify the name to save graph as with --graph_name" - _, datastore = load_config_and_datastore(config_path=args.config_path) + _, datastore, datastore_boundary = load_config_and_datastore( + config_path=args.config_path + ) # Load grid positions - # TODO Do not get normalised positions - coords = utils.get_reordered_grid_pos(datastore).numpy() + coords = utils.stack_all_grid_coords(datastore, datastore_boundary) # (num_nodes_full, 2) - # Construct mask - num_full_grid = coords.shape[0] - num_boundary = datastore.boundary_mask.to_numpy().sum() - num_interior = num_full_grid - num_boundary - decode_mask = np.concatenate( - ( - np.ones(num_interior, dtype=bool), - np.zeros(num_boundary, dtype=bool), - ), - axis=0, - ) + if datastore_boundary is None: + # No mask + decode_mask = None + else: + # Construct mask to decode only to interior + num_interior = datastore.num_grid_points + num_boundary = datastore_boundary.num_grid_points + decode_mask = np.concatenate( + ( + np.ones(num_interior, dtype=bool), + np.zeros(num_boundary, dtype=bool), + ), + axis=0, + ) # Build graph assert ( diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index 11bd795a..b805f673 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -43,7 +43,9 @@ def main(): args.config_path is not None ), "Specify your config with --config_path" - _, datastore = load_config_and_datastore(config_path=args.config_path) + _, datastore, datastore_boundary = load_config_and_datastore( + config_path=args.config_path + ) # Load graph data graph_dir_path = os.path.join( @@ -62,7 +64,8 @@ def main(): mesh_static_features = graph_ldict["mesh_static_features"] # Extract values needed, turn to numpy - grid_pos = utils.get_reordered_grid_pos(datastore).numpy() + grid_pos = utils.stack_all_grid_coords(datastore, datastore_boundary) + # (num_nodes_full, 2) grid_scale = np.ptp(grid_pos) # Add in z-dimension for grid diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 6f910cee..78697fd0 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -3,6 +3,7 @@ import shutil # Third-party +import numpy as np import torch from torch import nn from tueplots import bundles, figsizes @@ -291,22 +292,27 @@ def init_wandb_metrics(wandb_logger, val_steps): experiment.define_metric(f"val_loss_unroll{step}", summary="min") -def get_reordered_grid_pos(datastore): +def stack_all_grid_coords(datastore, datastore_boundary=None): """ - Interior nodes first, then boundary + Stack the coordinates of all grid nodes in the correct ordering + + Parameters + ---------- + datastore : BaseDatastore + The datastore 
containing data for the interior region of the grid + datastore_boundary : BaseDatastore or None + (Optional) The datastore containing data for boundary forcing + + Returns + ------- + stacked_coords : np.ndarray + Array of all coordinates, shaped (num_total_grid_nodes, 2) """ - xy_np = datastore.get_xy("state") # np, (num_grid, 2) - xy_torch = torch.tensor(xy_np, dtype=torch.float32) + grid_xy = datastore.get_xy(category="state") - da_boundary_mask = datastore.boundary_mask - boundary_mask = torch.tensor(da_boundary_mask.values, dtype=torch.bool) - interior_mask = torch.logical_not(boundary_mask) + if datastore_boundary is None: + return grid_xy - return torch.cat( - ( - xy_torch[interior_mask], - xy_torch[boundary_mask], - ), - dim=0, - ) - # (num_total_grid_nodes, 2) + # Append boundary forcing positions last + boundary_xy = datastore_boundary.get_xy(category="forcing") + return np.concatenate((grid_xy, boundary_xy), axis=0) From 71cfdf918de59b6a40b570508618ce7c9bcda867 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Wed, 4 Dec 2024 09:00:10 +0100 Subject: [PATCH 123/190] Fix evaluation example visualisation plots (#91) Fix bugs in recently introduced datastore functionality #66 (error in calculation in `BaseDatastore.get_xy_extent()` and overlooked in-place modification of config dict in `MDPDatastore.coords_projection`), and also fix issue in `ARModel.plot_examples` by using newly introduced (#66) `WeatherDataset.create_dataarray_from_tensor()` to create `xr.DataArray` from prediction tensor and calling plot methods directly on `xr.DataArray` rather than using bare numpy arrays with `matplotlib`. --- CHANGELOG.md | 4 ++ neural_lam/datastore/base.py | 9 +++- neural_lam/datastore/mdp.py | 5 ++- neural_lam/models/ar_model.py | 81 +++++++++++++++++++++++++++++++---- neural_lam/vis.py | 42 ++++++++---------- neural_lam/weather_dataset.py | 5 ++- pyproject.toml | 2 +- 7 files changed, 109 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12cf54f6..01d4cac9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#66](https://github.com/mllam/neural-lam/pull/66) @leifdenby @sadamov +### Fixed + +- Fix bugs introduced with datastores functionality relating visualation plots [\#91](https://github.com/mllam/neural-lam/pull/91) @leifdenby + ## [v0.2.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.2.0) ### Added diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index 0317c2e5..b0055e39 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -295,8 +295,13 @@ def get_xy_extent(self, category: str) -> List[float]: The extent of the x, y coordinates. 
""" - xy = self.get_xy(category, stacked=False) - extent = [xy[0].min(), xy[0].max(), xy[1].min(), xy[1].max()] + xy = self.get_xy(category, stacked=True) + extent = [ + xy[:, 0].min(), + xy[:, 0].max(), + xy[:, 1].min(), + xy[:, 1].max(), + ] return [float(v) for v in extent] @property diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 10593a82..0d1aac7b 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -1,4 +1,5 @@ # Standard library +import copy import warnings from functools import cached_property from pathlib import Path @@ -394,7 +395,9 @@ def coords_projection(self) -> ccrs.Projection: class_name = projection_info["class_name"] ProjectionClass = getattr(ccrs, class_name) - kwargs = projection_info["kwargs"] + # need to copy otherwise we modify the dict stored in the dataclass + # in-place + kwargs = copy.deepcopy(projection_info["kwargs"]) globe_kwargs = kwargs.pop("globe", {}) if len(globe_kwargs) > 0: diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index bc4c6719..44baf9c2 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -1,5 +1,6 @@ # Standard library import os +from typing import List, Union # Third-party import matplotlib.pyplot as plt @@ -7,12 +8,14 @@ import pytorch_lightning as pl import torch import wandb +import xarray as xr # Local from .. import metrics, vis from ..config import NeuralLAMConfig from ..datastore import BaseDatastore from ..loss_weighting import get_state_feature_weighting +from ..weather_dataset import WeatherDataset class ARModel(pl.LightningModule): @@ -147,6 +150,44 @@ def __init__( # For storing spatial loss maps during evaluation self.spatial_loss_maps = [] + def _create_dataarray_from_tensor( + self, + tensor: torch.Tensor, + time: Union[int, List[int]], + split: str, + category: str, + ) -> xr.DataArray: + """ + Create an `xr.DataArray` from a tensor, with the correct dimensions and + coordinates to match the datastore used by the model. This function in + in effect is the inverse of what is returned by + `WeatherDataset.__getitem__`. + + Parameters + ---------- + tensor : torch.Tensor + The tensor to convert to a `xr.DataArray` with dimensions [time, + grid_index, feature]. The tensor will be copied to the CPU if it is + not already there. + time : Union[int,List[int]] + The time index or indices for the data, given as integers or a list + of integers representing epoch time in nanoseconds. The ints will be + copied to the CPU memory if they are not already there. 
+ split : str + The split of the data, either 'train', 'val', or 'test' + category : str + The category of the data, either 'state' or 'forcing' + """ + # TODO: creating an instance of WeatherDataset here on every call is + # not how this should be done but whether WeatherDataset should be + # provided to ARModel or where to put plotting still needs discussion + weather_dataset = WeatherDataset(datastore=self._datastore, split=split) + time = np.array(time.cpu(), dtype="datetime64[ns]") + da = weather_dataset.create_dataarray_from_tensor( + tensor=tensor.cpu().numpy(), time=time, category=category + ) + return da + def configure_optimizers(self): opt = torch.optim.AdamW( self.parameters(), lr=self.args.lr, betas=(0.9, 0.95) @@ -406,10 +447,13 @@ def test_step(self, batch, batch_idx): ) self.plot_examples( - batch, n_additional_examples, prediction=prediction + batch, + n_additional_examples, + prediction=prediction, + split="test", ) - def plot_examples(self, batch, n_examples, prediction=None): + def plot_examples(self, batch, n_examples, split, prediction=None): """ Plot the first n_examples forecasts from batch @@ -422,18 +466,34 @@ def plot_examples(self, batch, n_examples, prediction=None): prediction, target, _, _ = self.common_step(batch) target = batch[1] + time = batch[3] # Rescale to original data scale prediction_rescaled = prediction * self.state_std + self.state_mean target_rescaled = target * self.state_std + self.state_mean # Iterate over the examples - for pred_slice, target_slice in zip( - prediction_rescaled[:n_examples], target_rescaled[:n_examples] + for pred_slice, target_slice, time_slice in zip( + prediction_rescaled[:n_examples], + target_rescaled[:n_examples], + time[:n_examples], ): # Each slice is (pred_steps, num_grid_nodes, d_f) self.plotted_examples += 1 # Increment already here + da_prediction = self._create_dataarray_from_tensor( + tensor=pred_slice, + time=time_slice, + split=split, + category="state", + ).unstack("grid_index") + da_target = self._create_dataarray_from_tensor( + tensor=target_slice, + time=time_slice, + split=split, + category="state", + ).unstack("grid_index") + var_vmin = ( torch.minimum( pred_slice.flatten(0, 1).min(dim=0)[0], @@ -453,18 +513,20 @@ def plot_examples(self, batch, n_examples, prediction=None): var_vranges = list(zip(var_vmin, var_vmax)) # Iterate over prediction horizon time steps - for t_i, (pred_t, target_t) in enumerate( - zip(pred_slice, target_slice), start=1 - ): + for t_i, _ in enumerate(zip(pred_slice, target_slice), start=1): # Create one figure per variable at this time step var_figs = [ vis.plot_prediction( - pred=pred_t[:, var_i], - target=target_t[:, var_i], datastore=self._datastore, title=f"{var_name} ({var_unit}), " f"t={t_i} ({self._datastore.step_length * t_i} h)", vrange=var_vrange, + da_prediction=da_prediction.isel( + state_feature=var_i, time=t_i - 1 + ).squeeze(), + da_target=da_target.isel( + state_feature=var_i, time=t_i - 1 + ).squeeze(), ) for var_i, (var_name, var_unit, var_vrange) in enumerate( zip( @@ -476,6 +538,7 @@ def plot_examples(self, batch, n_examples, prediction=None): ] example_i = self.plotted_examples + wandb.log( { f"{var_name}_example_{example_i}": wandb.Image(fig) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index b9d18b39..d6b57f88 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -2,6 +2,7 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np +import xarray as xr # Local from . 
import utils @@ -65,9 +66,9 @@ def plot_error_map(errors, datastore: BaseRegularGridDatastore, title=None): @matplotlib.rc_context(utils.fractional_plot_bundle(1)) def plot_prediction( - pred, - target, datastore: BaseRegularGridDatastore, + da_prediction: xr.DataArray = None, + da_target: xr.DataArray = None, title=None, vrange=None, ): @@ -79,8 +80,8 @@ def plot_prediction( """ # Get common scale for values if vrange is None: - vmin = min(vals.min().cpu().item() for vals in (pred, target)) - vmax = max(vals.max().cpu().item() for vals in (pred, target)) + vmin = min(da_prediction.min(), da_target.min()) + vmax = max(da_prediction.max(), da_target.max()) else: vmin, vmax = vrange @@ -88,10 +89,8 @@ def plot_prediction( # Set up masking of border region da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) - mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region + mask_values = np.invert(da_mask.values.astype(bool)).astype(float) + pixel_alpha = mask_values.clip(0.7, 1) # Faded border region fig, axes = plt.subplots( 1, @@ -101,28 +100,23 @@ def plot_prediction( ) # Plot pred and target - for ax, data in zip(axes, (target, pred)): + for ax, da in zip(axes, (da_target, da_prediction)): ax.coastlines() # Add coastline outlines - data_grid = ( - data.reshape(list(datastore.grid_shape_state.values.values())) - .cpu() - .numpy() - ) - im = ax.imshow( - data_grid, + da.plot.imshow( + ax=ax, origin="lower", + x="x", extent=extent, - alpha=pixel_alpha, + alpha=pixel_alpha.T, vmin=vmin, vmax=vmax, cmap="plasma", + transform=datastore.coords_projection, ) # Ticks and labels axes[0].set_title("Ground Truth", size=15) axes[1].set_title("Prediction", size=15) - cbar = fig.colorbar(im, aspect=30) - cbar.ax.tick_params(labelsize=10) if title: fig.suptitle(title, size=20) @@ -150,9 +144,7 @@ def plot_spatial_error( # Set up masking of border region da_mask = datastore.unstack_grid_coords(datastore.boundary_mask) mask_reshaped = da_mask.values - pixel_alpha = ( - mask_reshaped.clamp(0.7, 1).cpu().numpy() - ) # Faded border region + pixel_alpha = mask_reshaped.clip(0.7, 1) # Faded border region fig, ax = plt.subplots( figsize=(5, 4.8), @@ -161,8 +153,10 @@ def plot_spatial_error( ax.coastlines() # Add coastline outlines error_grid = ( - error.reshape(list(datastore.grid_shape_state.values.values())) - .cpu() + error.reshape( + [datastore.grid_shape_state.x, datastore.grid_shape_state.y] + ) + .T.cpu() .numpy() ) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 532e3c90..b5f85580 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -529,7 +529,8 @@ def create_dataarray_from_tensor( tensor : torch.Tensor The tensor to construct the DataArray from, this assumed to have the same dimension ordering as returned by the __getitem__ method - (i.e. time, grid_index, {category}_feature). + (i.e. time, grid_index, {category}_feature). The tensor will be + copied to the CPU before constructing the DataArray. time : datetime.datetime or list[datetime.datetime] The time or times of the tensor. 
category : str @@ -581,7 +582,7 @@ def _is_listlike(obj): coords["time"] = time da = xr.DataArray( - tensor.numpy(), + tensor.cpu().numpy(), dims=dims, coords=coords, ) diff --git a/pyproject.toml b/pyproject.toml index f0bc0851..fdcb7f3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "torch>=2.3.0", "torch-geometric==2.3.1", "parse>=1.20.2", - "dataclass-wizard>=0.22.3", + "dataclass-wizard<0.31.0", "mllam-data-prep>=0.5.0", ] requires-python = ">=3.9" From 7c382b8df444e045968448fa48ee6d0b883ab6d3 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 4 Dec 2024 16:53:38 +0100 Subject: [PATCH 124/190] Use lat-lons + crs for graph construction --- neural_lam/build_rectangular_graph.py | 8 +++- neural_lam/datastore/base.py | 25 ++++++++++++ neural_lam/plot_graph.py | 3 +- neural_lam/utils.py | 59 +++++++++++++++++++++++---- 4 files changed, 84 insertions(+), 11 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index fa0875d0..6cdd3ba7 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -3,6 +3,7 @@ import os # Third-party +import cartopy.crs as ccrs import numpy as np import weather_model_graphs as wmg @@ -82,8 +83,11 @@ def main(input_args=None): ) # Load grid positions - coords = utils.stack_all_grid_coords(datastore, datastore_boundary) + coords = utils.get_stacked_lat_lons(datastore, datastore_boundary) # (num_nodes_full, 2) + # Project using crs from datastore for graph building + coords_crs = ccrs.PlateCarree() + graph_crs = datastore.coords_projection if datastore_boundary is None: # No mask @@ -110,6 +114,8 @@ def main(input_args=None): "coords": coords, "mesh_node_distance": args.mesh_node_distance, "decode_mask": decode_mask, + "graph_crs": graph_crs, + "coords_crs": coords_crs, "return_components": True, } if args.archetype != "keisler": diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index e2d21404..f49d6dfe 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -244,6 +244,7 @@ def get_xy(self, category: str) -> np.ndarray: np.ndarray The x, y coordinates of the dataset with shape `[n_grid_points, 2]`. """ + pass @property @abc.abstractmethod @@ -260,6 +261,30 @@ def coords_projection(self) -> ccrs.Projection: """ pass + @functools.lru_cache + def get_lat_lon(self, category: str) -> np.ndarray: + """ + Return the longitude, latitude coordinates of the dataset as numpy + array for a given category of data. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + np.ndarray + The longitude, latitude coordinates of the dataset + with shape `[n_grid_points, 2]`. 
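        Examples
        --------
        Illustrative sketch only; assumes `datastore` is any concrete
        datastore implementation:

        >>> lonlat = datastore.get_lat_lon(category="state")
        >>> lon, lat = lonlat[:, 0], lonlat[:, 1]  # columns are (lon, lat)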
+ """ + xy = self.get_xy(category=category) + + transformed_points = ccrs.PlateCarree().transform_points( + self.coords_projection, xy[:, 0], xy[:, 1] + ) + return transformed_points[:, :2] # Remove z-dim + @functools.lru_cache def get_xy_extent(self, category: str) -> List[float]: """ diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index b805f673..4f72ab16 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -64,7 +64,8 @@ def main(): mesh_static_features = graph_ldict["mesh_static_features"] # Extract values needed, turn to numpy - grid_pos = utils.stack_all_grid_coords(datastore, datastore_boundary) + # Now plotting is in the 2d CRS of datastore + grid_pos = utils.get_stacked_xy(datastore, datastore_boundary) # (num_nodes_full, 2) grid_scale = np.ptp(grid_pos) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 78697fd0..9b169a9f 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -3,6 +3,7 @@ import shutil # Third-party +import cartopy.crs as ccrs import numpy as np import torch from torch import nn @@ -109,9 +110,8 @@ def loads_file(fn): m2g_edge_index = loads_file("m2g_edge_index.pt") # (2, M_m2g) # Change first indices to 0 - g2m_edge_index = zero_index_edge_index(g2m_edge_index) - # m2g has to be handled specially as not all mesh nodes might be indexed in - # m2g_edge_index + # m2g and g2m has to be handled specially as not all mesh nodes + # might be indexed m2g_min_indices = m2g_edge_index.min(dim=1, keepdim=True)[0] if m2g_min_indices[0] < m2g_min_indices[1]: # mesh has the first indices @@ -125,10 +125,18 @@ def loads_file(fn): ), dim=0, ) + g2m_edge_index = torch.stack( + ( + g2m_edge_index[0] - num_mesh_nodes, + g2m_edge_index[1], + ), + dim=0, + ) else: # grid (interior) has the first indices # NOTE: Below works, but would be good with a better way to get this num_interior_nodes = m2g_edge_index[1].max() + 1 + num_grid_nodes = g2m_edge_index[0].max() + 1 m2g_edge_index = torch.stack( ( @@ -137,6 +145,13 @@ def loads_file(fn): ), dim=0, ) + g2m_edge_index = torch.stack( + ( + g2m_edge_index[0], + g2m_edge_index[1] - num_grid_nodes, + ), + dim=0, + ) n_levels = len(m2m_edge_index) hierarchical = n_levels > 1 # Nor just single level mesh graph @@ -292,9 +307,9 @@ def init_wandb_metrics(wandb_logger, val_steps): experiment.define_metric(f"val_loss_unroll{step}", summary="min") -def stack_all_grid_coords(datastore, datastore_boundary=None): +def get_stacked_lat_lons(datastore, datastore_boundary=None): """ - Stack the coordinates of all grid nodes in the correct ordering + Stack the lat-lon coordinates of all grid nodes in the correct ordering Parameters ---------- @@ -308,11 +323,37 @@ def stack_all_grid_coords(datastore, datastore_boundary=None): stacked_coords : np.ndarray Array of all coordinates, shaped (num_total_grid_nodes, 2) """ - grid_xy = datastore.get_xy(category="state") + grid_coords = datastore.get_lat_lon(category="state") if datastore_boundary is None: - return grid_xy + return grid_coords # Append boundary forcing positions last - boundary_xy = datastore_boundary.get_xy(category="forcing") - return np.concatenate((grid_xy, boundary_xy), axis=0) + boundary_coords = datastore_boundary.get_lat_lon(category="forcing") + return np.concatenate((grid_coords, boundary_coords), axis=0) + + +def get_stacked_xy(datastore, datastore_boundary=None): + """ + Stack the xy coordinates of all grid nodes in the correct ordering, + with xy coordinates being in the CRS of the datastore + + Parameters + ---------- + datastore : 
BaseDatastore + The datastore containing data for the interior region of the grid + datastore_boundary : BaseDatastore or None + (Optional) The datastore containing data for boundary forcing + + Returns + ------- + stacked_coords : np.ndarray + Array of all coordinates, shaped (num_total_grid_nodes, 2) + """ + lat_lons = get_stacked_lat_lons(datastore, datastore_boundary) + + # transform to datastore CRS + xyz = datastore.coords_projection.transform_points( + ccrs.PlateCarree(), lat_lons[:, 0], lat_lons[:, 1] + ) + return xyz[:, :2] From 14d3912675a985760a7d78c1d4eb39689d42e11f Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 4 Dec 2024 17:07:23 +0100 Subject: [PATCH 125/190] Fix model constructor signatures --- neural_lam/models/base_graph_model.py | 18 ++++++++++++++++-- neural_lam/models/base_hi_graph_model.py | 18 ++++++++++++++++-- neural_lam/models/graph_lam.py | 18 ++++++++++++++++-- neural_lam/models/hi_lam.py | 18 ++++++++++++++++-- neural_lam/models/hi_lam_parallel.py | 18 ++++++++++++++++-- 5 files changed, 80 insertions(+), 10 deletions(-) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 61c1a681..83f4cb74 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -1,3 +1,6 @@ +# Standard library +from typing import Union + # Third-party import torch @@ -15,8 +18,19 @@ class BaseGraphModel(ARModel): the encode-process-decode idea. """ - def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): - super().__init__(args, config=config, datastore=datastore) + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], + ): + super().__init__( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) # Load graph with static features graph_dir_path = datastore.root_path / "graphs" / args.graph_name diff --git a/neural_lam/models/base_hi_graph_model.py b/neural_lam/models/base_hi_graph_model.py index 8ec46b4f..275281e1 100644 --- a/neural_lam/models/base_hi_graph_model.py +++ b/neural_lam/models/base_hi_graph_model.py @@ -1,3 +1,6 @@ +# Standard library +from typing import Union + # Third-party from torch import nn @@ -14,8 +17,19 @@ class BaseHiGraphModel(BaseGraphModel): Base class for hierarchical graph models. """ - def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): - super().__init__(args, config=config, datastore=datastore) + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], + ): + super().__init__( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) # Track number of nodes, edges on each level # Flatten lists for efficient embedding diff --git a/neural_lam/models/graph_lam.py b/neural_lam/models/graph_lam.py index 68b7d01e..c2311b2e 100644 --- a/neural_lam/models/graph_lam.py +++ b/neural_lam/models/graph_lam.py @@ -1,3 +1,6 @@ +# Standard library +from typing import Union + # Third-party import torch_geometric as pyg @@ -17,8 +20,19 @@ class GraphLAM(BaseGraphModel): Oskarsson et al. (2023). 
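    Example (illustrative sketch; assumes `args`, `config` and `datastore`
    have been set up as in `train_model.py`, and that no boundary forcing
    is used):

    >>> model = GraphLAM(
    ...     args,
    ...     config=config,
    ...     datastore=datastore,
    ...     datastore_boundary=None,
    ... )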
""" - def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): - super().__init__(args, config=config, datastore=datastore) + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], + ): + super().__init__( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) assert ( not self.hierarchical diff --git a/neural_lam/models/hi_lam.py b/neural_lam/models/hi_lam.py index c340c95d..8ab420e8 100644 --- a/neural_lam/models/hi_lam.py +++ b/neural_lam/models/hi_lam.py @@ -1,3 +1,6 @@ +# Standard library +from typing import Union + # Third-party from torch import nn @@ -15,8 +18,19 @@ class HiLAM(BaseHiGraphModel): The Hi-LAM model from Oskarsson et al. (2023) """ - def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): - super().__init__(args, config=config, datastore=datastore) + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], + ): + super().__init__( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) # Make down GNNs, both for down edges and same level self.mesh_down_gnns = nn.ModuleList( diff --git a/neural_lam/models/hi_lam_parallel.py b/neural_lam/models/hi_lam_parallel.py index a0a84d29..c0be48e9 100644 --- a/neural_lam/models/hi_lam_parallel.py +++ b/neural_lam/models/hi_lam_parallel.py @@ -1,3 +1,6 @@ +# Standard library +from typing import Union + # Third-party import torch import torch_geometric as pyg @@ -18,8 +21,19 @@ class HiLAMParallel(BaseHiGraphModel): of Hi-LAM. """ - def __init__(self, args, config: NeuralLAMConfig, datastore: BaseDatastore): - super().__init__(args, config=config, datastore=datastore) + def __init__( + self, + args, + config: NeuralLAMConfig, + datastore: BaseDatastore, + datastore_boundary: Union[BaseDatastore, None], + ): + super().__init__( + args, + config=config, + datastore=datastore, + datastore_boundary=datastore_boundary, + ) # Processor GNNs # Create the complete edge_index combining all edges for processing From ebfd0bd2f883301939aafd72d08909a976ec0cea Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 4 Dec 2024 17:46:52 +0100 Subject: [PATCH 126/190] Fix dataset issue in npy stat script --- .../datastore/npyfilesmeps/compute_standardization_stats.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py index 4207812f..1f1c6943 100644 --- a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py +++ b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py @@ -202,7 +202,7 @@ def main( print("Computing mean and std.-dev. for parameters...") means, squares, flux_means, flux_squares = [], [], [], [] - for init_batch, target_batch, forcing_batch, _ in tqdm(loader): + for init_batch, target_batch, forcing_batch, _, _ in tqdm(loader): if distributed: init_batch, target_batch, forcing_batch = ( init_batch.to(device), @@ -276,6 +276,7 @@ def main( print("Computing mean and std.-dev. 
for one-step differences...") ds_standard = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=ar_steps, standardize=True, @@ -304,7 +305,7 @@ def main( diff_means, diff_squares = [], [] - for init_batch, target_batch, _, _ in tqdm( + for init_batch, target_batch, _, _, _ in tqdm( loader_standard, disable=rank != 0 ): if distributed: From 698991f8c71070c4780616d8d3a07dfb7dde4cb7 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 5 Dec 2024 13:25:52 +0100 Subject: [PATCH 127/190] Fix Inets not figuring out number of receiver nodes for g2m and m2g --- neural_lam/interaction_net.py | 12 ++++++++++-- neural_lam/models/ar_model.py | 4 ++-- neural_lam/models/base_graph_model.py | 21 ++++++++++++++++----- neural_lam/models/base_hi_graph_model.py | 19 ++++++++++++------- neural_lam/models/graph_lam.py | 16 ++++++++++++---- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/neural_lam/interaction_net.py b/neural_lam/interaction_net.py index 46223b88..14a0d1c7 100644 --- a/neural_lam/interaction_net.py +++ b/neural_lam/interaction_net.py @@ -25,6 +25,7 @@ def __init__( hidden_dim=None, edge_chunk_sizes=None, aggr_chunk_sizes=None, + num_rec=None, aggr="sum", ): """ @@ -44,6 +45,8 @@ def __init__( aggr_chunk_sizes: List of chunks sizes to split aggregated node representation into and use separate MLPs for (None = no chunking, same MLP) + num_rec: Number of receiver nodes. If None, derive from edge_index under + assumption that all receiver nodes have at least one incoming edge. aggr: Message aggregation method (sum/mean) """ assert aggr in ("sum", "mean"), f"Unknown aggregation method: {aggr}" @@ -53,9 +56,14 @@ def __init__( # Default to input dim if not explicitly given hidden_dim = input_dim - # any edge_index used here must start sender and rec. nodes at index 0 # Store number of receiver nodes according to edge_index - self.num_rec = edge_index[1].max() + 1 + if num_rec is None: + # Derive from edge_index + self.num_rec = edge_index[1].max() + 1 + else: + self.num_rec = num_rec + + # any edge_index used here must start sender and rec. 
nodes at index 0 edge_index = torch.stack( (edge_index[0] + self.num_rec, edge_index[1]), dim=0 ) # Make sender indices after rec diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 1edb7931..9773c7b1 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -109,7 +109,7 @@ def __init__( 2 * self.grid_output_dim + grid_static_dim # Temporal Embedding counts as one additional forcing_feature - + (num_forcing_vars + 1) + + 2*(num_forcing_vars) * (num_past_forcing_steps + num_future_forcing_steps + 1) ) @@ -144,7 +144,7 @@ def __init__( self.boundary_dim = ( boundary_static_dim # Temporal Embedding counts as one additional forcing_feature - + (num_boundary_forcing_vars + 1) + + 2*(num_boundary_forcing_vars) * (num_past_boundary_steps + num_future_boundary_steps + 1) ) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 83f4cb74..2bdb83ac 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -45,7 +45,6 @@ def __init__( setattr(self, name, attr_value) # Specify dimensions of data - self.num_mesh_nodes, _ = self.get_num_mesh() print( f"Loaded graph with {self.num_grid_nodes + self.num_mesh_nodes} " f"nodes ({self.num_grid_nodes} grid, {self.num_mesh_nodes} mesh)" @@ -88,6 +87,7 @@ def __init__( args.hidden_dim, hidden_layers=args.hidden_layers, update_edges=False, + num_rec=self.num_grid_connected_mesh_nodes, ) self.encoding_grid_mlp = utils.make_mlp( [args.hidden_dim] + self.mlp_blueprint_end @@ -99,6 +99,7 @@ def __init__( args.hidden_dim, hidden_layers=args.hidden_layers, update_edges=False, + num_rec=self.num_grid_nodes, ) # Output mapping (hidden_dim -> output_dim) @@ -108,12 +109,22 @@ def __init__( layer_norm=False, ) # No layer norm on this one - def get_num_mesh(self): + @property + def num_mesh_nodes(self): """ - Compute number of mesh nodes from loaded features, - and number of mesh nodes that should be ignored in encoding/decoding + Get the total number of mesh nodes in the used mesh graph """ - raise NotImplementedError("get_num_mesh not implemented") + raise NotImplementedError("num_mesh_nodes not implemented") + + @property + def num_grid_connected_mesh_nodes(self): + """ + Get the total number of mesh nodes that have a connection to + the grid (e.g. bottom level in a hierarchy) + """ + raise NotImplementedError( + "num_grid_connected_mesh_nodes not implemented" + ) def embedd_mesh_nodes(self): """ diff --git a/neural_lam/models/base_hi_graph_model.py b/neural_lam/models/base_hi_graph_model.py index 275281e1..59cdcc8e 100644 --- a/neural_lam/models/base_hi_graph_model.py +++ b/neural_lam/models/base_hi_graph_model.py @@ -113,18 +113,23 @@ def __init__( ] ) - def get_num_mesh(self): + @property + def num_mesh_nodes(self): """ - Compute number of mesh nodes from loaded features, - and number of mesh nodes that should be ignored in encoding/decoding + Get the total number of mesh nodes in the used mesh graph """ num_mesh_nodes = sum( node_feat.shape[0] for node_feat in self.mesh_static_features ) - num_mesh_nodes_ignore = ( - num_mesh_nodes - self.mesh_static_features[0].shape[0] - ) - return num_mesh_nodes, num_mesh_nodes_ignore + return num_mesh_nodes + + @property + def num_grid_connected_mesh_nodes(self): + """ + Get the total number of mesh nodes that have a connection to + the grid (e.g. 
bottom level in a hierarchy) + """ + return self.mesh_static_features[0].shape[0] # Bottom level def embedd_mesh_nodes(self): """ diff --git a/neural_lam/models/graph_lam.py b/neural_lam/models/graph_lam.py index c2311b2e..7adb02a6 100644 --- a/neural_lam/models/graph_lam.py +++ b/neural_lam/models/graph_lam.py @@ -70,12 +70,20 @@ def __init__( ], ) - def get_num_mesh(self): + @property + def num_mesh_nodes(self): """ - Compute number of mesh nodes from loaded features, - and number of mesh nodes that should be ignored in encoding/decoding + Get the total number of mesh nodes in the used mesh graph """ - return self.mesh_static_features.shape[0], 0 + return self.mesh_static_features.shape[0] + + @property + def num_grid_connected_mesh_nodes(self): + """ + Get the total number of mesh nodes that have a connection to + the grid (e.g. bottom level in a hierarchy) + """ + return self.num_mesh_nodes # All nodes def embedd_mesh_nodes(self): """ From 1d53ce7b86ee8c936a1c8c2fd9bad58bb672b844 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 5 Dec 2024 13:26:06 +0100 Subject: [PATCH 128/190] fixing up forecast type data tests, more and better defined scenarios --- tests/test_time_slicing.py | 175 ++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 70 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 48860161..21038e7b 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -79,8 +79,8 @@ def get_dataarray(self, category, split): da = xr.DataArray( values, dims=["time"], coords={"time": self._time_values} ) - # add `{category}_feature` and `grid_index` dimensions + # add `{category}_feature` and `grid_index` dimensions da = da.expand_dims("grid_index") da = da.expand_dims(f"{category}_feature") @@ -103,51 +103,55 @@ def get_vars_long_names(self, category): raise NotImplementedError() -ANALYSIS_STATE_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -FORCING_VALUES = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +INIT_STEPS = 2 -# Constants for forecast data -FORECAST_ANALYSIS_TIMES = np.datetime64("2020-01-01") + np.arange(3) -FORECAST_FORECAST_TIMES = np.timedelta64(0, "D") + np.arange(7) - -FORECAST_STATE_VALUES = np.array( - [ - # Analysis time 0 - [0, 1, 2, 3, 4, 5, 6], - # Analysis time 1 - [10, 11, 12, 13, 14, 15, 16], - # Analysis time 2 - [20, 21, 22, 23, 24, 25, 26], - ] -) +STATE_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +FORCING_VALUES = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] -FORECAST_FORCING_VALUES = np.array( - [ - # Analysis time 0 - [100, 101, 102, 103, 104, 105, 106], - # Analysis time 1 - [110, 111, 112, 113, 114, 115, 116], - # Analysis time 2 - [120, 121, 122, 123, 124, 125, 126], - ] -) +STATE_VALUES_FORECAST = [ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], # Analysis time 0 + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], # Analysis time 1 + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], # Analysis time 2 +] +FORCING_VALUES_FORECAST = [ + [100, 101, 102, 103, 104, 105, 106, 107, 108, 109], # Analysis time 0 + [110, 111, 112, 113, 114, 115, 116, 117, 118, 119], # Analysis time 1 + [120, 121, 122, 123, 124, 125, 126, 127, 128, 129], # Analysis time 2 +] + +SCENARIOS = [ + [3, 0, 0], + [3, 1, 0], + [3, 2, 0], + [3, 3, 0], + [3, 0, 1], + [3, 0, 2], + [3, 0, 3], + [3, 1, 1], + [3, 2, 1], + [3, 3, 1], + [3, 1, 2], + [3, 1, 3], + [3, 2, 2], + [3, 2, 3], + [3, 3, 2], + [3, 3, 3], +] @pytest.mark.parametrize( "ar_steps,num_past_forcing_steps,num_future_forcing_steps", - [[3, 0, 0], [3, 1, 0], [3, 2, 0], [3, 3, 0]], + SCENARIOS, ) def 
test_time_slicing_analysis( ar_steps, num_past_forcing_steps, num_future_forcing_steps ): # state and forcing variables have only one dimension, `time` - time_values = np.datetime64("2020-01-01") + np.arange( - len(ANALYSIS_STATE_VALUES) - ) - assert len(ANALYSIS_STATE_VALUES) == len(FORCING_VALUES) == len(time_values) + time_values = np.datetime64("2020-01-01") + np.arange(len(STATE_VALUES)) + assert len(STATE_VALUES) == len(FORCING_VALUES) == len(time_values) datastore = SinglePointDummyDatastore( - state_data=ANALYSIS_STATE_VALUES, + state_data=STATE_VALUES, forcing_data=FORCING_VALUES, time_values=time_values, is_forecast=False, @@ -168,12 +172,10 @@ def test_time_slicing_analysis( tensor.numpy() for tensor in sample ] + # Some scenarios for the human reader expected_init_states = [0, 1] if ar_steps == 3: expected_target_states = [2, 3, 4] - else: - raise NotImplementedError() - if num_past_forcing_steps == num_future_forcing_steps == 0: expected_forcing_values = [[12], [13], [14]] elif num_past_forcing_steps == 1 and num_future_forcing_steps == 0: @@ -188,49 +190,72 @@ def test_time_slicing_analysis( [11, 12, 13, 14], [12, 13, 14, 15], ] - else: - raise NotImplementedError() + + # Compute expected initial states and target states based on ar_steps + offset = max(0, num_past_forcing_steps - INIT_STEPS) + init_idx = INIT_STEPS + offset + # Compute expected forcing values based on num_past_forcing_steps and + # num_future_forcing_steps for all scenarios + expected_init_states = STATE_VALUES[offset:init_idx] + expected_target_states = STATE_VALUES[init_idx : init_idx + ar_steps] + total_forcing_window = num_past_forcing_steps + num_future_forcing_steps + 1 + expected_forcing_values = [] + for i in range(ar_steps): + start_idx = i + init_idx - num_past_forcing_steps + end_idx = i + init_idx + num_future_forcing_steps + 1 + forcing_window = FORCING_VALUES[start_idx:end_idx] + expected_forcing_values.append(forcing_window) # init_states: (2, N_grid, d_features) # target_states: (ar_steps, N_grid, d_features) # forcing: (ar_steps, N_grid, d_windowed_forcing * 2) # target_times: (ar_steps,) - assert init_states.shape == (2, 1, 1) - assert init_states[:, 0, 0].tolist() == expected_init_states - assert target_states.shape == (3, 1, 1) - assert target_states[:, 0, 0].tolist() == expected_target_states + # Adjust assertions to use computed expected values + assert init_states.shape == (INIT_STEPS, 1, 1) + np.testing.assert_array_equal(init_states[:, 0, 0], expected_init_states) + + assert target_states.shape == (ar_steps, 1, 1) + np.testing.assert_array_equal( + target_states[:, 0, 0], expected_target_states + ) assert forcing.shape == ( - 3, + ar_steps, 1, - # Factor 2 because each window step has a temporal embedding - (1 + num_past_forcing_steps + num_future_forcing_steps) * 2, - ) - np.testing.assert_equal( - forcing[:, 0, : num_past_forcing_steps + num_future_forcing_steps + 1], - np.array(expected_forcing_values), + total_forcing_window + * 2, # Each windowed feature includes temporal embedding ) + # Extract the forcing values from the tensor (excluding temporal embeddings) + forcing_values = forcing[:, 0, :total_forcing_window] + + # Compare with expected forcing values + for i in range(ar_steps): + np.testing.assert_array_equal( + forcing_values[i], expected_forcing_values[i] + ) + @pytest.mark.parametrize( "ar_steps,num_past_forcing_steps,num_future_forcing_steps", - [ - [3, 0, 0], - [3, 1, 0], - [3, 2, 0], - [3, 0, 1], - [3, 0, 2], - ], + SCENARIOS, ) def test_time_slicing_forecast( 
ar_steps, num_past_forcing_steps, num_future_forcing_steps ): + # Constants for forecast data + ANALYSIS_TIMES = np.datetime64("2020-01-01") + np.arange( + len(STATE_VALUES_FORECAST) + ) + ELAPSED_FORECAST_DURATION = np.timedelta64(0, "D") + np.arange( + len(FORCING_VALUES_FORECAST[0]) + ) # Create a dummy datastore with forecast data - time_values = (FORECAST_ANALYSIS_TIMES, FORECAST_FORECAST_TIMES) + time_values = (ANALYSIS_TIMES, ELAPSED_FORECAST_DURATION) datastore = SinglePointDummyDatastore( - state_data=FORECAST_STATE_VALUES, - forcing_data=FORECAST_FORCING_VALUES, + state_data=STATE_VALUES_FORECAST, + forcing_data=FORCING_VALUES_FORECAST, time_values=time_values, is_forecast=True, ) @@ -246,7 +271,7 @@ def test_time_slicing_forecast( ) # Test the dataset length - assert len(dataset) == len(FORECAST_ANALYSIS_TIMES) + assert len(dataset) == len(ANALYSIS_TIMES) sample = dataset[0] @@ -254,19 +279,29 @@ def test_time_slicing_forecast( tensor.numpy() for tensor in sample ] - # Expected initial states and target states - expected_init_states = FORECAST_STATE_VALUES[0][:2] - expected_target_states = FORECAST_STATE_VALUES[0][2 : 2 + ar_steps] + # Compute expected initial states and target states based on ar_steps + offset = max(0, num_past_forcing_steps - INIT_STEPS) + init_idx = INIT_STEPS + offset + expected_init_states = STATE_VALUES_FORECAST[0][offset:init_idx] + expected_target_states = STATE_VALUES_FORECAST[0][ + init_idx : init_idx + ar_steps + ] - # Expected forcing values + # Compute expected forcing values based on num_past_forcing_steps and + # num_future_forcing_steps total_forcing_window = num_past_forcing_steps + num_future_forcing_steps + 1 expected_forcing_values = [] for i in range(ar_steps): - start_idx = max(0, i + 2 - num_past_forcing_steps) - end_idx = i + 2 + num_future_forcing_steps + 1 - forcing_window = FORECAST_FORCING_VALUES[0][start_idx:end_idx] + start_idx = i + init_idx - num_past_forcing_steps + end_idx = i + init_idx + num_future_forcing_steps + 1 + forcing_window = FORCING_VALUES_FORECAST[INIT_STEPS][start_idx:end_idx] expected_forcing_values.append(forcing_window) + # init_states: (2, N_grid, d_features) + # target_states: (ar_steps, N_grid, d_features) + # forcing: (ar_steps, N_grid, d_windowed_forcing * 2) + # target_times: (ar_steps,) + # Assertions np.testing.assert_array_equal(init_states[:, 0, 0], expected_init_states) np.testing.assert_array_equal( @@ -275,9 +310,9 @@ def test_time_slicing_forecast( # Verify the shape of the forcing data expected_forcing_shape = ( - ar_steps, - 1, - total_forcing_window + ar_steps, # Number of AR steps + 1, # Number of grid points + total_forcing_window # Total number of forcing steps in the window * 2, # Each windowed feature includes temporal embedding ) assert forcing.shape == expected_forcing_shape From cfe1e278ae16a4ec8e19dc3f2db79e976e8014d6 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 5 Dec 2024 13:28:34 +0100 Subject: [PATCH 129/190] time step can and should be retrieved in __init__ match of state with forcing/boundary is now done with .sel and "pad" renaming some variables to make the code easier to read fixing the temporal encoding to only include embeddings for window-size --- neural_lam/weather_dataset.py | 218 +++++++++++++++++++--------------- 1 file changed, 120 insertions(+), 98 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 93988ed7..0ddad878 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -145,9 +145,12 @@ 
def __init__( # Check time step consistency in state data if self.datastore.is_forecast: state_times = self.da_state.analysis_time + self.forecast_step_state = self._get_time_step( + self.da_state.elapsed_forecast_duration + ) else: state_times = self.da_state.time - _ = self._get_time_step(state_times) + self.time_step_state = self._get_time_step(state_times) # Check time coverage for forcing and boundary data if self.da_forcing is not None or self.da_boundary is not None: @@ -164,9 +167,14 @@ def __init__( # is matched to the state data if self.datastore.is_forecast: forcing_times = self.da_forcing.analysis_time + self.forecast_step_forcing = self._get_time_step( + self.da_forcing.elapsed_forecast_duration + ) else: forcing_times = self.da_forcing.time - self._get_time_step(forcing_times.values) + self.time_step_forcing = self._get_time_step( + forcing_times.values + ) if self.da_boundary is not None: # Boundary data is part of a separate datastore @@ -174,20 +182,25 @@ def __init__( # Check that the boundary data covers the required time range if self.datastore_boundary.is_forecast: boundary_times = self.da_boundary.analysis_time + self.forecast_step_boundary = self._get_time_step( + self.da_boundary.elapsed_forecast_duration + ) else: boundary_times = self.da_boundary.time - boundary_time_step = self._get_time_step(boundary_times.values) + self.time_step_boundary = self._get_time_step( + boundary_times.values + ) boundary_time_min = boundary_times.min().values boundary_time_max = boundary_times.max().values # Calculate required bounds for boundary using its time step boundary_required_time_min = ( state_time_min - - self.num_past_forcing_steps * boundary_time_step + - self.num_past_forcing_steps * self.time_step_boundary ) boundary_required_time_max = ( state_time_max - + self.num_future_forcing_steps * boundary_time_step + + self.num_future_forcing_steps * self.time_step_boundary ) if boundary_time_min > boundary_required_time_min: @@ -306,13 +319,14 @@ def _slice_time( da_state, idx, n_steps: int, - da_forcing_boundary=None, + da_forcing=None, num_past_steps=None, num_future_steps=None, + is_boundary=False, ): """ Produce time slices of the given dataarrays `da_state` (state) and - `da_forcing_boundary`. For the state data, slicing is done + `da_forcing`. For the state data, slicing is done based on `idx`. For the forcing/boundary data, nearest neighbor matching is performed based on the state times. Additionally, the time difference between the matched forcing/boundary times and state times (in multiples @@ -328,7 +342,7 @@ def _slice_time( data. n_steps : int The number of time steps to include in the sample. - da_forcing_boundary : xr.DataArray + da_forcing : xr.DataArray The forcing/boundary dataarray to slice. num_past_steps : int, optional The number of past time steps to include in the forcing/boundary @@ -336,13 +350,15 @@ def _slice_time( num_future_steps : int, optional The number of future time steps to include in the forcing/boundary data. Default is `None`. + is_boundary : bool, optional + Whether the data is boundary data. Default is `False`. Returns ------- da_state_sliced : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'state_feature'). - da_forcing_boundary_matched : xr.DataArray + da_forcing_matched : xr.DataArray The sliced state dataarray with dims ('time', 'grid_index', 'forcing/boundary_feature_windowed'). If no forcing/boundary data is provided, this will be `None`. 
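        Notes
        -----
        A minimal, self-contained sketch of the nearest-past matching used
        for analysis data, with hypothetical hourly forcing times and an
        arbitrary state time:

        >>> import numpy as np
        >>> import pandas as pd
        >>> forcing_index = pd.date_range("2020-01-01", periods=24, freq="h")
        >>> state_time = np.datetime64("2020-01-01T07:30")
        >>> int(forcing_index.get_indexer([state_time], method="pad")[0])
        7

        A window of `num_past_steps` values before and `num_future_steps`
        values after this matched index is then extracted, together with the
        window's time offsets expressed in multiples of the state time step.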
@@ -372,9 +388,6 @@ def _slice_time( da_state_sliced = da_state_sliced.swap_dims( {"elapsed_forecast_duration": "time"} ) - # Asserting that the forecast time step is consistent - # In init this was only done for the analysis_time - self._get_time_step(da_state_sliced.time) else: # For analysis data we slice the time dimension directly. The offset @@ -384,43 +397,36 @@ def _slice_time( end_idx = idx + max(init_steps, num_past_steps) + n_steps da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) - if da_forcing_boundary is None: + if da_forcing is None: return da_state_sliced, None # Get the state times and its temporal resolution for matching with - # forcing data. No need to self._get_time_step as we have already - # checked the time step consistency in the state data. + # forcing data. state_times = da_state_sliced["time"] - state_time_step = state_times.values[1] - state_times.values[0] - + da_list = [] # Here we cannot check 'self.datastore.is_forecast' directly because we # might be dealing with a datastore_boundary - if "analysis_time" in da_forcing_boundary.dims: - # Select the closest analysis time in the forcing/boundary data - # This is mostly relevant for boundary data where the time steps - # are not necessarily the same as the state data. But still fast - # enough for forcing data where the time steps are the same. - idx = np.abs( - da_forcing_boundary.analysis_time.values - - self.da_state.analysis_time.values[idx] - ).argmin() - # Add a 'time' dimension using the actual forecast times - offset = max(init_steps, num_past_steps) - da_list = [] - for step in range(n_steps): - start_idx = offset + step - num_past_steps - end_idx = offset + step + num_future_steps + if "analysis_time" in da_forcing.dims: + # For forecast data with analysis_time and elapsed_forecast_duration + # Select the closest analysis_time in the past in the + # forcing/boundary data + offset = max(0, num_past_steps - init_steps) + state_time = state_times[init_steps].values + forcing_analysis_time_idx = da_forcing.analysis_time.get_index( + "analysis_time" + ).get_indexer([state_time], method="pad")[0] + for step_idx in range(init_steps, len(state_times)): + start_idx = offset + step_idx - num_past_steps + end_idx = offset + step_idx + num_future_steps + 1 current_time = ( - da_forcing_boundary.analysis_time[idx] - + da_forcing_boundary.elapsed_forecast_duration[ - offset + step - ] + forcing_analysis_time_idx + + da_forcing.elapsed_forecast_duration[step_idx] ) - da_sliced = da_forcing_boundary.isel( - analysis_time=idx, - elapsed_forecast_duration=slice(start_idx, end_idx + 1), + da_sliced = da_forcing.isel( + analysis_time=forcing_analysis_time_idx, + elapsed_forecast_duration=slice(start_idx, end_idx), ) da_sliced = da_sliced.rename( @@ -438,67 +444,75 @@ def _slice_time( da_list.append(da_sliced) - # Generate temporal embedding `time_diff_steps` for the - # forcing/boundary data. This is the time difference in multiples - # of state time steps between the forcing/boundary time and the - # state time. 
- da_forcing_boundary_matched = xr.concat(da_list, dim="time") - forcing_time_step = ( - da_forcing_boundary_matched.time.values[1] - - da_forcing_boundary_matched.time.values[0] - ) - # Since all time, grid_index and forcing_features share the same - # temporal_embedding we can just use the first one - da_forcing_boundary_matched["window"] = da_forcing_boundary_matched[ - "window" - ] * (forcing_time_step / state_time_step) - time_diff_steps = da_forcing_boundary_matched.isel( - grid_index=0, forcing_feature=0, time=0 - ).window.values + else: + for idx_time in range(init_steps, len(state_times)): + state_time = state_times[idx_time].values + + # Select the closest time in the past from forcing data using + # sel with method="pad" + forcing_time_idx = da_forcing.time.get_index( + "time" + ).get_indexer([state_time], method="pad")[0] + + # Use isel to select the window + da_window = da_forcing.isel( + time=slice( + forcing_time_idx - num_past_steps, + forcing_time_idx + num_future_steps + 1, + ), + ) + da_window = da_window.rename({"time": "window"}) + + # Assign 'window' coordinate + da_window = da_window.assign_coords( + window=np.arange(-num_past_steps, num_future_steps + 1) + ) + + da_window = da_window.expand_dims(dim={"time": [state_time]}) + + da_list.append(da_window) + + da_forcing_matched = xr.concat(da_list, dim="time") + + # Generate temporal embedding `time_diff_steps` for the + # forcing/boundary data. This is the time difference in multiples + # of state time steps between the forcing/boundary time and the + # state time + + if is_boundary: + if self.datastore_boundary.is_forecast: + boundary_time_step = self.forecast_step_boundary + state_time_step = self.forecast_step_state + else: + boundary_time_step = self.time_step_boundary + state_time_step = self.time_step_state + time_diff_steps = ( + da_forcing_matched["window"] + * (boundary_time_step / state_time_step), + ) else: - # For analysis data, we slice the time dimension directly. The - # offset is only relevant for the very first (and last) samples in - # the dataset. - forcing_times = da_forcing_boundary["time"] - - # Compute time differences between forcing and state times - # (in multiples of state time steps) - # Retrieve the indices of the closest times in the forcing data - time_deltas = ( - forcing_times.values[:, np.newaxis] - - state_times.values[np.newaxis, :] - ) / state_time_step - idx_min = np.abs(time_deltas).argmin(axis=0) - - # Get the time differences for windowed time steps - they are - # used as temporal embeddings and concatenated to the forcing - # features later. 
All features share the same temporal embedding - time_diff_steps = time_deltas[ - idx_min[init_steps] - - num_past_steps : idx_min[init_steps] - + num_future_steps - + 1, - init_steps, - ] - - # Create window dimension for forcing data to stack later - window_size = num_past_steps + num_future_steps + 1 - da_forcing_boundary_windowed = da_forcing_boundary.rolling( - time=window_size, center=False - ).construct(window_dim="window") - da_forcing_boundary_matched = da_forcing_boundary_windowed.isel( - time=idx_min[init_steps:] + if self.datastore.is_forecast: + forcing_time_step = self.forecast_step_forcing + state_time_step = self.forecast_step_state + else: + forcing_time_step = self.time_step_forcing + state_time_step = self.time_step_state + time_diff_steps = ( + da_forcing_matched["window"] + * (forcing_time_step / state_time_step), ) - + time_diff_steps = da_forcing_matched.isel( + grid_index=0, forcing_feature=0 + ).window.values # Add time difference as a new coordinate to concatenate to the # forcing features later as temporal embedding - da_forcing_boundary_matched["time_diff_steps"] = ( + da_forcing_matched["time_diff_steps"] = ( ("window"), time_diff_steps, ) - return da_state_sliced, da_forcing_boundary_matched + return da_state_sliced, da_forcing_matched def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data. This function stacks the @@ -523,16 +537,21 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): """ stacked_dim = "forcing_feature_windowed" if da_windowed is not None: + window_size = da_windowed.window.size # Stack the 'feature' and 'window' dimensions and add the # time step differences to the existing features as a temporal # embedding da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) - # All data variables share the same temporal embedding, hence - # only the first one is used + # Add the time step differences as a new feature to the windowed + # data + time_diff_steps = da_windowed["time_diff_steps"].isel( + forcing_feature_windowed=slice(0, window_size) + ) + # All data variables share the same temporal embedding da_windowed = xr.concat( - [da_windowed, da_windowed.time_diff_steps], + [da_windowed, time_diff_steps], dim="forcing_feature_windowed", ) else: @@ -587,16 +606,19 @@ def _build_item_dataarrays(self, idx): else: da_boundary = None - # if da_forcing_boundary is None, the function will return None for - # da_forcing_windowed + # This function will return a slice of the state data and the forcing + # and boundary data (if provided) for one sample (idx). + # If da_forcing is None, the function will return None for + # da_forcing_windowed. 
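        # Illustrative shape bookkeeping (hypothetical example with 2
        # forcing variables, num_past_forcing_steps=1 and
        # num_future_forcing_steps=1, i.e. a window of 3): after the
        # windowing below and the stacking in `_process_windowed_data`, the
        # forcing ends up with dims
        # (time=ar_steps, grid_index, forcing_feature_windowed=(2+1)*3),
        # where the extra "+1" feature carries the time-difference embedding.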
if da_boundary is not None: _, da_boundary_windowed = self._slice_time( da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing_boundary=da_boundary, + da_forcing=da_boundary, num_future_steps=self.num_future_boundary_steps, num_past_steps=self.num_past_boundary_steps, + is_boundary=True, ) else: da_boundary_windowed = None @@ -607,7 +629,7 @@ def _build_item_dataarrays(self, idx): da_state=da_state, idx=idx, n_steps=self.ar_steps, - da_forcing_boundary=da_forcing, + da_forcing=da_forcing, num_future_steps=self.num_future_forcing_steps, num_past_steps=self.num_past_forcing_steps, ) From e4e4e3789764c1a08270b41fb4c15dcace146fa5 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Wed, 4 Dec 2024 17:46:52 +0100 Subject: [PATCH 130/190] Fix dataset issue in npy stat script --- .../datastore/npyfilesmeps/compute_standardization_stats.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py index 4207812f..1f1c6943 100644 --- a/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py +++ b/neural_lam/datastore/npyfilesmeps/compute_standardization_stats.py @@ -202,7 +202,7 @@ def main( print("Computing mean and std.-dev. for parameters...") means, squares, flux_means, flux_squares = [], [], [], [] - for init_batch, target_batch, forcing_batch, _ in tqdm(loader): + for init_batch, target_batch, forcing_batch, _, _ in tqdm(loader): if distributed: init_batch, target_batch, forcing_batch = ( init_batch.to(device), @@ -276,6 +276,7 @@ def main( print("Computing mean and std.-dev. for one-step differences...") ds_standard = WeatherDataset( datastore=datastore, + datastore_boundary=None, split="train", ar_steps=ar_steps, standardize=True, @@ -304,7 +305,7 @@ def main( diff_means, diff_squares = [], [] - for init_batch, target_batch, _, _ in tqdm( + for init_batch, target_batch, _, _, _ in tqdm( loader_standard, disable=rank != 0 ): if distributed: From 29063c8f3227bbf5fc054983d485155eab9cc06c Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 5 Dec 2024 14:16:17 +0100 Subject: [PATCH 131/190] Adjust forcing dimensionalities after fix --- neural_lam/models/ar_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 9773c7b1..1edb7931 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -109,7 +109,7 @@ def __init__( 2 * self.grid_output_dim + grid_static_dim # Temporal Embedding counts as one additional forcing_feature - + 2*(num_forcing_vars) + + (num_forcing_vars + 1) * (num_past_forcing_steps + num_future_forcing_steps + 1) ) @@ -144,7 +144,7 @@ def __init__( self.boundary_dim = ( boundary_static_dim # Temporal Embedding counts as one additional forcing_feature - + 2*(num_boundary_forcing_vars) + + (num_boundary_forcing_vars + 1) * (num_past_boundary_steps + num_future_boundary_steps + 1) ) From f8613da77e0e2a040a05307210c7439f1660e43d Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Thu, 5 Dec 2024 15:14:28 +0100 Subject: [PATCH 132/190] added static feature to era5 boundary test datastore --- .../era5.datastore.yaml | 23 ++++++++++++++++++- .../era5.datastore.yaml | 23 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml 
index 587d7879..c83489c6 100644 --- a/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml +++ b/tests/datastore_examples/mdp/era5_1000hPa_danra_100m_winds/era5.datastore.yaml @@ -3,6 +3,7 @@ dataset_version: v1.0.0 output: variables: + static: [grid_index, static_feature] forcing: [time, grid_index, forcing_feature] coord_ranges: time: @@ -59,7 +60,7 @@ inputs: path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: - - mean_surface_net_short_wave_radiation_flux + - mean_sea_level_pressure dim_mapping: time: method: rename @@ -78,6 +79,26 @@ inputs: dims: [x, y] target_output_variable: forcing + era5_static: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' + dims: [time, longitude, latitude, level] + variables: + - land_sea_mask + dim_mapping: + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + static_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + grid_index: + method: stack + dims: [x, y] + target_output_variable: static + extra: projection: class_name: PlateCarree diff --git a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml index 587d7879..c83489c6 100644 --- a/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml +++ b/tests/datastore_examples/npyfilesmeps/era5_1000hPa_temp_meps_example_reduced/era5.datastore.yaml @@ -3,6 +3,7 @@ dataset_version: v1.0.0 output: variables: + static: [grid_index, static_feature] forcing: [time, grid_index, forcing_feature] coord_ranges: time: @@ -59,7 +60,7 @@ inputs: path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' dims: [time, longitude, latitude, level] variables: - - mean_surface_net_short_wave_radiation_flux + - mean_sea_level_pressure dim_mapping: time: method: rename @@ -78,6 +79,26 @@ inputs: dims: [x, y] target_output_variable: forcing + era5_static: + path: 'gs://weatherbench2/datasets/era5/1959-2023_01_10-6h-64x32_equiangular_conservative.zarr' + dims: [time, longitude, latitude, level] + variables: + - land_sea_mask + dim_mapping: + x: + method: rename + dim: longitude + y: + method: rename + dim: latitude + static_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + grid_index: + method: stack + dims: [x, y] + target_output_variable: static + extra: projection: class_name: PlateCarree From 48558b5f40ff9220fb6494faff499a17bd0253b6 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 5 Dec 2024 17:29:16 +0100 Subject: [PATCH 133/190] Expand graph creation script with flexible python interface to wmg --- neural_lam/build_rectangular_graph.py | 136 +++++++++++++++++++++----- 1 file changed, 113 insertions(+), 23 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 6cdd3ba7..551b5e75 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -19,6 +19,9 @@ def main(input_args=None): + """ + Build rectangular graph from archetype, using cmd-line arguments. 
+ """ parser = argparse.ArgumentParser( description="Rectangular graph generation using weather-models-graph", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -82,6 +85,73 @@ def main(input_args=None): config_path=args.config_path ) + assert ( + args.archetype in WMG_ARCHETYPES + ), f"Unknown archetype: {args.archetype}" + archetype_create_func = WMG_ARCHETYPES[args.archetype] + + create_kwargs = { + "mesh_node_distance": args.mesh_node_distance, + } + + if args.archetype != "keisler": + # Add additional multi-level kwargs + create_kwargs.update( + { + "level_refinement_factor": args.level_refinement_factor, + "max_num_levels": args.max_num_levels, + } + ) + + return _build_wmg_graph( + datastore=datastore, + datastore_boundary=datastore_boundary, + graph_build_func=archetype_create_func, + kwargs=create_kwargs, + graph_name=args.graph_name, + ) + + +def _build_wmg_graph( + datastore, + datastore_boundary, + graph_build_func, + kwargs, + graph_name, +): + """ + Build a graph using WMG in a way that's compatible with neural-lam. + Given datastores are used for coordinates and decode masking. + The given graph building function from WMG should be used, with kwargs. + + Parameters + ---------- + datastore : BaseDatastore + Datastore representing interior region of grid + datastore_boundary : BaseDatastore or None + Datastore representing boundary region, or None if no boundary forcing + graph_build_func + Function from WMG to use to build graph + kwargs : dict + Keyword arguments to feed to graph_build_func. Should not include + coords, coords_crs, graph_crs, return_components or decode_mask, as + these are here derived in a consistent way from the datastores. + graph_name : str + Name to save the graph as. + """ + + for derived_kwarg in ( + "coords", + "coords_crs", + "graph_crs", + "return_components", + "decode_mask", + ): + assert derived_kwarg not in kwargs, ( + f"Argument {derived_kwarg} should not be manually given when " + "building rectangular graph." 
+ ) + # Load grid positions coords = utils.get_stacked_lat_lons(datastore, datastore_boundary) # (num_nodes_full, 2) @@ -104,54 +174,46 @@ def main(input_args=None): axis=0, ) - # Build graph - assert ( - args.archetype in WMG_ARCHETYPES - ), f"Unknown archetype: {args.archetype}" - archetype_create_func = WMG_ARCHETYPES[args.archetype] - + # Set up all kwargs create_kwargs = { "coords": coords, - "mesh_node_distance": args.mesh_node_distance, "decode_mask": decode_mask, "graph_crs": graph_crs, "coords_crs": coords_crs, "return_components": True, } - if args.archetype != "keisler": - # Add additional multi-level kwargs - create_kwargs.update( - { - "level_refinement_factor": args.level_refinement_factor, - "max_num_levels": args.max_num_levels, - } - ) + create_kwargs.update(kwargs) - graph_comp = archetype_create_func(**create_kwargs) + # Build graph + graph_comp = graph_build_func(**create_kwargs) print("Created graph:") for name, subgraph in graph_comp.items(): print(f"{name}: {subgraph}") - # Save graph - graph_dir_path = os.path.join( - datastore.root_path, "graphs", args.graph_name + # Need to know if hierarchical for saving + hierarchical = (graph_build_func == WMG_ARCHETYPES["hierarchical"]) or ( + "m2m_connectivity" in kwargs + and kwargs["m2m_connectivity"] == "hierarchical" ) + + # Save graph + graph_dir_path = os.path.join(datastore.root_path, "graphs", graph_name) os.makedirs(graph_dir_path, exist_ok=True) for component, graph in graph_comp.items(): # This seems like a bit of a hack, maybe better if saving in wmg # was made consistent with nl if component == "m2m": - if args.archetype == "hierarchical": + if hierarchical: # Split by direction m2m_direction_comp = wmg.split_graph_by_edge_attribute( graph, attr="direction" ) - for direction, graph in m2m_direction_comp.items(): + for direction, dir_graph in m2m_direction_comp.items(): if direction == "same": # Name just m2m to be consistent with non-hierarchical wmg.save.to_pyg( - graph=graph, + graph=dir_graph, name="m2m", list_from_attribute="level", edge_features=["len", "vdiff"], @@ -160,7 +222,7 @@ def main(input_args=None): else: # up and down directions wmg.save.to_pyg( - graph=graph, + graph=dir_graph, name=f"mesh_{direction}", list_from_attribute="levels", edge_features=["len", "vdiff"], @@ -183,5 +245,33 @@ def main(input_args=None): ) +def build_graph(datastore, datastore_boundary, graph_name, **kwargs): + """ + Function that can be used for more fine-grained control of graph + construction. Directly uses wmg.create.base.create_all_graph_components, + with kwargs being passed on directly to there. + + Parameters + ---------- + datastore : BaseDatastore + Datastore representing interior region of grid + datastore_boundary : BaseDatastore or None + Datastore representing boundary region, or None if no boundary forcing + graph_name : str + Name to save the graph as. + **kwargs + Keyword arguments that are passed on to + wmg.create.base.create_all_graph_components. See WMG for accepted + values for these. 
+ """ + return _build_wmg_graph( + datastore=datastore, + datastore_boundary=datastore_boundary, + graph_build_func=wmg.create.base.create_all_graph_components, + kwargs=kwargs, + graph_name=graph_name, + ) + + if __name__ == "__main__": main() From 1a128266fd144eada9d8e74724402c6e0e84267d Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Fri, 6 Dec 2024 16:04:49 +0100 Subject: [PATCH 134/190] Change wandb env var to properly disable at start of testing (#94) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Describe your changes Tests are not working properly as wandb without any user logged in are making them crash. This should not happen, since we disable WANDB with env var when running the tests, but that variable is wrong. This is an attempt to fix that, to make sure wandb is disabled when running tests. ## Issue Link None ## Type of change - [x] 🐛 Bug fix (non-breaking change that fixes an issue) - [ ] ✨ New feature (non-breaking change that adds functionality) - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] 📖 Documentation (Addition or improvements to documentation) ## Checklist before requesting a review - [x] My branch is up-to-date with the target branch - if not update your fork with the changes from the target branch (use `pull` with `--rebase` option if possible). - [ ] I have performed a self-review of my code - [ ] For any new/modified functions/classes I have added docstrings that clearly describe its purpose, expected inputs and returned values - [ ] I have placed in-line comments to clarify the intent of any hard-to-understand passages of my code - [ ] I have updated the [README](README.MD) to cover introduced code changes - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have given the PR a name that clearly describes the change, written in imperative form ([context](https://www.gitkraken.com/learn/git/best-practices/git-commit-message#using-imperative-verb-form)). - [x] I have requested a reviewer and an assignee (assignee is responsible for merging). This applies only if you have write access to the repo, otherwise feel free to tag a maintainer to add a reviewer and assignee. ## Checklist for reviewers Each PR comes with its own improvements and flaws. The reviewer should check the following: - [ ] the code is readable - [ ] the code is well tested - [ ] the code is documented (including return types and parameters) - [ ] the code is easy to maintain ## Author checklist after completed review - [x] I have added a line to the CHANGELOG describing this change, in a section reflecting type of change (add section where missing): - *added*: when you have added new functionality - *changed*: when default behaviour of the code has been changed - *fixes*: when your contribution fixes a bug ## Checklist for assignee - [x] PR is up to date with the base branch - [x] the tests pass - [x] author has added an entry to the changelog (and designated the change as *added*, *changed* or *fixed*) - Once the PR is ready to be merged, squash commits and merge the PR. --- CHANGELOG.md | 2 ++ tests/conftest.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d4cac9..32961b16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fix wandb environment variable disabling wandb during tests. 
Now correctly uses WANDB_MODE=disabled. [\#94](https://github.com/mllam/neural-lam/pull/94) @joeloskarsson + - Fix bugs introduced with datastores functionality relating visualation plots [\#91](https://github.com/mllam/neural-lam/pull/91) @leifdenby ## [v0.2.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.2.0) diff --git a/tests/conftest.py b/tests/conftest.py index 6f579621..5d799c73 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,7 @@ # Disable weights and biases to avoid unnecessary logging # and to avoid having to deal with authentication -os.environ["WANDB_DISABLED"] = "true" +os.environ["WANDB_MODE"] = "disabled" DATASTORE_EXAMPLES_ROOT_PATH = Path("tests/datastore_examples") From f48d2b096ddefad8b15861424935a6b04351d487 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Fri, 6 Dec 2024 17:40:44 +0100 Subject: [PATCH 135/190] Change graph creation test to use new script --- neural_lam/build_rectangular_graph.py | 52 +++++++-- neural_lam/utils.py | 2 +- tests/test_graph_creation.py | 161 ++++++++++++++------------ tests/test_training.py | 2 +- 4 files changed, 133 insertions(+), 84 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 551b5e75..7934e98c 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -85,11 +85,6 @@ def main(input_args=None): config_path=args.config_path ) - assert ( - args.archetype in WMG_ARCHETYPES - ), f"Unknown archetype: {args.archetype}" - archetype_create_func = WMG_ARCHETYPES[args.archetype] - create_kwargs = { "mesh_node_distance": args.mesh_node_distance, } @@ -103,12 +98,12 @@ def main(input_args=None): } ) - return _build_wmg_graph( + return build_graph_from_archetype( datastore=datastore, datastore_boundary=datastore_boundary, - graph_build_func=archetype_create_func, - kwargs=create_kwargs, graph_name=args.graph_name, + archetype=args.archetype, + **create_kwargs, ) @@ -116,8 +111,8 @@ def _build_wmg_graph( datastore, datastore_boundary, graph_build_func, - kwargs, graph_name, + kwargs, ): """ Build a graph using WMG in a way that's compatible with neural-lam. @@ -245,6 +240,43 @@ def _build_wmg_graph( ) +def build_graph_from_archetype( + datastore, datastore_boundary, graph_name, archetype, **kwargs +): + """ + Function that builds graph using wmg archetype. + Uses archetype functions from wmg.create.archetype with kwargs being passed + on directly to those functions. + + Parameters + ---------- + datastore : BaseDatastore + Datastore representing interior region of grid + datastore_boundary : BaseDatastore or None + Datastore representing boundary region, or None if no boundary forcing + graph_name : str + Name to save the graph as. + archetype : str + Archetype to build. Must be one of "keisler", "graphcast" + or "hierarchical" + **kwargs + Keyword arguments that are passed on to + wmg.create.base.create_all_graph_components. See WMG for accepted + values for these. 
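A minimal usage sketch for the archetype path (placeholder datastore objects and numeric values, chosen to resemble those used in the tests; `archetype` must be one of the names listed above):

    # Illustrative sketch only; datastores and numbers are placeholders
    from neural_lam.build_rectangular_graph import build_graph_from_archetype

    build_graph_from_archetype(
        datastore=datastore,
        datastore_boundary=datastore_boundary,
        graph_name="hierarchical",
        archetype="hierarchical",
        mesh_node_distance=500000,
        level_refinement_factor=3,
        max_num_levels=3,
    )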
+ """ + + assert archetype in WMG_ARCHETYPES, f"Unknown archetype: {archetype}" + archetype_create_func = WMG_ARCHETYPES[archetype] + + return _build_wmg_graph( + datastore=datastore, + datastore_boundary=datastore_boundary, + graph_build_func=archetype_create_func, + graph_name=graph_name, + kwargs=kwargs, + ) + + def build_graph(datastore, datastore_boundary, graph_name, **kwargs): """ Function that can be used for more fine-grained control of graph @@ -268,8 +300,8 @@ def build_graph(datastore, datastore_boundary, graph_name, **kwargs): datastore=datastore, datastore_boundary=datastore_boundary, graph_build_func=wmg.create.base.create_all_graph_components, - kwargs=kwargs, graph_name=graph_name, + kwargs=kwargs, ) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 9b169a9f..1ba36fd6 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -52,7 +52,7 @@ def load_graph(graph_dir_path, device="cpu"): - m2m_features.pt - g2m_features.pt - m2g_features.pt - - mesh_features.pt + - m2m_node_features.pt And in addition for hierarchical graphs: - mesh_up_edge_index.pt diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index 93a7a55f..1c826de5 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -1,5 +1,4 @@ # Standard library -import tempfile from pathlib import Path # Third-party @@ -7,39 +6,55 @@ import torch # First-party -from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.build_rectangular_graph import ( + build_graph, + build_graph_from_archetype, +) from neural_lam.datastore import DATASTORES -from neural_lam.datastore.base import BaseRegularGridDatastore -from tests.conftest import init_datastore_example +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) -@pytest.mark.parametrize("graph_name", ["1level", "multiscale", "hierarchical"]) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) -def test_graph_creation(datastore_name, graph_name): +@pytest.mark.parametrize( + "datastore_boundary_name", + list(DATASTORES_BOUNDARY_EXAMPLES.keys()) + [None], +) +@pytest.mark.parametrize("archetype", ["keisler", "graphcast", "hierarchical"]) +def test_graph_creation(datastore_name, datastore_boundary_name, archetype): """Check that the `create_ graph_from_datastore` function is implemented. - And that the graph is created in the correct location. """ datastore = init_datastore_example(datastore_name) - if not isinstance(datastore, BaseRegularGridDatastore): - pytest.skip( - f"Skipping test for {datastore_name} as it is not a regular " - "grid datastore." 
+ if datastore_boundary_name is None: + # LAM scale + mesh_node_distance = 500000 + datastore_boundary = None + else: + # Global scale, ERA5 coords flattened with proj + mesh_node_distance = 10000000 + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name ) - if graph_name == "hierarchical": - hierarchical = True - n_max_levels = 3 - elif graph_name == "multiscale": - hierarchical = False - n_max_levels = 3 - elif graph_name == "1level": - hierarchical = False - n_max_levels = 1 - else: - raise ValueError(f"Unknown graph_name: {graph_name}") + create_kwargs = { + "mesh_node_distance": mesh_node_distance, + } + + num_levels = 1 + if archetype != "keisler": + # Add additional multi-level kwargs + create_kwargs.update( + { + "level_refinement_factor": 3, + "max_num_levels": num_levels, + } + ) required_graph_files = [ "m2m_edge_index.pt", @@ -48,8 +63,10 @@ def test_graph_creation(datastore_name, graph_name): "m2m_features.pt", "g2m_features.pt", "m2g_features.pt", - "mesh_features.pt", + "m2m_node_features.pt", ] + + hierarchical = archetype == "hierarchical" if hierarchical: required_graph_files.extend( [ @@ -59,61 +76,61 @@ def test_graph_creation(datastore_name, graph_name): "mesh_down_features.pt", ] ) + num_levels = 3 # TODO: check that the number of edges is consistent over the files, for # now we just check the number of features d_features = 3 d_mesh_static = 2 - with tempfile.TemporaryDirectory() as tmpdir: - graph_dir_path = Path(tmpdir) / "graph" / graph_name - - create_graph_from_datastore( - datastore=datastore, - output_root_path=str(graph_dir_path), - hierarchical=hierarchical, - n_max_levels=n_max_levels, - ) - - assert graph_dir_path.exists() - - # check that all the required files are present - for file_name in required_graph_files: - assert (graph_dir_path / file_name).exists() - - # try to load each and ensure they have the right shape - for file_name in required_graph_files: - file_id = Path(file_name).stem # remove the extension - result = torch.load(graph_dir_path / file_name) + # Name graph + graph_name = f"{datastore_name}_{datastore_boundary_name}_{archetype}" + + # Saved in datastore + # TODO: Maybe save in tmp dir? 
+ graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name + + build_graph_from_archetype( + datastore, datastore_boundary, graph_name, archetype, **create_kwargs + ) + + assert graph_dir_path.exists() + + # check that all the required files are present + for file_name in required_graph_files: + assert (graph_dir_path / file_name).exists() + + # try to load each and ensure they have the right shape + for file_name in required_graph_files: + file_id = Path(file_name).stem # remove the extension + result = torch.load(graph_dir_path / file_name) + + if file_id.startswith("g2m") or file_id.startswith("m2g"): + assert isinstance(result, torch.Tensor) + + if file_id.endswith("_index"): + assert result.shape[0] == 2 # adjacency matrix uses two rows + elif file_id.endswith("_features"): + assert result.shape[1] == d_features + + elif file_id.startswith("m2m") or file_id.startswith("mesh"): + assert isinstance(result, list) + if not hierarchical: + assert len(result) == 1 + else: + if file_id.startswith("mesh_up") or file_id.startswith( + "mesh_down" + ): + assert len(result) == num_levels - 1 + else: + assert len(result) == num_levels - if file_id.startswith("g2m") or file_id.startswith("m2g"): - assert isinstance(result, torch.Tensor) + for r in result: + assert isinstance(r, torch.Tensor) - if file_id.endswith("_index"): - assert ( - result.shape[0] == 2 - ) # adjacency matrix uses two rows + if file_id == "m2m_node_features": + assert r.shape[1] == d_mesh_static + elif file_id.endswith("_index"): + assert r.shape[0] == 2 # adjacency matrix uses two rows elif file_id.endswith("_features"): - assert result.shape[1] == d_features - - elif file_id.startswith("m2m") or file_id.startswith("mesh"): - assert isinstance(result, list) - if not hierarchical: - assert len(result) == 1 - else: - if file_id.startswith("mesh_up") or file_id.startswith( - "mesh_down" - ): - assert len(result) == n_max_levels - 1 - else: - assert len(result) == n_max_levels - - for r in result: - assert isinstance(r, torch.Tensor) - - if file_id == "mesh_features": - assert r.shape[1] == d_mesh_static - elif file_id.endswith("_index"): - assert r.shape[0] == 2 # adjacency matrix uses two rows - elif file_id.endswith("_features"): - assert r.shape[1] == d_features + assert r.shape[1] == d_features diff --git a/tests/test_training.py b/tests/test_training.py index ca0ebf41..86b46e09 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -63,7 +63,7 @@ def test_training(datastore_name, datastore_boundary_name): graph_name = "1level" - graph_dir_path = Path(datastore.root_path) / "graph" / graph_name + graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name if not graph_dir_path.exists(): create_graph_from_datastore( From 4044c09acb2f576953036ea51a82e7a11d8355a3 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 05:35:11 +0100 Subject: [PATCH 136/190] Remove networkx dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9607d1da..29e4ba22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ dependencies = [ "scipy>=1.10.0", "pytorch-lightning>=2.0.3", "shapely>=2.0.1", - "networkx>=3.0", "Cartopy>=0.22.0", "pyproj>=3.4.1", "tueplots>=0.0.8", From cb3787d7d9f37f81d08012dd8d442ff63572fff2 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 09:10:01 +0100 Subject: [PATCH 137/190] Use python 3.10 to be compatible with wmg --- .github/workflows/ci-pdm-install-and-test-gpu.yml | 4 ++-- 
.github/workflows/ci-pip-install-and-test-gpu.yml | 4 ++-- .github/workflows/pre-commit.yml | 2 +- pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-pdm-install-and-test-gpu.yml b/.github/workflows/ci-pdm-install-and-test-gpu.yml index 43a701c2..54ab438b 100644 --- a/.github/workflows/ci-pdm-install-and-test-gpu.yml +++ b/.github/workflows/ci-pdm-install-and-test-gpu.yml @@ -13,10 +13,10 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.10 - name: Install pdm run: | diff --git a/.github/workflows/ci-pip-install-and-test-gpu.yml b/.github/workflows/ci-pip-install-and-test-gpu.yml index 3afcca5a..efda1857 100644 --- a/.github/workflows/ci-pip-install-and-test-gpu.yml +++ b/.github/workflows/ci-pip-install-and-test-gpu.yml @@ -13,10 +13,10 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.10 - name: Install torch (GPU CUDA 12.1) run: | diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 71e28ad7..4e12c314 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v2 - name: Set up Python diff --git a/pyproject.toml b/pyproject.toml index 29e4ba22..e67f121a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "mllam-data-prep>=0.5.0", "weather-model-graphs>=0.2.0" ] -requires-python = ">=3.9" +requires-python = ">=3.10" [project.optional-dependencies] dev = ["pre-commit>=3.8.0", "pytest>=8.3.2", "pooch>=1.8.2", "gcsfs>=2021.10.0"] From a1f0f62fce853efc6bfc715d2ccfb847866d0283 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 09:50:56 +0100 Subject: [PATCH 138/190] Start fixing tests --- pyproject.toml | 2 +- tests/conftest.py | 14 ++++++++++++++ tests/dummy_datastore.py | 2 +- tests/test_cli.py | 4 ++-- tests/test_datasets.py | 15 ++++++++++----- tests/test_graph_creation.py | 5 ++--- tests/test_training.py | 20 +++++++++++++------- 7 files changed, 43 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e67f121a..c2cedaa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "parse>=1.20.2", "dataclass-wizard<0.31.0", "mllam-data-prep>=0.5.0", - "weather-model-graphs>=0.2.0" + "weather-model-graphs @ git+https://github.com/joeloskarsson/weather-model-graphs.git@decoding_mask" ] requires-python = ">=3.10" diff --git a/tests/conftest.py b/tests/conftest.py index 15ee1590..c2dc2140 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ from pathlib import Path # Third-party +import numpy as np import pooch import yaml @@ -11,6 +12,7 @@ from neural_lam.datastore.npyfilesmeps import ( compute_standardization_stats as compute_standardization_stats_meps, ) +from neural_lam.utils import get_stacked_xy # Local from .dummy_datastore import DummyDatastore @@ -121,3 +123,15 @@ def init_datastore_boundary_example(datastore_kind): ) return datastore_boundary + + +def get_test_mesh_dist(datastore, datastore_boundary): + """Compute a good mesh_node_distance for testing graph creation with + given datastores + """ + xy = 
get_stacked_xy(datastore, datastore_boundary) # (num_grid, 2) + # Compute minimum coordinate extent + min_extent = min(np.ptp(xy, axis=0)) + + # Want at least 10 mesh nodes in each direction + return min_extent / 10.0 diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index a958b8f5..2142b803 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -336,7 +336,7 @@ def get_dataarray( dim_order = self.expected_dim_order(category=category) return self.ds[category].transpose(*dim_order) - def get_xy(self, category: str, stacked: bool) -> ndarray: + def get_xy(self, category: str, stacked: bool = True) -> ndarray: """Return the x, y coordinates of the dataset. Parameters diff --git a/tests/test_cli.py b/tests/test_cli.py index 0dbd04a1..cd6b00eb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,6 @@ # First-party import neural_lam -import neural_lam.create_graph +import neural_lam.build_rectangular_graph import neural_lam.train_model @@ -8,5 +8,5 @@ def test_import(): """This test just ensures that each cli entry-point can be imported for now, eventually we should test their execution too.""" assert neural_lam is not None - assert neural_lam.create_graph is not None + assert neural_lam.build_rectangular_graph is not None assert neural_lam.train_model is not None diff --git a/tests/test_datasets.py b/tests/test_datasets.py index aa7b645d..748abe8a 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -9,13 +9,14 @@ # First-party from neural_lam import config as nlconfig -from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.build_rectangular_graph import build_graph_from_archetype from neural_lam.datastore import DATASTORES from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataset from tests.conftest import ( DATASTORES_BOUNDARY_EXAMPLES, + get_test_mesh_dist, init_datastore_boundary_example, init_datastore_example, ) @@ -225,14 +226,18 @@ class ModelArgs: args = ModelArgs() - graph_dir_path = Path(datastore.root_path) / "graph" / graph_name + graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name def _create_graph(): if not graph_dir_path.exists(): - create_graph_from_datastore( + build_graph_from_archetype( datastore=datastore, - output_root_path=str(graph_dir_path), - n_max_levels=1, + datastore_boundary=datastore_boundary, + graph_name=graph_name, + archetype="keisler", + mesh_node_distance=get_test_mesh_dist( + datastore, datastore_boundary + ), ) if not isinstance(datastore, BaseRegularGridDatastore): diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index 1c826de5..3d6e079d 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -13,6 +13,7 @@ from neural_lam.datastore import DATASTORES from tests.conftest import ( DATASTORES_BOUNDARY_EXAMPLES, + get_test_mesh_dist, init_datastore_boundary_example, init_datastore_example, ) @@ -33,17 +34,15 @@ def test_graph_creation(datastore_name, datastore_boundary_name, archetype): if datastore_boundary_name is None: # LAM scale - mesh_node_distance = 500000 datastore_boundary = None else: # Global scale, ERA5 coords flattened with proj - mesh_node_distance = 10000000 datastore_boundary = init_datastore_boundary_example( datastore_boundary_name ) create_kwargs = { - "mesh_node_distance": mesh_node_distance, + "mesh_node_distance": get_test_mesh_dist(datastore, datastore_boundary), } num_levels = 1 diff 
--git a/tests/test_training.py b/tests/test_training.py index 86b46e09..5f2d43d3 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -9,13 +9,14 @@ # First-party from neural_lam import config as nlconfig -from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.build_rectangular_graph import build_graph_from_archetype from neural_lam.datastore import DATASTORES from neural_lam.datastore.base import BaseRegularGridDatastore from neural_lam.models.graph_lam import GraphLAM from neural_lam.weather_dataset import WeatherDataModule from tests.conftest import ( DATASTORES_BOUNDARY_EXAMPLES, + get_test_mesh_dist, init_datastore_boundary_example, init_datastore_example, ) @@ -65,12 +66,17 @@ def test_training(datastore_name, datastore_boundary_name): graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name - if not graph_dir_path.exists(): - create_graph_from_datastore( - datastore=datastore, - output_root_path=str(graph_dir_path), - n_max_levels=1, - ) + def _create_graph(): + if not graph_dir_path.exists(): + build_graph_from_archetype( + datastore=datastore, + datastore_boundary=datastore_boundary, + graph_name=graph_name, + archetype="keisler", + mesh_node_distance=get_test_mesh_dist( + datastore, datastore_boundary + ), + ) data_module = WeatherDataModule( datastore=datastore, From 797b86774e35ab488c8d3631693098b2d969b392 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 10:28:46 +0100 Subject: [PATCH 139/190] Wrap up first version of new graph tests --- tests/conftest.py | 71 ++++++++++++++++ tests/test_datasets.py | 18 ++-- tests/test_graph_creation.py | 156 +++++++++++++++++++---------------- tests/test_training.py | 15 ++-- 4 files changed, 179 insertions(+), 81 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c2dc2140..7dfb8ef8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ # Third-party import numpy as np import pooch +import torch import yaml # First-party @@ -135,3 +136,73 @@ def get_test_mesh_dist(datastore, datastore_boundary): # Want at least 10 mesh nodes in each direction return min_extent / 10.0 + + +def check_saved_graph(graph_dir_path, hierarchical): + """Perform all checking for a saved graph""" + required_graph_files = [ + "m2m_edge_index.pt", + "g2m_edge_index.pt", + "m2g_edge_index.pt", + "m2m_features.pt", + "g2m_features.pt", + "m2g_features.pt", + "m2m_node_features.pt", + ] + + if hierarchical: + required_graph_files.extend( + [ + "mesh_up_edge_index.pt", + "mesh_down_edge_index.pt", + "mesh_up_features.pt", + "mesh_down_features.pt", + ] + ) + num_levels = 3 + + # TODO: check that the number of edges is consistent over the files, for + # now we just check the number of features + d_features = 3 + d_mesh_static = 2 + + assert graph_dir_path.exists() + + # check that all the required files are present + for file_name in required_graph_files: + assert (graph_dir_path / file_name).exists() + + # try to load each and ensure they have the right shape + for file_name in required_graph_files: + file_id = Path(file_name).stem # remove the extension + result = torch.load(graph_dir_path / file_name) + + if file_id.startswith("g2m") or file_id.startswith("m2g"): + assert isinstance(result, torch.Tensor) + + if file_id.endswith("_index"): + assert result.shape[0] == 2 # adjacency matrix uses two rows + elif file_id.endswith("_features"): + assert result.shape[1] == d_features + + elif file_id.startswith("m2m") or file_id.startswith("mesh"): + assert isinstance(result, 
list) + if not hierarchical: + assert len(result) == 1 + else: + if file_id.startswith("mesh_up") or file_id.startswith( + "mesh_down" + ): + assert len(result) == num_levels - 1 + else: + assert len(result) == num_levels + + for r in result: + assert isinstance(r, torch.Tensor) + + if file_id == "m2m_node_features": + assert r.shape[1] == d_mesh_static + elif file_id.endswith("_index"): + assert r.shape[0] == 2 # adjacency matrix uses two rows + elif file_id.endswith("_features"): + assert r.shape[1] == d_features diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 748abe8a..5735f525 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -209,31 +209,34 @@ def test_single_batch(datastore_name, datastore_boundary_name, split): torch.device("cuda") if torch.cuda.is_available() else "cpu" ) # noqa - graph_name = "1level" + flat_graph_name = "1level" class ModelArgs: output_std = False loss = "mse" restore_opt = False n_example_pred = 1 - graph = graph_name + graph_name = flat_graph_name hidden_dim = 4 hidden_layers = 1 processor_layers = 2 mesh_aggr = "sum" num_past_forcing_steps = 1 num_future_forcing_steps = 1 + num_past_boundary_steps = 1 + num_future_boundary_steps = 1 + shared_grid_embedder = False args = ModelArgs() - graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name + graph_dir_path = Path(datastore.root_path) / "graphs" / flat_graph_name def _create_graph(): if not graph_dir_path.exists(): build_graph_from_archetype( datastore=datastore, datastore_boundary=datastore_boundary, - graph_name=graph_name, + graph_name=flat_graph_name, archetype="keisler", mesh_node_distance=get_test_mesh_dist( datastore, datastore_boundary @@ -257,7 +260,12 @@ def _create_graph(): datastore=datastore, datastore_boundary=datastore_boundary, split=split ) - model = GraphLAM(args=args, datastore=datastore, config=config) # noqa + model = GraphLAM( + args=args, + datastore=datastore, + datastore_boundary=datastore_boundary, + config=config, + ) # noqa model_device = model.to(device_name) data_loader = DataLoader(dataset, batch_size=2) diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index 3d6e079d..8c2fa60b 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -3,7 +3,6 @@ # Third-party import pytest -import torch # First-party from neural_lam.build_rectangular_graph import ( @@ -13,6 +12,7 @@ from neural_lam.datastore import DATASTORES from tests.conftest import ( DATASTORES_BOUNDARY_EXAMPLES, + check_saved_graph, get_test_mesh_dist, init_datastore_boundary_example, init_datastore_example, @@ -25,8 +25,8 @@ list(DATASTORES_BOUNDARY_EXAMPLES.keys()) + [None], ) @pytest.mark.parametrize("archetype", ["keisler", "graphcast", "hierarchical"]) -def test_graph_creation(datastore_name, datastore_boundary_name, archetype): - """Check that the `create_ graph_from_datastore` function is implemented. +def test_build_archetype(datastore_name, datastore_boundary_name, archetype): + """Check that the `build_graph_from_archetype` function is implemented. And that the graph is created in the correct location. 
""" @@ -50,86 +50,102 @@ def test_graph_creation(datastore_name, datastore_boundary_name, archetype): # Add additional multi-level kwargs create_kwargs.update( { - "level_refinement_factor": 3, + "level_refinement_factor": 2, "max_num_levels": num_levels, } ) - required_graph_files = [ - "m2m_edge_index.pt", - "g2m_edge_index.pt", - "m2g_edge_index.pt", - "m2m_features.pt", - "g2m_features.pt", - "m2g_features.pt", - "m2m_node_features.pt", - ] + # Name graph + graph_name = f"{datastore_name}_{datastore_boundary_name}_{archetype}" + + # Saved in datastore + # TODO: Maybe save in tmp dir? + graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name + + build_graph_from_archetype( + datastore, datastore_boundary, graph_name, archetype, **create_kwargs + ) hierarchical = archetype == "hierarchical" - if hierarchical: - required_graph_files.extend( - [ - "mesh_up_edge_index.pt", - "mesh_down_edge_index.pt", - "mesh_up_features.pt", - "mesh_down_features.pt", - ] + check_saved_graph(graph_dir_path, hierarchical) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +@pytest.mark.parametrize( + "datastore_boundary_name", + list(DATASTORES_BOUNDARY_EXAMPLES.keys()) + [None], +) +@pytest.mark.parametrize( + "config_i, graph_kwargs", + enumerate( + [ + # Assortment of options + { + "m2m_connectivity": "flat", + "m2g_connectivity": "nearest_neighbour", + "g2m_connectivity": "nearest_neighbour", + "m2m_connectivity_kwargs": {}, + }, + { + "m2m_connectivity": "flat_multiscale", + "m2g_connectivity": "nearest_neighbours", + "g2m_connectivity": "within_radius", + "m2m_connectivity_kwargs": { + "level_refinement_factor": 2, + }, + "m2g_connectivity_kwargs": { + "max_num_neighbours": 4, + }, + "g2m_connectivity_kwargs": { + "rel_max_dist": 0.3, + }, + }, + { + "m2m_connectivity": "hierarchical", + "m2g_connectivity": "containing_rectangle", + "g2m_connectivity": "within_radius", + "m2m_connectivity_kwargs": { + "level_refinement_factor": 2, + }, + "m2g_connectivity_kwargs": {}, + "g2m_connectivity_kwargs": { + "rel_max_dist": 0.51, + }, + }, + ] + ), +) +def test_build_from_options( + datastore_name, datastore_boundary_name, config_i, graph_kwargs +): + """Check that the `build_graph_from_archetype` function is implemented. + And that the graph is created in the correct location. + + """ + datastore = init_datastore_example(datastore_name) + + if datastore_boundary_name is None: + # LAM scale + datastore_boundary = None + else: + # Global scale, ERA5 coords flattened with proj + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name ) - num_levels = 3 - # TODO: check that the number of edges is consistent over the files, for - # now we just check the number of features - d_features = 3 - d_mesh_static = 2 + # Insert mesh distance + graph_kwargs["m2m_connectivity_kwargs"][ + "mesh_node_distance" + ] = get_test_mesh_dist(datastore, datastore_boundary) # Name graph - graph_name = f"{datastore_name}_{datastore_boundary_name}_{archetype}" + graph_name = f"{datastore_name}_{datastore_boundary_name}_config{config_i}" # Saved in datastore # TODO: Maybe save in tmp dir? 
graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name - build_graph_from_archetype( - datastore, datastore_boundary, graph_name, archetype, **create_kwargs - ) + build_graph(datastore, datastore_boundary, graph_name, **graph_kwargs) - assert graph_dir_path.exists() - - # check that all the required files are present - for file_name in required_graph_files: - assert (graph_dir_path / file_name).exists() - - # try to load each and ensure they have the right shape - for file_name in required_graph_files: - file_id = Path(file_name).stem # remove the extension - result = torch.load(graph_dir_path / file_name) - - if file_id.startswith("g2m") or file_id.startswith("m2g"): - assert isinstance(result, torch.Tensor) - - if file_id.endswith("_index"): - assert result.shape[0] == 2 # adjacency matrix uses two rows - elif file_id.endswith("_features"): - assert result.shape[1] == d_features - - elif file_id.startswith("m2m") or file_id.startswith("mesh"): - assert isinstance(result, list) - if not hierarchical: - assert len(result) == 1 - else: - if file_id.startswith("mesh_up") or file_id.startswith( - "mesh_down" - ): - assert len(result) == num_levels - 1 - else: - assert len(result) == num_levels - - for r in result: - assert isinstance(r, torch.Tensor) - - if file_id == "m2m_node_features": - assert r.shape[1] == d_mesh_static - elif file_id.endswith("_index"): - assert r.shape[0] == 2 # adjacency matrix uses two rows - elif file_id.endswith("_features"): - assert r.shape[1] == d_features + hierarchical = graph_kwargs["m2m_connectivity"] == "hierarchical" + check_saved_graph(graph_dir_path, hierarchical) diff --git a/tests/test_training.py b/tests/test_training.py index 5f2d43d3..4d07b087 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -62,16 +62,16 @@ def test_training(datastore_name, datastore_boundary_name): log_every_n_steps=1, ) - graph_name = "1level" + flat_graph_name = "1level" - graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name + graph_dir_path = Path(datastore.root_path) / "graphs" / flat_graph_name def _create_graph(): if not graph_dir_path.exists(): build_graph_from_archetype( datastore=datastore, datastore_boundary=datastore_boundary, - graph_name=graph_name, + graph_name=flat_graph_name, archetype="keisler", mesh_node_distance=get_test_mesh_dist( datastore, datastore_boundary @@ -99,7 +99,7 @@ class ModelArgs: n_example_pred = 1 # XXX: this should be superfluous when we have already defined the # model object no? 
- graph = graph_name + graph_name = flat_graph_name hidden_dim = 4 hidden_layers = 1 processor_layers = 2 @@ -111,6 +111,7 @@ class ModelArgs: num_future_forcing_steps = 1 num_past_boundary_steps = 1 num_future_boundary_steps = 1 + shared_grid_embedder = False model_args = ModelArgs() @@ -120,10 +121,12 @@ class ModelArgs: ) ) - model = GraphLAM( # noqa + model = GraphLAM( args=model_args, datastore=datastore, + datastore_boundary=datastore_boundary, config=config, - ) + ) # noqa + wandb.init() trainer.fit(model=model, datamodule=data_module) From 9a9bf9183c7eaa0bf64639949a0fb0dfa312d15b Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 16:19:31 +0100 Subject: [PATCH 140/190] Fix graph creation tests --- tests/conftest.py | 3 +-- tests/test_graph_creation.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7dfb8ef8..a21fffda 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -138,7 +138,7 @@ def get_test_mesh_dist(datastore, datastore_boundary): return min_extent / 10.0 -def check_saved_graph(graph_dir_path, hierarchical): +def check_saved_graph(graph_dir_path, hierarchical, num_levels=1): """Perform all checking for a saved graph""" required_graph_files = [ "m2m_edge_index.pt", @@ -159,7 +159,6 @@ def check_saved_graph(graph_dir_path, hierarchical): "mesh_down_features.pt", ] ) - num_levels = 3 # TODO: check that the number of edges is consistent over the files, for # now we just check the number of features diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index 8c2fa60b..acb625cb 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -28,7 +28,6 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): """Check that the `build_graph_from_archetype` function is implemented. And that the graph is created in the correct location. 
- """ datastore = init_datastore_example(datastore_name) @@ -45,12 +44,14 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): "mesh_node_distance": get_test_mesh_dist(datastore, datastore_boundary), } - num_levels = 1 - if archetype != "keisler": + if archetype == "keisler": + num_levels = 1 + else: # Add additional multi-level kwargs + num_levels = 2 create_kwargs.update( { - "level_refinement_factor": 2, + "level_refinement_factor": 3, "max_num_levels": num_levels, } ) @@ -67,7 +68,7 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): ) hierarchical = archetype == "hierarchical" - check_saved_graph(graph_dir_path, hierarchical) + check_saved_graph(graph_dir_path, hierarchical, num_levels) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) @@ -91,7 +92,8 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): "m2g_connectivity": "nearest_neighbours", "g2m_connectivity": "within_radius", "m2m_connectivity_kwargs": { - "level_refinement_factor": 2, + "level_refinement_factor": 3, + "max_num_levels": None, }, "m2g_connectivity_kwargs": { "max_num_neighbours": 4, @@ -106,6 +108,7 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): "g2m_connectivity": "within_radius", "m2m_connectivity_kwargs": { "level_refinement_factor": 2, + "max_num_levels": 2, }, "m2g_connectivity_kwargs": {}, "g2m_connectivity_kwargs": { @@ -148,4 +151,5 @@ def test_build_from_options( build_graph(datastore, datastore_boundary, graph_name, **graph_kwargs) hierarchical = graph_kwargs["m2m_connectivity"] == "hierarchical" - check_saved_graph(graph_dir_path, hierarchical) + num_levels = 2 if hierarchical else 1 + check_saved_graph(graph_dir_path, hierarchical, num_levels) From e77c87b17fdbf59e1bc41a13371e8817d8a546c2 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 16:39:19 +0100 Subject: [PATCH 141/190] Save grpahs in temporary dir during testing --- neural_lam/build_rectangular_graph.py | 31 ++++++++++++++--- tests/conftest.py | 2 +- tests/test_graph_creation.py | 48 +++++++++++++++++---------- 3 files changed, 59 insertions(+), 22 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 7934e98c..2ff5328a 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -111,8 +111,9 @@ def _build_wmg_graph( datastore, datastore_boundary, graph_build_func, - graph_name, kwargs, + graph_name, + dir_save_path=None, ): """ Build a graph using WMG in a way that's compatible with neural-lam. @@ -133,6 +134,9 @@ def _build_wmg_graph( these are here derived in a consistent way from the datastores. graph_name : str Name to save the graph as. + dir_save_path : str or None + Path to directory where graph should be saved, in directory graph_name. + If None, save in "graphs" directory in the root directory of datastore. 
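The tests updated later in this patch exercise this parameter by saving graphs under a temporary directory, roughly as in this sketch (placeholder datastores and a placeholder mesh distance):

    # Illustrative sketch only; datastores and mesh distance are placeholders
    import tempfile
    from pathlib import Path

    from neural_lam.build_rectangular_graph import build_graph_from_archetype

    with tempfile.TemporaryDirectory() as tmpdir:
        build_graph_from_archetype(
            datastore,
            datastore_boundary,
            "1level",
            "keisler",
            dir_save_path=Path(tmpdir) / "graphs",
            mesh_node_distance=500000,
        )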
""" for derived_kwarg in ( @@ -193,7 +197,11 @@ def _build_wmg_graph( ) # Save graph - graph_dir_path = os.path.join(datastore.root_path, "graphs", graph_name) + if dir_save_path is None: + graph_dir_path = os.path.join(datastore.root_path, "graphs", graph_name) + else: + graph_dir_path = os.path.join(dir_save_path, graph_name) + os.makedirs(graph_dir_path, exist_ok=True) for component, graph in graph_comp.items(): # This seems like a bit of a hack, maybe better if saving in wmg @@ -241,7 +249,12 @@ def _build_wmg_graph( def build_graph_from_archetype( - datastore, datastore_boundary, graph_name, archetype, **kwargs + datastore, + datastore_boundary, + graph_name, + archetype, + dir_save_path, + **kwargs, ): """ Function that builds graph using wmg archetype. @@ -259,6 +272,9 @@ def build_graph_from_archetype( archetype : str Archetype to build. Must be one of "keisler", "graphcast" or "hierarchical" + dir_save_path : str or None + Path to directory where graph should be saved, in directory graph_name. + If None, save in "graphs" directory in the root directory of datastore. **kwargs Keyword arguments that are passed on to wmg.create.base.create_all_graph_components. See WMG for accepted @@ -273,11 +289,14 @@ def build_graph_from_archetype( datastore_boundary=datastore_boundary, graph_build_func=archetype_create_func, graph_name=graph_name, + dir_save_path=dir_save_path, kwargs=kwargs, ) -def build_graph(datastore, datastore_boundary, graph_name, **kwargs): +def build_graph( + datastore, datastore_boundary, graph_name, dir_save_path=None, **kwargs +): """ Function that can be used for more fine-grained control of graph construction. Directly uses wmg.create.base.create_all_graph_components, @@ -291,6 +310,9 @@ def build_graph(datastore, datastore_boundary, graph_name, **kwargs): Datastore representing boundary region, or None if no boundary forcing graph_name : str Name to save the graph as. + dir_save_path : str or None + Path to directory where graph should be saved, in directory graph_name. + If None, save in "graphs" directory in the root directory of datastore. **kwargs Keyword arguments that are passed on to wmg.create.base.create_all_graph_components. 
See WMG for accepted @@ -301,6 +323,7 @@ def build_graph(datastore, datastore_boundary, graph_name, **kwargs): datastore_boundary=datastore_boundary, graph_build_func=wmg.create.base.create_all_graph_components, graph_name=graph_name, + dir_save_path=dir_save_path, kwargs=kwargs, ) diff --git a/tests/conftest.py b/tests/conftest.py index a21fffda..ea06862e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -174,7 +174,7 @@ def check_saved_graph(graph_dir_path, hierarchical, num_levels=1): # try to load each and ensure they have the right shape for file_name in required_graph_files: file_id = Path(file_name).stem # remove the extension - result = torch.load(graph_dir_path / file_name) + result = torch.load(graph_dir_path / file_name, weights_only=True) if file_id.startswith("g2m") or file_id.startswith("m2g"): assert isinstance(result, torch.Tensor) diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index acb625cb..a2335dfa 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -1,4 +1,5 @@ # Standard library +import tempfile from pathlib import Path # Third-party @@ -59,16 +60,22 @@ def test_build_archetype(datastore_name, datastore_boundary_name, archetype): # Name graph graph_name = f"{datastore_name}_{datastore_boundary_name}_{archetype}" - # Saved in datastore - # TODO: Maybe save in tmp dir? - graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name - - build_graph_from_archetype( - datastore, datastore_boundary, graph_name, archetype, **create_kwargs - ) + # Saved in temporary dir + with tempfile.TemporaryDirectory() as tmpdir: + graph_saving_path = Path(tmpdir) / "graphs" + graph_dir_path = graph_saving_path / graph_name + + build_graph_from_archetype( + datastore, + datastore_boundary, + graph_name, + archetype, + dir_save_path=graph_saving_path, + **create_kwargs, + ) - hierarchical = archetype == "hierarchical" - check_saved_graph(graph_dir_path, hierarchical, num_levels) + hierarchical = archetype == "hierarchical" + check_saved_graph(graph_dir_path, hierarchical, num_levels) @pytest.mark.parametrize("datastore_name", DATASTORES.keys()) @@ -144,12 +151,19 @@ def test_build_from_options( # Name graph graph_name = f"{datastore_name}_{datastore_boundary_name}_config{config_i}" - # Saved in datastore - # TODO: Maybe save in tmp dir? 
- graph_dir_path = Path(datastore.root_path) / "graphs" / graph_name - - build_graph(datastore, datastore_boundary, graph_name, **graph_kwargs) + # Save in temporary dir + with tempfile.TemporaryDirectory() as tmpdir: + graph_saving_path = Path(tmpdir) / "graphs" + graph_dir_path = graph_saving_path / graph_name + + build_graph( + datastore, + datastore_boundary, + graph_name, + dir_save_path=graph_saving_path, + **graph_kwargs, + ) - hierarchical = graph_kwargs["m2m_connectivity"] == "hierarchical" - num_levels = 2 if hierarchical else 1 - check_saved_graph(graph_dir_path, hierarchical, num_levels) + hierarchical = graph_kwargs["m2m_connectivity"] == "hierarchical" + num_levels = 2 if hierarchical else 1 + check_saved_graph(graph_dir_path, hierarchical, num_levels) From b6949d3dbbf579d875b8b3a6062b43a0eaf565a2 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 17:37:16 +0100 Subject: [PATCH 142/190] Rescale static mesh node features with maximum grid coordinate again --- neural_lam/models/base_graph_model.py | 7 ++++++ neural_lam/utils.py | 35 +++++++++++++-------------- tests/test_utils.py | 30 +++++++++++++++++++++++ 3 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 tests/test_utils.py diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 2bdb83ac..948083d1 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -38,6 +38,13 @@ def __init__( graph_dir_path=graph_dir_path ) for name, attr_value in graph_ldict.items(): + # NOTE: It would be good to rescale mesh node position features in + # exactly the same way as grid node position static features. + if name == "mesh_static_features": + max_coord = datastore.get_xy("state").max() + # Rescale by dividing by maximum coordinate in interior + attr_value = attr_value / max_coord + # Make BufferLists module members and register tensors as buffers if isinstance(attr_value, torch.Tensor): self.register_buffer(name, attr_value, persistent=False) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 1ba36fd6..068981e9 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -34,6 +34,17 @@ def __len__(self): def __iter__(self): return (self[i] for i in range(len(self))) + def __itruediv__(self, other): + """Divide each element in list with other""" + return self.__imul__(1.0 / other) + + def __imul__(self, other): + """Multiply each element in list with other""" + for buffer_tensor in self: + buffer_tensor *= other + + return self + def zero_index_edge_index(edge_index): """ @@ -166,10 +177,8 @@ def loads_file(fn): longest_edge = max( torch.max(level_features[:, 0]) for level_features in m2m_features ) # Col. 
0 is length - m2m_features = BufferList( - [level_features / longest_edge for level_features in m2m_features], - persistent=False, - ) + m2m_features = BufferList(m2m_features, persistent=False) + m2m_features /= longest_edge g2m_features = g2m_features / longest_edge m2g_features = m2g_features / longest_edge @@ -206,20 +215,10 @@ def loads_file(fn): ) # List of (M_down[l], d_edge_f) # Rescale - mesh_up_features = BufferList( - [ - edge_features / longest_edge - for edge_features in mesh_up_features - ], - persistent=False, - ) - mesh_down_features = BufferList( - [ - edge_features / longest_edge - for edge_features in mesh_down_features - ], - persistent=False, - ) + mesh_up_features = BufferList(mesh_up_features, persistent=False) + mesh_up_features /= longest_edge + mesh_down_features = BufferList(mesh_down_features, persistent=False) + mesh_down_features /= longest_edge mesh_static_features = BufferList( mesh_static_features, persistent=False diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..ab978887 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,30 @@ +# Standard library +import copy + +# Third-party +import torch + +# First-party +from neural_lam.utils import BufferList + + +def test_bufferlist_idiv(): + """Test in-place division of bufferlist""" + + tensors_to_buffer = [i * torch.ones(5) for i in range(3)] + tensors_for_checking = copy.deepcopy(tensors_to_buffer) + blist = BufferList(tensors_to_buffer) + + divisor = 5.0 + div_tensors = [ten / divisor for ten in tensors_for_checking] + div_blist = copy.deepcopy(blist) + div_blist /= divisor + for bl_ten, check_ten in zip(div_tensors, div_blist): + torch.testing.assert_allclose(bl_ten, check_ten) + + multiplier = 2.0 + mult_tensors = [ten * multiplier for ten in tensors_for_checking] + mult_blist = copy.deepcopy(blist) + mult_blist *= multiplier + for bl_ten, check_ten in zip(mult_tensors, mult_blist): + torch.testing.assert_allclose(bl_ten, check_ten) From 5ed304a542cd7f484ac7bc994f2dfeb5309b62e5 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Sat, 7 Dec 2024 18:08:00 +0100 Subject: [PATCH 143/190] Change var names and comments to clarify difference between interior and full grid --- neural_lam/models/ar_model.py | 74 ++++++++++++++------------- neural_lam/models/base_graph_model.py | 57 +++++++++++---------- neural_lam/models/graph_lam.py | 1 - 3 files changed, 70 insertions(+), 62 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 1edb7931..7beee310 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -44,12 +44,12 @@ def __init__( num_past_forcing_steps = args.num_past_forcing_steps num_future_forcing_steps = args.num_future_forcing_steps - # Load static features for grid + # Load static features for interior da_static_features = datastore.get_dataarray( category="static", split=None ) self.register_buffer( - "grid_static_features", + "interior_static_features", torch.tensor(da_static_features.values, dtype=torch.float32), persistent=False, ) @@ -100,14 +100,15 @@ def __init__( persistent=False, ) - # grid_dim from data + static + # interior from data + static ( - self.num_grid_nodes, - grid_static_dim, - ) = self.grid_static_features.shape - self.grid_dim = ( + self.num_interior_nodes, + interior_static_dim, + ) = self.interior_static_features.shape + self.num_total_grid_nodes = self.num_interior_nodes + self.interior_dim = ( 2 * self.grid_output_dim - + grid_static_dim + + interior_static_dim # Temporal 
Embedding counts as one additional forcing_feature + (num_forcing_vars + 1) * (num_past_forcing_steps + num_future_forcing_steps + 1) @@ -117,7 +118,7 @@ def __init__( self.boundary_forced = datastore_boundary is not None if self.boundary_forced: - # Load static features for grid + # Load static features for boundary da_boundary_static_features = datastore_boundary.get_dataarray( category="static", split=None ) @@ -148,6 +149,8 @@ def __init__( * (num_past_boundary_steps + num_future_boundary_steps + 1) ) + self.num_total_grid_nodes += self.num_boundary_nodes + # Instantiate loss function self.loss = metrics.get_metric(args.loss) @@ -227,17 +230,18 @@ def predict_step( ): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 - prev_state: (B, num_grid_nodes, feature_dim), X_t - prev_prev_state: (B, num_grid_nodes, feature_dim), X_{t-1} - forcing: (B, num_grid_nodes, forcing_dim) + prev_state: (B, num_interior_nodes, feature_dim), X_t + prev_prev_state: (B, num_interior_nodes, feature_dim), X_{t-1} + forcing: (B, num_interior_nodes, forcing_dim) + boundary_forcing: (B, num_boundary_nodes, boundary_forcing_dim) """ raise NotImplementedError("No prediction step implemented") def unroll_prediction(self, init_states, forcing, boundary_forcing): """ Roll out prediction taking multiple autoregressive steps with model - init_states: (B, 2, num_grid_nodes, d_f) - forcing: (B, pred_steps, num_grid_nodes, d_static_f) + init_states: (B, 2, num_interior_nodes, d_f) + forcing: (B, pred_steps, num_interior_nodes, d_static_f) boundary_forcing: (B, pred_steps, num_boundary_nodes, d_boundary_f) """ prev_prev_state = init_states[:, 0] @@ -257,8 +261,8 @@ def unroll_prediction(self, init_states, forcing, boundary_forcing): pred_state, pred_std = self.predict_step( prev_state, prev_prev_state, forcing_step, boundary_forcing_step ) - # state: (B, num_grid_nodes, d_f) pred_std: (B, num_grid_nodes, - # d_f) or None + # state: (B, num_interior_nodes, d_f) + # pred_std: (B, num_interior_nodes, d_f) or None prediction_list.append(pred_state) @@ -271,11 +275,11 @@ def unroll_prediction(self, init_states, forcing, boundary_forcing): prediction = torch.stack( prediction_list, dim=1 - ) # (B, pred_steps, num_grid_nodes, d_f) + ) # (B, pred_steps, num_interior_nodes, d_f) if self.output_std: pred_std = torch.stack( pred_std_list, dim=1 - ) # (B, pred_steps, num_grid_nodes, d_f) + ) # (B, pred_steps, num_interior_nodes, d_f) else: pred_std = self.per_var_std # (d_f,) @@ -285,9 +289,9 @@ def common_step(self, batch): """ Predict on single batch batch consists of: - init_states: (B, 2, num_grid_nodes, d_features) - target_states: (B, pred_steps, num_grid_nodes, d_features) - forcing: (B, pred_steps, num_grid_nodes, d_forcing), + init_states: (B, 2, num_interior_nodes, d_features) + target_states: (B, pred_steps, num_interior_nodes, d_features) + forcing: (B, pred_steps, num_interior_nodes, d_forcing), boundary_forcing: (B, pred_steps, num_boundary_nodes, d_boundary_forcing), where index 0 corresponds to index 1 of init_states @@ -302,9 +306,9 @@ def common_step(self, batch): prediction, pred_std = self.unroll_prediction( init_states, forcing, boundary_forcing - ) # (B, pred_steps, num_grid_nodes, d_f) - # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, - # pred_steps, num_grid_nodes, d_f) or (d_f,) + ) # (B, pred_steps, num_interior_nodes, d_f) + # prediction: (B, pred_steps, num_interior_nodes, d_f) pred_std: (B, + # pred_steps, num_interior_nodes, d_f) or (d_f,) return prediction, 
target_states, pred_std, batch_times @@ -405,8 +409,8 @@ def test_step(self, batch, batch_idx): """ # TODO Here batch_times can be used for plotting routines prediction, target, pred_std, batch_times = self.common_step(batch) - # prediction: (B, pred_steps, num_grid_nodes, d_f) pred_std: (B, - # pred_steps, num_grid_nodes, d_f) or (d_f,) + # prediction: (B, pred_steps, num_interior_nodes, d_f) pred_std: (B, + # pred_steps, num_interior_nodes, d_f) or (d_f,) time_step_loss = torch.mean( self.loss( @@ -454,12 +458,12 @@ def test_step(self, batch, batch_idx): # Save per-sample spatial loss for specific times spatial_loss = self.loss( prediction, target, pred_std, average_grid=False - ) # (B, pred_steps, num_grid_nodes) + ) # (B, pred_steps, num_interior_nodes) log_spatial_losses = spatial_loss[ :, [step - 1 for step in self.args.val_steps_to_log] ] self.spatial_loss_maps.append(log_spatial_losses) - # (B, N_log, num_grid_nodes) + # (B, N_log, num_interior_nodes) # Plot example predictions (on rank 0 only) if ( @@ -483,10 +487,10 @@ def plot_examples(self, batch, n_examples, split, prediction=None): """ Plot the first n_examples forecasts from batch - batch: batch with data to plot corresponding forecasts for n_examples: - number of forecasts to plot prediction: (B, pred_steps, num_grid_nodes, - d_f), existing prediction. - Generate if None. + batch: batch with data to plot corresponding forecasts for + n_examples: number of forecasts to plot + prediction: (B, pred_steps, num_interior_nodes, d_f), + existing prediction. Generate if None. """ if prediction is None: prediction, target, _, _ = self.common_step(batch) @@ -504,7 +508,7 @@ def plot_examples(self, batch, n_examples, split, prediction=None): target_rescaled[:n_examples], time[:n_examples], ): - # Each slice is (pred_steps, num_grid_nodes, d_f) + # Each slice is (pred_steps, num_interior_nodes, d_f) self.plotted_examples += 1 # Increment already here da_prediction = self._create_dataarray_from_tensor( @@ -680,11 +684,11 @@ def on_test_epoch_end(self): # Plot spatial loss maps spatial_loss_tensor = self.all_gather_cat( torch.cat(self.spatial_loss_maps, dim=0) - ) # (N_test, N_log, num_grid_nodes) + ) # (N_test, N_log, num_interior_nodes) if self.trainer.is_global_zero: mean_spatial_loss = torch.mean( spatial_loss_tensor, dim=0 - ) # (N_log, num_grid_nodes) + ) # (N_log, num_interior_nodes) loss_map_figs = [ vis.plot_spatial_error( diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 948083d1..aee6837e 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -53,32 +53,34 @@ def __init__( # Specify dimensions of data print( - f"Loaded graph with {self.num_grid_nodes + self.num_mesh_nodes} " - f"nodes ({self.num_grid_nodes} grid, {self.num_mesh_nodes} mesh)" + "Loaded graph with " + f"{self.num_total_grid_nodes + self.num_mesh_nodes} " + f"nodes ({self.num_total_grid_nodes} grid, " + f"{self.num_mesh_nodes} mesh)" ) - # grid_dim from data + static + # interior_dim from data + static self.g2m_edges, g2m_dim = self.g2m_features.shape self.m2g_edges, m2g_dim = self.m2g_features.shape # Define sub-models - # Feature embedders for grid + # Feature embedders for interior self.mlp_blueprint_end = [args.hidden_dim] * (args.hidden_layers + 1) - self.grid_embedder = utils.make_mlp( - [self.grid_dim] + self.mlp_blueprint_end + self.interior_embedder = utils.make_mlp( + [self.interior_dim] + self.mlp_blueprint_end ) if self.boundary_forced: # Define embedder for 
boundary nodes # Optional separate embedder for boundary nodes if args.shared_grid_embedder: - assert self.grid_dim == self.boundary_dim, ( + assert self.interior_dim == self.boundary_dim, ( "Grid and boundary input dimension must " "be the same when using " - f"the same embedder, got grid_dim={self.grid_dim}, " + f"the same embedder, got interior_dim={self.interior_dim}, " f"boundary_dim={self.boundary_dim}" ) - self.boundary_embedder = self.grid_embedder + self.boundary_embedder = self.interior_embedder else: self.boundary_embedder = utils.make_mlp( [self.boundary_dim] + self.mlp_blueprint_end @@ -106,7 +108,7 @@ def __init__( args.hidden_dim, hidden_layers=args.hidden_layers, update_edges=False, - num_rec=self.num_grid_nodes, + num_rec=self.num_interior_nodes, ) # Output mapping (hidden_dim -> output_dim) @@ -155,20 +157,21 @@ def predict_step( ): """ Step state one step ahead using prediction model, X_{t-1}, X_t -> X_t+1 - prev_state: (B, num_grid_nodes, feature_dim), X_t - prev_prev_state: (B, num_grid_nodes, feature_dim), X_{t-1} - forcing: (B, num_grid_nodes, forcing_dim) + prev_state: (B, num_interior_nodes, feature_dim), X_t + prev_prev_state: (B, num_interior_nodes, feature_dim), X_{t-1} + forcing: (B, num_interior_nodes, forcing_dim) boundary_forcing: (B, num_boundary_nodes, boundary_forcing_dim) """ batch_size = prev_state.shape[0] - # Create full grid node features of shape (B, num_grid_nodes, grid_dim) - grid_features = torch.cat( + # Create full interior node features of shape + # (B, num_interior_nodes, interior_dim) + interior_features = torch.cat( ( prev_state, prev_prev_state, forcing, - self.expand_to_batch(self.grid_static_features, batch_size), + self.expand_to_batch(self.interior_static_features, batch_size), ), dim=-1, ) @@ -191,7 +194,9 @@ def predict_step( # (B, num_boundary_nodes, d_h) # Embed all features - grid_emb = self.grid_embedder(grid_features) # (B, num_grid_nodes, d_h) + interior_emb = self.interior_embedder( + interior_features + ) # (B, num_interior_nodes, d_h) g2m_emb = self.g2m_embedder(self.g2m_features) # (M_g2m, d_h) m2g_emb = self.m2g_embedder(self.m2g_features) # (M_m2g, d_h) mesh_emb = self.embedd_mesh_nodes() @@ -199,10 +204,10 @@ def predict_step( if self.boundary_forced: # Merge interior and boundary emb into input embedding # We enforce ordering (interior, boundary) of nodes - input_emb = torch.cat((grid_emb, boundary_emb), dim=1) + full_grid_emb = torch.cat((interior_emb, boundary_emb), dim=1) else: # Only maps from interior to mesh - input_emb = grid_emb + full_grid_emb = interior_emb # Map from grid to mesh mesh_emb_expanded = self.expand_to_batch( @@ -212,12 +217,12 @@ def predict_step( # Encode to mesh mesh_rep = self.g2m_gnn( - input_emb, mesh_emb_expanded, g2m_emb_expanded + full_grid_emb, mesh_emb_expanded, g2m_emb_expanded ) # (B, num_mesh_nodes, d_h) # Also MLP with residual for grid representation - grid_rep = grid_emb + self.encoding_grid_mlp( - grid_emb - ) # (B, num_grid_nodes, d_h) + grid_rep = interior_emb + self.encoding_grid_mlp( + interior_emb + ) # (B, num_interior_nodes, d_h) # Run processor step mesh_rep = self.process_step(mesh_rep) @@ -226,17 +231,17 @@ def predict_step( m2g_emb_expanded = self.expand_to_batch(m2g_emb, batch_size) grid_rep = self.m2g_gnn( mesh_rep, grid_rep, m2g_emb_expanded - ) # (B, num_grid_nodes, d_h) + ) # (B, num_interior_nodes, d_h) # Map to output dimension, only for grid net_output = self.output_map( grid_rep - ) # (B, num_grid_nodes, d_grid_out) + ) # (B, num_interior_nodes, d_grid_out) if 
self.output_std: pred_delta_mean, pred_std_raw = net_output.chunk( 2, dim=-1 - ) # both (B, num_grid_nodes, d_f) + ) # both (B, num_interior_nodes, d_f) # NOTE: The predicted std. is not scaled in any way here # linter for some reason does not think softplus is callable # pylint: disable-next=not-callable diff --git a/neural_lam/models/graph_lam.py b/neural_lam/models/graph_lam.py index 7adb02a6..bd2b4b2e 100644 --- a/neural_lam/models/graph_lam.py +++ b/neural_lam/models/graph_lam.py @@ -38,7 +38,6 @@ def __init__( not self.hierarchical ), "GraphLAM does not use a hierarchical mesh graph" - # grid_dim from data + static + batch_static mesh_dim = self.mesh_static_features.shape[1] m2m_edges, m2m_dim = self.m2m_features.shape print( From e61bdfec179c048360149e1f61390d2b2c0c48ef Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 9 Dec 2024 19:11:45 -0800 Subject: [PATCH 144/190] Make dir_save_path default to None --- neural_lam/build_rectangular_graph.py | 2 +- tests/test_training.py | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py index 2ff5328a..0280b016 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -253,7 +253,7 @@ def build_graph_from_archetype( datastore_boundary, graph_name, archetype, - dir_save_path, + dir_save_path=None, **kwargs, ): """ diff --git a/tests/test_training.py b/tests/test_training.py index 4d07b087..4773bbf3 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -66,17 +66,16 @@ def test_training(datastore_name, datastore_boundary_name): graph_dir_path = Path(datastore.root_path) / "graphs" / flat_graph_name - def _create_graph(): - if not graph_dir_path.exists(): - build_graph_from_archetype( - datastore=datastore, - datastore_boundary=datastore_boundary, - graph_name=flat_graph_name, - archetype="keisler", - mesh_node_distance=get_test_mesh_dist( - datastore, datastore_boundary - ), - ) + if not graph_dir_path.exists(): + build_graph_from_archetype( + datastore=datastore, + datastore_boundary=datastore_boundary, + graph_name=flat_graph_name, + archetype="keisler", + mesh_node_distance=get_test_mesh_dist( + datastore, datastore_boundary + ), + ) data_module = WeatherDataModule( datastore=datastore, From ff0c8e0038943904c4adb11f959c89462a81f743 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 17 Dec 2024 14:40:53 +0100 Subject: [PATCH 145/190] Use in-place division for BufferList containing mesh graph node features --- neural_lam/models/base_graph_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index aee6837e..91e6afb4 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -43,7 +43,7 @@ def __init__( if name == "mesh_static_features": max_coord = datastore.get_xy("state").max() # Rescale by dividing by maximum coordinate in interior - attr_value = attr_value / max_coord + attr_value /= max_coord # Make BufferLists module members and register tensors as buffers if isinstance(attr_value, torch.Tensor): From 8cc608dfc4f8b4e97a931c58e5170c2578281fef Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 11:03:37 +0100 Subject: [PATCH 146/190] rename function to represent multiple datastores --- neural_lam/config.py | 6 +++--- neural_lam/create_graph.py | 4 ++-- neural_lam/plot_graph.py | 4 ++-- neural_lam/train_model.py | 
4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/neural_lam/config.py b/neural_lam/config.py index f8879811..4b57a141 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -140,11 +140,11 @@ class InvalidConfigError(Exception): pass -def load_config_and_datastore( +def load_config_and_datastores( config_path: str, ) -> tuple[NeuralLAMConfig, Union[MDPDatastore, NpyFilesDatastoreMEPS]]: """ - Load the neural-lam configuration and the datastore specified in the + Load the neural-lam configuration and the datastores specified in the configuration. Parameters @@ -155,7 +155,7 @@ def load_config_and_datastore( Returns ------- tuple[NeuralLAMConfig, Union[MDPDatastore, NpyFilesDatastoreMEPS]] - The Neural-LAM configuration and the loaded datastore. + The Neural-LAM configuration and the loaded datastores. """ try: config = NeuralLAMConfig.from_yaml_file(config_path) diff --git a/neural_lam/create_graph.py b/neural_lam/create_graph.py index ef979be3..1ab4e1e9 100644 --- a/neural_lam/create_graph.py +++ b/neural_lam/create_graph.py @@ -13,7 +13,7 @@ from torch_geometric.utils.convert import from_networkx # Local -from .config import load_config_and_datastore +from .config import load_config_and_datastores from .datastore.base import BaseRegularGridDatastore @@ -595,7 +595,7 @@ def cli(input_args=None): ), "Specify your config with --config_path" # Load neural-lam configuration and datastore to use - _, datastore = load_config_and_datastore(config_path=args.config_path) + _, datastore = load_config_and_datastores(config_path=args.config_path) create_graph_from_datastore( datastore=datastore, diff --git a/neural_lam/plot_graph.py b/neural_lam/plot_graph.py index 999c8e53..ad27b5b0 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -9,7 +9,7 @@ # Local from . import utils -from .config import load_config_and_datastore +from .config import load_config_and_datastores MESH_HEIGHT = 0.1 MESH_LEVEL_DIST = 0.2 @@ -43,7 +43,7 @@ def main(): ) args = parser.parse_args() - _, datastore = load_config_and_datastore( + _, datastore = load_config_and_datastores( config_path=args.datastore_config_path ) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 2a61e86c..54017dbb 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -12,7 +12,7 @@ # Local from . 
import utils -from .config import load_config_and_datastore +from .config import load_config_and_datastores from .models import GraphLAM, HiLAM, HiLAMParallel from .weather_dataset import WeatherDataModule @@ -238,7 +238,7 @@ def main(input_args=None): seed.seed_everything(args.seed) # Load neural-lam configuration and datastore to use - config, datastore, datastore_boundary = load_config_and_datastore( + config, datastore, datastore_boundary = load_config_and_datastores( config_path=args.config_path ) From 857f7482e9e04c90a7112e8063bc6e449c9971c6 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 11:07:26 +0100 Subject: [PATCH 147/190] streamline da_grid_reference variable naming --- neural_lam/datastore/mdp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 809bbdb8..f68bb4d0 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -386,11 +386,10 @@ def grid_shape_state(self): "no state data found in datastore" "returning grid shape from forcing data" ) - ds_forcing = self.unstack_grid_coords(self._ds["forcing"]) - da_x, da_y = ds_forcing.x, ds_forcing.y + da_grid_reference = self.unstack_grid_coords(self._ds["forcing"]) else: - ds_state = self.unstack_grid_coords(self._ds["state"]) - da_x, da_y = ds_state.x, ds_state.y + da_grid_reference = self.unstack_grid_coords(self._ds["state"]) + da_x, da_y = da_grid_reference.x, da_grid_reference.y assert da_x.ndim == da_y.ndim == 1 return CartesianGridShape(x=da_x.size, y=da_y.size) From d0a6f2425f473b06db99740f295c0d452d003281 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 11:09:44 +0100 Subject: [PATCH 148/190] updated docstring of WeatherDataset --- neural_lam/weather_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 0ddad878..510a9504 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -21,9 +21,9 @@ class WeatherDataset(torch.utils.data.Dataset): Parameters ---------- datastore : BaseDatastore - The datastore to load the data from (e.g. mdp). + The datastore to load the data from. datastore_boundary : BaseDatastore - The boundary datastore to load the data from (e.g. mdp). + The boundary datastore to load the data from. split : str, optional The data split to use ("train", "val" or "test"). Default is "train". 
ar_steps : int, optional From ef40a399fe0595dc936ee9f0e6c8028338a0c2f5 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 11:11:25 +0100 Subject: [PATCH 149/190] renamed da_boundary -> da_boundary_forcing --- neural_lam/weather_dataset.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 510a9504..f20e3506 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -89,11 +89,11 @@ def __init__( ) # XXX For now boundary data is always considered mdp-forcing data if self.datastore_boundary is not None: - self.da_boundary = self.datastore_boundary.get_dataarray( + self.da_boundary_forcing = self.datastore_boundary.get_dataarray( category="forcing", split=self.split ) else: - self.da_boundary = None + self.da_boundary_forcing = None # check that with the provided data-arrays and ar_steps that we have a # non-zero amount of samples @@ -153,7 +153,7 @@ def __init__( self.time_step_state = self._get_time_step(state_times) # Check time coverage for forcing and boundary data - if self.da_forcing is not None or self.da_boundary is not None: + if self.da_forcing is not None or self.da_boundary_forcing is not None: if self.datastore.is_forecast: state_times = self.da_state.analysis_time else: @@ -176,17 +176,17 @@ def __init__( forcing_times.values ) - if self.da_boundary is not None: + if self.da_boundary_forcing is not None: # Boundary data is part of a separate datastore # The boundary data is allowed to have a different time_step # Check that the boundary data covers the required time range if self.datastore_boundary.is_forecast: - boundary_times = self.da_boundary.analysis_time + boundary_times = self.da_boundary_forcing.analysis_time self.forecast_step_boundary = self._get_time_step( - self.da_boundary.elapsed_forecast_duration + self.da_boundary_forcing.elapsed_forecast_duration ) else: - boundary_times = self.da_boundary.time + boundary_times = self.da_boundary_forcing.time self.time_step_boundary = self._get_time_step( boundary_times.values ) @@ -238,7 +238,7 @@ def __init__( self.da_forcing_std = self.ds_forcing_stats.forcing_std # XXX: Again, the boundary data is considered forcing data for now - if self.da_boundary is not None: + if self.da_boundary_forcing is not None: self.ds_boundary_stats = ( self.datastore_boundary.get_standardization_dataarray( category="forcing" @@ -601,8 +601,8 @@ def _build_item_dataarrays(self, idx): else: da_forcing = None - if self.da_boundary is not None: - da_boundary = self.da_boundary + if self.da_boundary_forcing is not None: + da_boundary = self.da_boundary_forcing else: da_boundary = None From 71b52b248caef8ffb15a0147988f60db9e41fcda Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 11:31:36 +0100 Subject: [PATCH 150/190] updated docstrings of get_dataarray() --- neural_lam/datastore/base.py | 8 +++----- neural_lam/datastore/mdp.py | 5 ++--- tests/dummy_datastore.py | 8 +++----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index e2d21404..b9de2da5 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -193,11 +193,9 @@ def get_dataarray( """ Return the processed data (as a single `xr.DataArray`) for the given category of data and test/train/val-split that covers all the data (in - space and time) of a given category (state/forcing/static). 
A - datastore must be able to return for the "state" category, but - "forcing" and "static" are optional (in which case the method should - return `None`). For the "static" category the `split` is allowed to be - `None` because the static data is the same for all splits. + space and time) of a given category (state/forcing/static). For the + "static" category the `split` is allowed to be `None` because the static + data is the same for all splits. The returned dataarray is expected to at minimum have dimensions of `(grid_index, {category}_feature)` so that any spatial dimensions have diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index f68bb4d0..8f488910 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -218,9 +218,8 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: """ Return the processed data (as a single `xr.DataArray`) for the given category of data and test/train/val-split that covers all the data (in - space and time) of a given category (state/forcin g/static). "state" is - the only required category, for other categories, the method will - return `None` if the category is not found in the datastore. + space and time) of a given category (state/forcing/static). The method + will return `None` if the category is not found in the datastore. The returned dataarray will at minimum have dimensions of `(grid_index, {category}_feature)` so that any spatial dimensions have been stacked diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index a958b8f5..1bdbc8c8 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -300,11 +300,9 @@ def get_dataarray( """ Return the processed data (as a single `xr.DataArray`) for the given category of data and test/train/val-split that covers all the data (in - space and time) of a given category (state/forcing/static). A - datastore must be able to return for the "state" category, but - "forcing" and "static" are optional (in which case the method should - return `None`). For the "static" category the `split` is allowed to be - `None` because the static data is the same for all splits. + space and time) of a given category (state/forcing/static). For the + "static" category the `split` is allowed to be `None` because the static + data is the same for all splits. The returned dataarray is expected to at minimum have dimensions of `(grid_index, {category}_feature)` so that any spatial dimensions have From b69056341eb288c5a439fcdf038ae3172aee52c3 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 12:17:29 +0100 Subject: [PATCH 151/190] check times in stateless functions from utils.py --- neural_lam/utils.py | 90 +++++++++++++++++++++++++ neural_lam/weather_dataset.py | 120 ++++++++++------------------------ 2 files changed, 124 insertions(+), 86 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 4a0752e4..f55f17da 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -3,6 +3,7 @@ import shutil # Third-party +import numpy as np import torch from torch import nn from tueplots import bundles, figsizes @@ -241,3 +242,92 @@ def init_wandb_metrics(wandb_logger, val_steps): experiment.define_metric("val_mean_loss", summary="min") for step in val_steps: experiment.define_metric(f"val_loss_unroll{step}", summary="min") + + +def get_time_step(self, times): + """Calculate the time step from a time dataarray. + + Parameters + ---------- + times : xr.DataArray + The time dataarray to calculate the time step from. 
+
+    Returns
+    -------
+    time_step : float
+        The time step in the datetime format of the times dataarray.
+    """
+    time_diffs = np.diff(times)
+    if not np.all(time_diffs == time_diffs[0]):
+        raise ValueError(
+            "Inconsistent time steps in data. "
+            f"Found different time steps: {np.unique(time_diffs)}"
+        )
+    return time_diffs[0]
+
+
+def check_time_overlap(
+    da1,
+    da2,
+    da1_is_forecast=False,
+    da2_is_forecast=False,
+    num_past_steps=1,
+    num_future_steps=1,
+):
+    """Check that the time coverage of two dataarrays overlaps.
+
+    Parameters
+    ----------
+    da1 : xr.DataArray
+        The first dataarray to check.
+    da2 : xr.DataArray
+        The second dataarray to check.
+    da1_is_forecast : bool, optional
+        Whether the first dataarray is forecast data.
+    da2_is_forecast : bool, optional
+        Whether the second dataarray is forecast data.
+    num_past_steps : int, optional
+        Number of past forcing steps.
+    num_future_steps : int, optional
+        Number of future forcing steps.
+
+    Raises
+    ------
+    ValueError
+        If the time coverage of the dataarrays does not overlap.
+    """
+
+    if da1_is_forecast:
+        times_da1 = da1.analysis_time
+    else:
+        times_da1 = da1.time
+    time_min_da1 = times_da1.min().values
+    time_max_da1 = times_da1.max().values
+
+    if da2_is_forecast:
+        times_da2 = da2.analysis_time
+        _ = get_time_step(da2.elapsed_forecast_duration)
+    else:
+        times_da2 = da2.time
+        time_step_da2 = get_time_step(times_da2.values)
+
+    time_min_da2 = da2.min().values
+    time_max_da2 = da2.max().values
+
+    # Calculate required bounds for da2 using its time step
+    da2_required_time_min = time_min_da1 - num_past_steps * time_step_da2
+    da2_required_time_max = time_max_da1 + num_future_steps * time_step_da2
+
+    if time_min_da2 > da2_required_time_min:
+        raise ValueError(
+            f"The second DataArray ('Boundary forcing'?) data starts too late."
+            f"Required start: {da2_required_time_min}, "
+            f"but DataArray starts at {time_min_da2}."
+        )
+
+    if time_max_da2 < da2_required_time_max:
+        raise ValueError(
+            f"The second DataArray ('Boundary forcing'?) ends too early."
+            f"Required end: {da2_required_time_max}, "
+            f"but DataArray ends at {time_max_da2}."
+ ) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index f20e3506..c6b142ec 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -11,6 +11,7 @@ # First-party from neural_lam.datastore.base import BaseDatastore +from neural_lam.utils import check_time_overlap, get_time_step class WeatherDataset(torch.utils.data.Dataset): @@ -142,80 +143,48 @@ def __init__( else: self.da_state = self.da_state - # Check time step consistency in state data + # Check time step consistency in state data and determine time steps + # for state, forcing and boundary data if self.datastore.is_forecast: state_times = self.da_state.analysis_time - self.forecast_step_state = self._get_time_step( + self.forecast_step_state = get_time_step( self.da_state.elapsed_forecast_duration ) else: state_times = self.da_state.time - self.time_step_state = self._get_time_step(state_times) - - # Check time coverage for forcing and boundary data - if self.da_forcing is not None or self.da_boundary_forcing is not None: + self.time_step_state = get_time_step(state_times) + if self.da_forcing is not None: + # Forcing data is part of the same datastore as state data + # During creation the time dimension of the forcing data + # is matched to the state data if self.datastore.is_forecast: - state_times = self.da_state.analysis_time - else: - state_times = self.da_state.time - state_time_min = state_times.min().values - state_time_max = state_times.max().values - - if self.da_forcing is not None: - # Forcing data is part of the same datastore as state data - # During creation the time dimension of the forcing data - # is matched to the state data - if self.datastore.is_forecast: - forcing_times = self.da_forcing.analysis_time - self.forecast_step_forcing = self._get_time_step( - self.da_forcing.elapsed_forecast_duration - ) - else: - forcing_times = self.da_forcing.time - self.time_step_forcing = self._get_time_step( - forcing_times.values + forcing_times = self.da_forcing.analysis_time + self.forecast_step_forcing = self._get_time_step( + self.da_forcing.elapsed_forecast_duration ) - - if self.da_boundary_forcing is not None: - # Boundary data is part of a separate datastore - # The boundary data is allowed to have a different time_step - # Check that the boundary data covers the required time range - if self.datastore_boundary.is_forecast: - boundary_times = self.da_boundary_forcing.analysis_time - self.forecast_step_boundary = self._get_time_step( - self.da_boundary_forcing.elapsed_forecast_duration - ) - else: - boundary_times = self.da_boundary_forcing.time - self.time_step_boundary = self._get_time_step( - boundary_times.values - ) - boundary_time_min = boundary_times.min().values - boundary_time_max = boundary_times.max().values - - # Calculate required bounds for boundary using its time step - boundary_required_time_min = ( - state_time_min - - self.num_past_forcing_steps * self.time_step_boundary - ) - boundary_required_time_max = ( - state_time_max - + self.num_future_forcing_steps * self.time_step_boundary + else: + forcing_times = self.da_forcing.time + self.time_step_forcing = self._get_time_step(forcing_times.values) + # Boundary data is part of a separate datastore + # The boundary data is allowed to have a different time_step + # Check that the boundary data covers the required time range + if self.datastore_boundary.is_forecast: + boundary_times = self.da_boundary_forcing.analysis_time + self.forecast_step_boundary = self._get_time_step( + 
self.da_boundary_forcing.elapsed_forecast_duration ) - - if boundary_time_min > boundary_required_time_min: - raise ValueError( - f"Boundary data starts too late." - f"Required start: {boundary_required_time_min}, " - f"but boundary starts at {boundary_time_min}." - ) - - if boundary_time_max < boundary_required_time_max: - raise ValueError( - f"Boundary data ends too early." - f"Required end: {boundary_required_time_max}, " - f"but boundary ends at {boundary_time_max}." - ) + else: + boundary_times = self.da_boundary_forcing.time + self.time_step_boundary = self._get_time_step(boundary_times.values) + + check_time_overlap( + self.da_state, + self.da_boundary_forcing, + da1_is_forecast=self.datastore.is_forecast, + da2_is_forecast=self.datastore_boundary.is_forecast, + num_past_steps=self.num_past_boundary_steps, + num_future_steps=self.num_future_boundary_steps, + ) # Set up for standardization # TODO: This will become part of ar_model.py soon! @@ -293,27 +262,6 @@ def __len__(self): - self.num_future_forcing_steps ) - def _get_time_step(self, times): - """Calculate the time step from the data - - Parameters - ---------- - times : xr.DataArray - The time dataarray to calculate the time step from. - - Returns - ------- - time_step : float - The time step in the the format of the times dataarray. - """ - time_diffs = np.diff(times) - if not np.all(time_diffs == time_diffs[0]): - raise ValueError( - "Inconsistent time steps in data. " - f"Found different time steps: {np.unique(time_diffs)}" - ) - return time_diffs[0] - def _slice_time( self, da_state, From a37dc3ceddfdb9421767528a03e08147bbe4a185 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:03:22 +0100 Subject: [PATCH 152/190] add num_ensemble_members property to BaseDatastore --- neural_lam/datastore/base.py | 13 +++++++++++++ neural_lam/datastore/npyfilesmeps/store.py | 6 ++++-- neural_lam/weather_dataset.py | 18 +++++++++--------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index b9de2da5..84600b50 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -298,6 +298,19 @@ def num_grid_points(self) -> int: """ pass + @property + @abc.abstractmethod + def num_ensemble_members(self) -> int: + """Return the number of ensemble members in the dataset. + + Returns + ------- + int + The number of ensemble members in the dataset. + + """ + pass + @cached_property @abc.abstractmethod def state_feature_weights_values(self) -> List[float]: diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 24349e7e..b91f7291 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -166,7 +166,6 @@ def __init__( self._root_path = self._config_path.parent self._config = NpyDatastoreConfig.from_yaml_file(self._config_path) - self._num_ensemble_members = self.config.dataset.num_ensemble_members self._num_timesteps = self.config.dataset.num_timesteps self._step_length = self.config.dataset.step_length self._remove_state_features_with_index = ( @@ -199,6 +198,9 @@ def config(self) -> NpyDatastoreConfig: """ return self._config + def num_ensemble_members(self) -> int: + return self.config.dataset.num_ensemble_members + def get_dataarray(self, category: str, split: str) -> DataArray: """ Get the data array for the given category and split of data. 
If the @@ -230,7 +232,7 @@ def get_dataarray(self, category: str, split: str) -> DataArray: if category == "state": das = [] # for the state category, we need to load all ensemble members - for member in range(self._num_ensemble_members): + for member in range(self.num_ensemble_members): da_member = self._get_single_timeseries_dataarray( features=self.get_vars_names(category="state"), split=split, diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index c6b142ec..d2fb5921 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -217,21 +217,21 @@ def __init__( self.da_boundary_std = self.ds_boundary_stats.forcing_std def __len__(self): + + if self.datastore.is_ensemble: + warnings.warn( + "only using first ensemble member, so dataset size is " + " effectively reduced by the number of ensemble members " + f"({self.datastore.num_ensemble_members})", + UserWarning, + ) + if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time # and then take the first (2 + ar_steps) forecast times. In # addition we only use the first ensemble member (if ensemble data # has been provided). # This means that for each analysis time we get a single sample - - if self.datastore.is_ensemble: - warnings.warn( - "only using first ensemble member, so dataset size is " - " effectively reduced by the number of ensemble members " - f"({self.datastore._num_ensemble_members})", - UserWarning, - ) - # check that there are enough forecast steps available to create # samples given the number of autoregressive steps requested n_forecast_steps = self.da_state.elapsed_forecast_duration.size From 8d1bec6a72ecc628f2dffec405173ee28fa1680f Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:08:07 +0100 Subject: [PATCH 153/190] Update neural_lam/weather_dataset.py Co-authored-by: Leif Denby --- neural_lam/weather_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index d2fb5921..f311b7e8 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -276,7 +276,8 @@ def _slice_time( Produce time slices of the given dataarrays `da_state` (state) and `da_forcing`. For the state data, slicing is done based on `idx`. For the forcing/boundary data, nearest neighbor matching - is performed based on the state times. Additionally, the time difference + is performed based on the state times (assuming constant timestep size). + Additionally, the time difference between the matched forcing/boundary times and state times (in multiples of state time steps) is added to the forcing dataarray. This will be used as an additional input feature in the model (temporal embedding). 
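As a rough illustration of the nearest-neighbour time matching and time-difference computation described in the docstring above, here is a standalone sketch using plain numpy datetimes. The names (state_times, forcing_times, num_past_steps, num_future_steps) are placeholders for this sketch only, not the exact variables used in WeatherDataset._slice_time:

import numpy as np

# Hypothetical illustration: match forcing times to one state time by
# nearest neighbour, then express the window offsets in units of the
# state time step (the quantity the docstring calls the "temporal embedding").
state_times = np.array(
    ["2020-01-01T00", "2020-01-01T03", "2020-01-01T06"], dtype="datetime64[h]"
)
forcing_times = np.arange(
    np.datetime64("2019-12-31T18"),
    np.datetime64("2020-01-01T12"),
    np.timedelta64(1, "h"),
)
state_step = state_times[1] - state_times[0]  # 3 hours
num_past_steps, num_future_steps = 1, 1

target_time = state_times[1]
# Index of the forcing time closest to the target state time
idx = int(np.abs(forcing_times - target_time).argmin())
window = forcing_times[idx - num_past_steps : idx + num_future_steps + 1]

# Offsets of the windowed forcing times in multiples of the state time step
time_deltas = (window - target_time) / state_step
print(time_deltas)  # approximately [-0.33, 0.0, 0.33]
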
From 47370f9636c355e10f7290c900955de0085c80eb Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:17:18 +0100 Subject: [PATCH 154/190] renaming time_diff_steps to time_deltas --- neural_lam/weather_dataset.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index d2fb5921..02a940b0 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -217,7 +217,6 @@ def __init__( self.da_boundary_std = self.ds_boundary_stats.forcing_std def __len__(self): - if self.datastore.is_ensemble: warnings.warn( "only using first ensemble member, so dataset size is " @@ -423,7 +422,7 @@ def _slice_time( da_forcing_matched = xr.concat(da_list, dim="time") - # Generate temporal embedding `time_diff_steps` for the + # Generate temporal embedding `time_deltas` for the # forcing/boundary data. This is the time difference in multiples # of state time steps between the forcing/boundary time and the # state time @@ -435,7 +434,7 @@ def _slice_time( else: boundary_time_step = self.time_step_boundary state_time_step = self.time_step_state - time_diff_steps = ( + time_deltas = ( da_forcing_matched["window"] * (boundary_time_step / state_time_step), ) @@ -446,18 +445,18 @@ def _slice_time( else: forcing_time_step = self.time_step_forcing state_time_step = self.time_step_state - time_diff_steps = ( + time_deltas = ( da_forcing_matched["window"] * (forcing_time_step / state_time_step), ) - time_diff_steps = da_forcing_matched.isel( + time_deltas = da_forcing_matched.isel( grid_index=0, forcing_feature=0 ).window.values # Add time difference as a new coordinate to concatenate to the # forcing features later as temporal embedding - da_forcing_matched["time_diff_steps"] = ( + da_forcing_matched["time_deltas"] = ( ("window"), - time_diff_steps, + time_deltas, ) return da_state_sliced, da_forcing_matched @@ -494,12 +493,12 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): ) # Add the time step differences as a new feature to the windowed # data - time_diff_steps = da_windowed["time_diff_steps"].isel( + time_deltas = da_windowed["time_deltas"].isel( forcing_feature_windowed=slice(0, window_size) ) # All data variables share the same temporal embedding da_windowed = xr.concat( - [da_windowed, time_diff_steps], + [da_windowed, time_deltas], dim="forcing_feature_windowed", ) else: From d52437717a41b8de9324e5987c35572ae768401c Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:23:53 +0100 Subject: [PATCH 155/190] add num_ensemble_members to mdp store --- neural_lam/datastore/mdp.py | 12 ++++++++++++ neural_lam/datastore/npyfilesmeps/store.py | 1 + 2 files changed, 13 insertions(+) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 8f488910..3682a51e 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -140,6 +140,18 @@ def step_length(self) -> int: da_dt = self._ds["time"].diff("time") return (da_dt.dt.seconds[0] // 3600).item() + @property + def num_ensemble_members(self) -> int: + """The number of ensemble members in the dataset. + + Returns + ------- + int + The number of ensemble members in the dataset. + + """ + return None + def get_vars_units(self, category: str) -> List[str]: """Return the units of the variables in the given category. 
diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index b91f7291..1b0f6065 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -198,6 +198,7 @@ def config(self) -> NpyDatastoreConfig: """ return self._config + @property def num_ensemble_members(self) -> int: return self.config.dataset.num_ensemble_members From 98c54d9856f4dead1ed8fd8951a69795931cb5e6 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:31:25 +0100 Subject: [PATCH 156/190] Rename temporal embeddings and diffs to time deltas --- neural_lam/models/ar_model.py | 2 +- neural_lam/weather_dataset.py | 39 ++++++++++++++++------------------- tests/test_datasets.py | 2 +- tests/test_time_slicing.py | 9 ++++---- 4 files changed, 24 insertions(+), 28 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 81d5a623..e21faf43 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -110,7 +110,7 @@ def __init__( self.grid_dim = ( 2 * self.grid_output_dim + grid_static_dim - # Temporal Embedding counts as one additional forcing_feature + # Time deltas count as one additional forcing_feature + (num_forcing_vars + 1) * (num_past_forcing_steps + num_future_forcing_steps + 1) ) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index db2b2c70..2f37b4b3 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -273,13 +273,13 @@ def _slice_time( ): """ Produce time slices of the given dataarrays `da_state` (state) and - `da_forcing`. For the state data, slicing is done - based on `idx`. For the forcing/boundary data, nearest neighbor matching - is performed based on the state times (assuming constant timestep size). - Additionally, the time difference - between the matched forcing/boundary times and state times (in multiples - of state time steps) is added to the forcing dataarray. This will be - used as an additional input feature in the model (temporal embedding). + `da_forcing`. For the state data, slicing is done based on `idx`. For + the forcing/boundary data, nearest neighbor matching is performed based + on the state times (assuming constant timestep size). Additionally, the + time deltas between the matched forcing/boundary times and state times + (in multiples of state time steps) is added to the forcing dataarray. + This will be used as an additional input feature in the model (as + temporal embedding). Parameters ---------- @@ -423,10 +423,9 @@ def _slice_time( da_forcing_matched = xr.concat(da_list, dim="time") - # Generate temporal embedding `time_deltas` for the - # forcing/boundary data. This is the time difference in multiples - # of state time steps between the forcing/boundary time and the - # state time + # Generate time_deltas for the forcing/boundary data. 
This is the time + # difference in multiples of state time steps between the + # forcing/boundary time and the state time if is_boundary: if self.datastore_boundary.is_forecast: @@ -453,8 +452,8 @@ def _slice_time( time_deltas = da_forcing_matched.isel( grid_index=0, forcing_feature=0 ).window.values - # Add time difference as a new coordinate to concatenate to the - # forcing features later as temporal embedding + # Add time deltas as a new coordinate to concatenate to the + # forcing features later as temporal embedding in the model da_forcing_matched["time_deltas"] = ( ("window"), time_deltas, @@ -465,7 +464,7 @@ def _slice_time( def _process_windowed_data(self, da_windowed, da_state, da_target_times): """Helper function to process windowed data. This function stacks the 'forcing_feature' and 'window' dimensions and adds the time step - differences to the existing features as a temporal embedding. + deltas to the existing features. Parameters ---------- @@ -487,17 +486,16 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): if da_windowed is not None: window_size = da_windowed.window.size # Stack the 'feature' and 'window' dimensions and add the - # time step differences to the existing features as a temporal - # embedding + # time deltas to the existing features da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) - # Add the time step differences as a new feature to the windowed + # Add the time deltas a new feature to the windowed # data time_deltas = da_windowed["time_deltas"].isel( forcing_feature_windowed=slice(0, window_size) ) - # All data variables share the same temporal embedding + # All data variables share the same time deltas da_windowed = xr.concat( [da_windowed, time_deltas], dim="forcing_feature_windowed", @@ -616,9 +614,8 @@ def _build_item_dataarrays(self, idx): ) / self.da_boundary_std # This function handles the stacking of the forcing and boundary data - # and adds the time step differences as a temporal embedding. - # It can handle `None` inputs for the forcing and boundary data - # (and simlpy return an empty DataArray in that case). + # and adds the time deltas. It can handle `None` inputs for the forcing + # and boundary data (and simlpy return an empty DataArray in that case). 
da_forcing_windowed = self._process_windowed_data( da_forcing_windowed, da_state, da_target_times ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index aa7b645d..6031fc81 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -82,7 +82,7 @@ def test_dataset_item_shapes(datastore_name, datastore_boundary_name): assert forcing.ndim == 3 assert forcing.shape[0] == N_pred_steps assert forcing.shape[1] == N_gridpoints - # each time step in the window has one corresponding temporal embedding + # each time step in the window has one corresponding time deltas # that is shared across all grid points, times and variables assert forcing.shape[2] == (datastore.get_num_data_vars("forcing") + 1) * ( num_past_forcing_steps + num_future_forcing_steps + 1 diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index 21038e7b..a8afdacd 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -223,11 +223,10 @@ def test_time_slicing_analysis( assert forcing.shape == ( ar_steps, 1, - total_forcing_window - * 2, # Each windowed feature includes temporal embedding + total_forcing_window * 2, # Each windowed feature includes time deltas ) - # Extract the forcing values from the tensor (excluding temporal embeddings) + # Extract the forcing values from the tensor (excluding time deltas) forcing_values = forcing[:, 0, :total_forcing_window] # Compare with expected forcing values @@ -313,11 +312,11 @@ def test_time_slicing_forecast( ar_steps, # Number of AR steps 1, # Number of grid points total_forcing_window # Total number of forcing steps in the window - * 2, # Each windowed feature includes temporal embedding + * 2, # Each windowed feature includes time deltas ) assert forcing.shape == expected_forcing_shape - # Extract the forcing values from the tensor (excluding temporal embeddings) + # Extract the forcing values from the tensor (excluding time deltas) forcing_values = forcing[:, 0, :total_forcing_window] # Compare with expected forcing values From 4a278fd9dea3fd8ce7bce91e88cb6259fa42cc6a Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:40:52 +0100 Subject: [PATCH 157/190] Adding some comments about analysis_time indexing --- tests/test_time_slicing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index a8afdacd..daec72f2 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -248,6 +248,7 @@ def test_time_slicing_forecast( len(STATE_VALUES_FORECAST) ) ELAPSED_FORECAST_DURATION = np.timedelta64(0, "D") + np.arange( + # Retrieving the first analysis_time len(FORCING_VALUES_FORECAST[0]) ) # Create a dummy datastore with forecast data @@ -281,6 +282,7 @@ def test_time_slicing_forecast( # Compute expected initial states and target states based on ar_steps offset = max(0, num_past_forcing_steps - INIT_STEPS) init_idx = INIT_STEPS + offset + # Retrieving the first analysis_time expected_init_states = STATE_VALUES_FORECAST[0][offset:init_idx] expected_target_states = STATE_VALUES_FORECAST[0][ init_idx : init_idx + ar_steps @@ -293,6 +295,8 @@ def test_time_slicing_forecast( for i in range(ar_steps): start_idx = i + init_idx - num_past_forcing_steps end_idx = i + init_idx + num_future_forcing_steps + 1 + # Retrieving the analysis_time relevant for forcing-windows (i.e. 
+ # the first analysis_time after the 2 init_steps) forcing_window = FORCING_VALUES_FORECAST[INIT_STEPS][start_idx:end_idx] expected_forcing_values.append(forcing_window) From c82d22ba9749c8f1fcd04c7bd7e1301881951622 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 14:46:12 +0100 Subject: [PATCH 158/190] moved comments around --- neural_lam/weather_dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 2f37b4b3..0577f89f 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -154,9 +154,6 @@ def __init__( state_times = self.da_state.time self.time_step_state = get_time_step(state_times) if self.da_forcing is not None: - # Forcing data is part of the same datastore as state data - # During creation the time dimension of the forcing data - # is matched to the state data if self.datastore.is_forecast: forcing_times = self.da_forcing.analysis_time self.forecast_step_forcing = self._get_time_step( @@ -165,9 +162,6 @@ def __init__( else: forcing_times = self.da_forcing.time self.time_step_forcing = self._get_time_step(forcing_times.values) - # Boundary data is part of a separate datastore - # The boundary data is allowed to have a different time_step - # Check that the boundary data covers the required time range if self.datastore_boundary.is_forecast: boundary_times = self.da_boundary_forcing.analysis_time self.forecast_step_boundary = self._get_time_step( @@ -177,6 +171,12 @@ def __init__( boundary_times = self.da_boundary_forcing.time self.time_step_boundary = self._get_time_step(boundary_times.values) + # Forcing data is part of the same datastore as state data + # During creation the time dimension of the forcing data + # is matched to the state data + # Boundary data is part of a separate datastore + # The boundary data is allowed to have a different time_step + # Check that the boundary data covers the required time range check_time_overlap( self.da_state, self.da_boundary_forcing, From 6e3f3bd42ad804ba80765369c4688908c701f99c Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 19 Dec 2024 12:12:01 +0100 Subject: [PATCH 159/190] Make hotfix to make boundary dataset created with mdp work --- neural_lam/datastore/mdp.py | 76 +++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 3682a51e..4007c192 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -1,5 +1,6 @@ # Standard library import copy +import functools import warnings from functools import cached_property from pathlib import Path @@ -8,9 +9,9 @@ # Third-party import cartopy.crs as ccrs import mllam_data_prep as mdp +import numpy as np import xarray as xr from loguru import logger -from numpy import ndarray # Local from .base import BaseRegularGridDatastore, CartesianGridShape @@ -86,6 +87,8 @@ def __init__(self, config_path, reuse_existing=True): print("With the following splits (over time):") for split in required_splits: da_split = self._ds.splits.sel(split_name=split) + if "grid_index" in da_split.coords: + da_split = da_split.isel(grid_index=0) da_split_start = da_split.sel(split_part="start").load().item() da_split_end = da_split.sel(split_part="end").load().item() print(f" {split:<8s}: {da_split_start} to {da_split_end}") @@ -266,27 +269,15 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: da_category = self._ds[category] - 
# set units on x y coordinates if missing - for coord in ["x", "y"]: - if "units" not in da_category[coord].attrs: - da_category[coord].attrs["units"] = "m" - # set multi-index for grid-index da_category = da_category.set_index(grid_index=self.CARTESIAN_COORDS) if "time" in da_category.dims: - t_start = ( - self._ds.splits.sel(split_name=split) - .sel(split_part="start") - .load() - .item() - ) - t_end = ( - self._ds.splits.sel(split_name=split) - .sel(split_part="end") - .load() - .item() - ) + da_split = self._ds.splits.sel(split_name=split) + if "grid_index" in da_split.coords: + da_split = da_split.isel(grid_index=0) + t_start = da_split.sel(split_part="start").load().item() + t_end = da_split.sel(split_part="end").load().item() da_category = da_category.sel(time=slice(t_start, t_end)) dim_order = self.expected_dim_order(category=category) @@ -324,6 +315,8 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: ) ds_stats = self._ds[stats_variables.keys()].rename(stats_variables) + if "grid_index" in ds_stats.coords: + ds_stats = ds_stats.isel(grid_index=0) return ds_stats @property @@ -404,7 +397,7 @@ def grid_shape_state(self): assert da_x.ndim == da_y.ndim == 1 return CartesianGridShape(x=da_x.size, y=da_y.size) - def get_xy(self, category: str, stacked: bool) -> ndarray: + def get_xy(self, category: str, stacked: bool = True) -> np.ndarray: """Return the x, y coordinates of the dataset. Parameters @@ -449,3 +442,48 @@ def get_xy(self, category: str, stacked: bool) -> ndarray: da_xy = da_xy.transpose(*dims) return da_xy.values + + @functools.lru_cache + def get_lat_lon(self, category: str) -> np.ndarray: + """ + Return the longitude, latitude coordinates of the dataset as numpy + array for a given category of data. + Override in MDP to use lat/lons directly from xr.Dataset, if available. + + Parameters + ---------- + category : str + The category of the dataset (state/forcing/static). + + Returns + ------- + np.ndarray + The longitude, latitude coordinates of the dataset + with shape `[n_grid_points, 2]`. + """ + # Check first if lat/lon saved in ds + lookup_ds = self._ds + if "latitude" in lookup_ds.coords and "longitude" in lookup_ds.coords: + lon = lookup_ds.longitude + lat = lookup_ds.latitude + elif "lat" in lookup_ds.coords and "lon" in lookup_ds.coords: + lon = lookup_ds.lon + lat = lookup_ds.lat + else: + # Not saved, use method from BaseDatastore to derive from x/y + return super().get_lat_lon(category) + + coords = np.stack((lon.values, lat.values), axis=1) + return coords + + @property + def num_grid_points(self) -> int: + """Return the number of grid points in the dataset. + + Returns + ------- + int + The number of grid points in the dataset. + + """ + return len(self._ds.grid_index) From 20ca2636c363fd82323aaff31594259cb5f49fbd Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 15:34:20 +0100 Subject: [PATCH 160/190] Bugfixes --- neural_lam/datastore/mdp.py | 2 +- neural_lam/utils.py | 2 +- neural_lam/weather_dataset.py | 8 ++++---- tests/dummy_datastore.py | 12 ++++++++++++ 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 4007c192..3f1e0441 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -153,7 +153,7 @@ def num_ensemble_members(self) -> int: The number of ensemble members in the dataset. """ - return None + return 1 def get_vars_units(self, category: str) -> List[str]: """Return the units of the variables in the given category. 
diff --git a/neural_lam/utils.py b/neural_lam/utils.py index f55f17da..c2bc3c57 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -244,7 +244,7 @@ def init_wandb_metrics(wandb_logger, val_steps): experiment.define_metric(f"val_loss_unroll{step}", summary="min") -def get_time_step(self, times): +def get_time_step(times): """Calculate the time step from a time dataarray. Parameters diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 0577f89f..91d68462 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -156,20 +156,20 @@ def __init__( if self.da_forcing is not None: if self.datastore.is_forecast: forcing_times = self.da_forcing.analysis_time - self.forecast_step_forcing = self._get_time_step( + self.forecast_step_forcing = get_time_step( self.da_forcing.elapsed_forecast_duration ) else: forcing_times = self.da_forcing.time - self.time_step_forcing = self._get_time_step(forcing_times.values) + self.time_step_forcing = get_time_step(forcing_times.values) if self.datastore_boundary.is_forecast: boundary_times = self.da_boundary_forcing.analysis_time - self.forecast_step_boundary = self._get_time_step( + self.forecast_step_boundary = get_time_step( self.da_boundary_forcing.elapsed_forecast_duration ) else: boundary_times = self.da_boundary_forcing.time - self.time_step_boundary = self._get_time_step(boundary_times.values) + self.time_step_boundary = get_time_step(boundary_times.values) # Forcing data is part of the same datastore as state data # During creation the time dimension of the forcing data diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index 1bdbc8c8..dcc5510f 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -410,6 +410,18 @@ def num_grid_points(self) -> int: """ return self._num_grid_points + @property + def num_ensemble_members(self) -> int: + """Return the number of ensemble members in the dataset. + + Returns + ------- + int + The number of ensemble members in the dataset. + + """ + return 1 + @cached_property def grid_shape_state(self) -> CartesianGridShape: """The shape of the grid for the state variables. From c0c50d5e44580c6a8e486d44cee19be6ed1fd847 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 15:47:16 +0100 Subject: [PATCH 161/190] --- neural_lam/datastore/base.py | 19 +++++++++++++++---- neural_lam/datastore/mdp.py | 12 ------------ neural_lam/datastore/npyfilesmeps/store.py | 10 +++++++++- tests/dummy_datastore.py | 12 ------------ 4 files changed, 24 insertions(+), 29 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index 84600b50..8b51b07e 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -47,7 +47,6 @@ class BaseDatastore(abc.ABC): each of the `x` and `y` coordinates. """ - is_ensemble: bool = False is_forecast: bool = False @property @@ -299,17 +298,29 @@ def num_grid_points(self) -> int: pass @property - @abc.abstractmethod def num_ensemble_members(self) -> int: """Return the number of ensemble members in the dataset. Returns ------- int - The number of ensemble members in the dataset. + The number of ensemble members in the dataset (default is 1 - + not an ensemble). """ - pass + return 1 + + @property + def is_ensemble(self) -> bool: + """Return whether the dataset represents ensemble data. + + Returns + ------- + bool + True if the dataset represents ensemble data, False otherwise. 
+ + """ + return self.num_ensemble_members > 1 @cached_property @abc.abstractmethod diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 3f1e0441..b82c9277 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -143,18 +143,6 @@ def step_length(self) -> int: da_dt = self._ds["time"].diff("time") return (da_dt.dt.seconds[0] // 3600).item() - @property - def num_ensemble_members(self) -> int: - """The number of ensemble members in the dataset. - - Returns - ------- - int - The number of ensemble members in the dataset. - - """ - return 1 - def get_vars_units(self, category: str) -> List[str]: """Return the units of the variables in the given category. diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 1b0f6065..b4d93ca3 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -141,7 +141,6 @@ class NpyFilesDatastoreMEPS(BaseRegularGridDatastore): """ SHORT_NAME = "npyfilesmeps" - is_ensemble = True is_forecast = True def __init__( @@ -200,6 +199,15 @@ def config(self) -> NpyDatastoreConfig: @property def num_ensemble_members(self) -> int: + """Return the number of ensemble members in the dataset as defined in + the config file. + + Returns + ------- + int + The number of ensemble members in the dataset. + + """ return self.config.dataset.num_ensemble_members def get_dataarray(self, category: str, split: str) -> DataArray: diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index dcc5510f..1bdbc8c8 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -410,18 +410,6 @@ def num_grid_points(self) -> int: """ return self._num_grid_points - @property - def num_ensemble_members(self) -> int: - """Return the number of ensemble members in the dataset. - - Returns - ------- - int - The number of ensemble members in the dataset. - - """ - return 1 - @cached_property def grid_shape_state(self) -> CartesianGridShape: """The shape of the grid for the state variables. 
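For reference, the pattern this patch settles on (a default of one ensemble member on the base class, with is_ensemble derived from it) can be sketched independently of the real datastore classes; the classes below are made-up stand-ins, not part of neural-lam:

class MinimalDatastore:
    """Stand-in showing only the ensemble-related defaults."""

    @property
    def num_ensemble_members(self) -> int:
        # Default: deterministic data, i.e. a single "member"
        return 1

    @property
    def is_ensemble(self) -> bool:
        return self.num_ensemble_members > 1


class FakeEnsembleDatastore(MinimalDatastore):
    # A subclass only needs to override the member count
    @property
    def num_ensemble_members(self) -> int:
        return 5  # hypothetical ensemble size


print(MinimalDatastore().is_ensemble)       # False
print(FakeEnsembleDatastore().is_ensemble)  # True
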
From 94de24018ea75a229245f54f3b8aa17cfc2d79a4 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 15:54:17 +0100 Subject: [PATCH 162/190] Add missing check if boundary_forcing is None --- neural_lam/weather_dataset.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 91d68462..bb9934b4 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -144,7 +144,8 @@ def __init__( self.da_state = self.da_state # Check time step consistency in state data and determine time steps - # for state, forcing and boundary data + # for state, forcing and boundary forcing data + # STATE if self.datastore.is_forecast: state_times = self.da_state.analysis_time self.forecast_step_state = get_time_step( @@ -153,6 +154,7 @@ def __init__( else: state_times = self.da_state.time self.time_step_state = get_time_step(state_times) + # FORCING if self.da_forcing is not None: if self.datastore.is_forecast: forcing_times = self.da_forcing.analysis_time @@ -162,6 +164,8 @@ def __init__( else: forcing_times = self.da_forcing.time self.time_step_forcing = get_time_step(forcing_times.values) + # BOUNDARY FORCING + if self.da_boundary_forcing is not None: if self.datastore_boundary.is_forecast: boundary_times = self.da_boundary_forcing.analysis_time self.forecast_step_boundary = get_time_step( @@ -177,14 +181,15 @@ def __init__( # Boundary data is part of a separate datastore # The boundary data is allowed to have a different time_step # Check that the boundary data covers the required time range - check_time_overlap( - self.da_state, - self.da_boundary_forcing, - da1_is_forecast=self.datastore.is_forecast, - da2_is_forecast=self.datastore_boundary.is_forecast, - num_past_steps=self.num_past_boundary_steps, - num_future_steps=self.num_future_boundary_steps, - ) + if self.da_boundary_forcing is not None: + check_time_overlap( + self.da_state, + self.da_boundary_forcing, + da1_is_forecast=self.datastore.is_forecast, + da2_is_forecast=self.datastore_boundary.is_forecast, + num_past_steps=self.num_past_boundary_steps, + num_future_steps=self.num_future_boundary_steps, + ) # Set up for standardization # TODO: This will become part of ar_model.py soon! 
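The coverage requirement that the guarded check_time_overlap call enforces can be summarised with a small standalone sketch; the values below are hypothetical, and dt_boundary plays the role of the boundary datastore's time step:

import numpy as np

state_times = np.arange(
    np.datetime64("2020-01-01T00"),
    np.datetime64("2020-01-03T00"),
    np.timedelta64(3, "h"),
)
boundary_times = np.arange(
    np.datetime64("2019-12-31T12"),
    np.datetime64("2020-01-04T00"),
    np.timedelta64(6, "h"),
)
num_past_steps, num_future_steps = 2, 1
dt_boundary = boundary_times[1] - boundary_times[0]

# Boundary forcing must extend num_past_steps before the first state time
# and num_future_steps after the last state time
required_min = state_times.min() - num_past_steps * dt_boundary
required_max = state_times.max() + num_future_steps * dt_boundary

covered = (
    boundary_times.min() <= required_min
    and boundary_times.max() >= required_max
)
print(covered)  # True for the values above
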
From 1d14a157e81cef3b6584758390077b5327a82f61 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 20:23:10 +0100 Subject: [PATCH 163/190] bugfix typo in time check --- neural_lam/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index c2bc3c57..32f92cf2 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -311,8 +311,8 @@ def check_time_overlap( times_da2 = da2.time time_step_da2 = get_time_step(times_da2.values) - time_min_da2 = da2.min().values - time_max_da2 = da2.max().values + time_min_da2 = times_da2.min().values + time_max_da2 = times_da2.max().values # Calculate required bounds for da2 using its time step da2_required_time_min = time_min_da1 - num_past_steps * time_step_da2 From 7e5797e18f5a251441e98ef9ad63f412c06ef409 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Fri, 20 Dec 2024 21:39:30 +0100 Subject: [PATCH 164/190] introduce crop_time_if_needed to align interior with boundary data --- neural_lam/utils.py | 77 ++++++++++++++++++++++++++++++++++- neural_lam/weather_dataset.py | 30 ++++++++++---- 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 32f92cf2..2a8ba6ed 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -320,14 +320,87 @@ def check_time_overlap( if time_min_da2 > da2_required_time_min: raise ValueError( - f"The second DataArray ('Boundary forcing'?) data starts too late." + f"The second DataArray (e.g. 'boundary forcing') starts too late." f"Required start: {da2_required_time_min}, " f"but DataArray starts at {time_min_da2}." ) if time_max_da2 < da2_required_time_max: raise ValueError( - f"The second DataArray ('Boundary forcing'?) ends too early." + f"The second DataArray (e.g. 'boundary forcing') ends too early." f"Required end: {da2_required_time_max}, " f"but DataArray ends at {time_max_da2}." ) + + +def crop_time_if_needed( + da1, da2, da1_is_forecast=False, da2_is_forecast=False, num_past_steps=1 +): + """ + Slice away the first few timesteps from the first DataArray (e.g. 'state') + if the second DataArray (e.g. boundary forcing) does not cover that range + (including num_past_steps). + + Parameters + ---------- + da1 : xr.DataArray + The first DataArray to crop. + da2 : xr.DataArray + The second DataArray to compare against. + da1_is_forecast : bool, optional + Whether the first dataarray is forecast data. + da2_is_forecast : bool, optional + Whether the second dataarray is forecast data. + num_past_steps : int + Number of past time steps to consider. + + Return + ------ + da1 : xr.DataArray + The cropped first DataArray and print a warning if any steps are + removed. + """ + if da1 is None or da2 is None: + return da1 + + try: + check_time_overlap( + da1, + da2, + da1_is_forecast, + da2_is_forecast, + num_past_steps, + num_future_steps=0, + ) + return da1 + except ValueError: + # If da2 coverage is insufficient, remove earliest da1 times + # until coverage is possible. Figure out how many steps to remove. + if da1_is_forecast: + da1_tvals = da1.analysis_time.values + else: + da1_tvals = da1.time.values + if da2_is_forecast: + da2_tvals = da2.analysis_time.values + else: + da2_tvals = da2.time.values + + if da1_tvals[0] < da2_tvals[0]: + # Calculate how many steps to remove skip just enough steps so that: + if da2_is_forecast: + # The windowing for forecast type data happens in the + # elapsed_forecast_duration dimension, so we can omit it here. 
+ required_min = da2_tvals[0] + else: + dt = get_time_step(da2_tvals) + required_min = da2_tvals[0] + num_past_steps * dt + first_valid_idx = (da1_tvals >= required_min).argmax() + n_removed = first_valid_idx + if n_removed > 0: + print( + f"Warning: removing {n_removed} da1 (e.g. 'state') " + f"timesteps to align with da2 (e.g. 'boundary forcing') " + f"coverage." + ) + da1 = da1.isel(time=slice(first_valid_idx, None)) + return da1 diff --git a/neural_lam/weather_dataset.py index bb9934b4..66165b6f 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -11,7 +11,11 @@ # First-party from neural_lam.datastore.base import BaseDatastore -from neural_lam.utils import check_time_overlap, get_time_step +from neural_lam.utils import ( + check_time_overlap, + crop_time_if_needed, + get_time_step, +) class WeatherDataset(torch.utils.data.Dataset): @@ -175,12 +179,24 @@ def __init__( boundary_times = self.da_boundary_forcing.time self.time_step_boundary = get_time_step(boundary_times.values) - # Forcing data is part of the same datastore as state data - # During creation the time dimension of the forcing data - # is matched to the state data - # Boundary data is part of a separate datastore - # The boundary data is allowed to have a different time_step - # Check that the boundary data covers the required time range + # Forcing data is part of the same datastore as state data. During + # creation, the time dimension of the forcing data is matched to the + # state data. + # Boundary data is part of a separate datastore. The boundary data is + # allowed to have a different time_step. A check that the boundary data + # covers the required time range is therefore needed. + + # Crop interior data if boundary coverage is insufficient + if self.da_boundary_forcing is not None: + self.da_state = crop_time_if_needed( + self.da_state, + self.da_boundary_forcing, + da1_is_forecast=self.datastore.is_forecast, + da2_is_forecast=self.datastore_boundary.is_forecast, + num_past_steps=self.num_past_boundary_steps, + ) + + # Now do final overlap check and possibly raise errors if still invalid if self.da_boundary_forcing is not None: check_time_overlap( self.da_state, From b2960956b7aa551bbd26dad92db966c3f737d860 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Fri, 10 Jan 2025 16:17:31 +0100 Subject: [PATCH 165/190] Fix bug in datastore loading in graph creation script --- neural_lam/build_rectangular_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/build_rectangular_graph.py index 0280b016..fea578c5 100644 --- a/neural_lam/build_rectangular_graph.py +++ b/neural_lam/build_rectangular_graph.py @@ -9,7 +9,7 @@ # Local from .
import utils -from .config import load_config_and_datastore +from .config import load_config_and_datastores WMG_ARCHETYPES = { "keisler": wmg.create.archetype.create_keisler_graph, @@ -81,7 +81,7 @@ def main(input_args=None): args.graph_name is not None ), "Specify the name to save graph as with --graph_name" - _, datastore, datastore_boundary = load_config_and_datastore( + _, datastore, datastore_boundary = load_config_and_datastores( config_path=args.config_path ) From 2f6515d56e2592e19b2b426c77b6c4cd0590a126 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 13 Jan 2025 11:02:12 +0100 Subject: [PATCH 166/190] Return None from mdp datastore when no forcing is present, to not break handling in WeatherDataset --- neural_lam/datastore/mdp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index b82c9277..2eb96a79 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -4,7 +4,7 @@ import warnings from functools import cached_property from pathlib import Path -from typing import List +from typing import List, Union # Third-party import cartopy.crs as ccrs @@ -217,7 +217,7 @@ def get_num_data_vars(self, category: str) -> int: """ return len(self.get_vars_names(category)) - def get_dataarray(self, category: str, split: str) -> xr.DataArray: + def get_dataarray(self, category: str, split: str) -> Union[xr.DataArray, None]: """ Return the processed data (as a single `xr.DataArray`) for the given category of data and test/train/val-split that covers all the data (in @@ -253,7 +253,7 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: """ if category not in self._ds: warnings.warn(f"no {category} data found in datastore") - return [] + return None da_category = self._ds[category] From 1c75fe168f00e91d6e1f8efe95e52c98eaecccf1 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 13 Jan 2025 11:52:00 +0100 Subject: [PATCH 167/190] Fix time cropping for both start and end of interval --- neural_lam/utils.py | 55 ++++++++++++++++++++++------------- neural_lam/weather_dataset.py | 1 + 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 030082dd..fb0a9e6f 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -448,7 +448,12 @@ def check_time_overlap( def crop_time_if_needed( - da1, da2, da1_is_forecast=False, da2_is_forecast=False, num_past_steps=1 + da1, + da2, + da1_is_forecast=False, + da2_is_forecast=False, + num_past_steps=1, + num_future_steps=1, ): """ Slice away the first few timesteps from the first DataArray (e.g. 'state') @@ -467,6 +472,8 @@ def crop_time_if_needed( Whether the second dataarray is forecast data. num_past_steps : int Number of past time steps to consider. + num_future_steps : int + Number of future time steps to consider. Return ------ @@ -484,7 +491,7 @@ def crop_time_if_needed( da1_is_forecast, da2_is_forecast, num_past_steps, - num_future_steps=0, + num_future_steps, ) return da1 except ValueError: @@ -499,22 +506,30 @@ def crop_time_if_needed( else: da2_tvals = da2.time.values - if da1_tvals[0] < da2_tvals[0]: - # Calculate how many steps to remove skip just enough steps so that: - if da2_is_forecast: - # The windowing for forecast type data happens in the - # elapsed_forecast_duration dimension, so we can omit it here. 
- required_min = da2_tvals[0] - else: - dt = get_time_step(da2_tvals) - required_min = da2_tvals[0] + num_past_steps * dt - first_valid_idx = (da1_tvals >= required_min).argmax() - n_removed = first_valid_idx - if n_removed > 0: - print( - f"Warning: removing {n_removed} da1 (e.g. 'state') " - f"timesteps to align with da2 (e.g. 'boundary forcing') " - f"coverage." - ) - da1 = da1.isel(time=slice(first_valid_idx, None)) + # Calculate how many steps we would have to remove + if da2_is_forecast: + # The windowing for forecast type data happens in the + # elapsed_forecast_duration dimension, so we can omit it here. + required_min = da2_tvals[0] + required_max = da2_tvals[-1] + else: + dt = get_time_step(da2_tvals) + required_min = da2_tvals[0] + num_past_steps * dt + required_max = da2_tvals[-1] - num_future_steps * dt + + # Calculate how many steps to remove at beginning and end + first_valid_idx = (da1_tvals >= required_min).argmax() + n_removed_begin = first_valid_idx + last_valid_idx_plus_one = ( + da1_tvals > required_max + ).argmax() # To use for slice + n_removed_begin = first_valid_idx + n_removed_end = len(da1_tvals) - last_valid_idx_plus_one + if n_removed_begin > 0 or n_removed_end > 0: + print( + f"Warning: cropping da1 (e.g. 'state') to align with da2 " + f"(e.g. 'boundary forcing'). Removed {n_removed_begin} steps " + f"at start of data interval and {n_removed_end} at the end." + ) + da1 = da1.isel(time=slice(first_valid_idx, last_valid_idx_plus_one)) return da1 diff --git a/neural_lam/weather_dataset.py index 66165b6f..c122d722 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -194,6 +194,7 @@ def __init__( da1_is_forecast=self.datastore.is_forecast, da2_is_forecast=self.datastore_boundary.is_forecast, num_past_steps=self.num_past_boundary_steps, + num_future_steps=self.num_future_boundary_steps, ) # Now do final overlap check and possibly raise errors if still invalid From 44c8284653b732cd23025a3073d693c8fd131b5f Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 13 Jan 2025 12:12:43 +0100 Subject: [PATCH 168/190] Do not count forcing time delta as input dim if no forcing is used --- neural_lam/models/ar_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neural_lam/models/ar_model.py index c3a8549a..9eace2d7 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -106,13 +106,13 @@ def __init__( interior_static_dim, ) = self.interior_static_features.shape self.num_total_grid_nodes = self.num_interior_nodes - self.interior_dim = ( - 2 * self.grid_output_dim - + interior_static_dim + self.interior_dim = 2 * self.grid_output_dim + interior_static_dim + if num_forcing_vars > 0: + # Interior has forcing, add on forcing dimensions # Time deltas count as one additional forcing_feature - + (num_forcing_vars + 1) - * (num_past_forcing_steps + num_future_forcing_steps + 1) - ) + self.interior_dim = self.interior_dim + (num_forcing_vars + 1) * ( + num_past_forcing_steps + num_future_forcing_steps + 1 + ) # If datastore_boundary is given, the model is forced from the boundary self.boundary_forced = datastore_boundary is not None From bba94a55789bd5f825aaf97d4b382bc6368feb08 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 13 Jan 2025 13:36:06 +0100 Subject: [PATCH 169/190] linter --- neural_lam/plot_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_lam/plot_graph.py
b/neural_lam/plot_graph.py index e9955b03..9eac68ad 100644 --- a/neural_lam/plot_graph.py +++ b/neural_lam/plot_graph.py @@ -136,7 +136,7 @@ def main(): to_pos, "green", 1, - f"Mesh up {level_i}-{level_i+1}", + f"Mesh up {level_i}-{level_i + 1}", ) ) # Add down edges @@ -150,7 +150,7 @@ def main(): to_pos, "green", 1, - f"Mesh down {level_i+1}-{level_i}", + f"Mesh down {level_i + 1}-{level_i}", ) ) From a33b33dfc5a4551534b1796f60689a4ae8e7e38c Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 13 Jan 2025 13:39:36 +0100 Subject: [PATCH 170/190] fixed missing boundary_datastore arg --- neural_lam/models/ar_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index c3a8549a..e192903a 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -205,7 +205,9 @@ def _create_dataarray_from_tensor( # TODO: creating an instance of WeatherDataset here on every call is # not how this should be done but whether WeatherDataset should be # provided to ARModel or where to put plotting still needs discussion - weather_dataset = WeatherDataset(datastore=self._datastore, split=split) + weather_dataset = WeatherDataset( + datastore=self._datastore, datastore_boundary=None, split=split + ) time = np.array(time.cpu(), dtype="datetime64[ns]") da = weather_dataset.create_dataarray_from_tensor( tensor=tensor.cpu().numpy(), time=time, category=category From 4e7bd9a59851f65daa012195590ba7155c6b5f13 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 13 Jan 2025 13:45:20 +0100 Subject: [PATCH 171/190] improve cpu capabilities --- neural_lam/models/ar_model.py | 13 ++++++++++--- neural_lam/weather_dataset.py | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index e192903a..a0637afe 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -206,11 +206,18 @@ def _create_dataarray_from_tensor( # not how this should be done but whether WeatherDataset should be # provided to ARModel or where to put plotting still needs discussion weather_dataset = WeatherDataset( - datastore=self._datastore, datastore_boundary=None, split=split + datastore=self._datastore, + datastore_boundary=None, + split=split, ) - time = np.array(time.cpu(), dtype="datetime64[ns]") + + # Move to CPU if on GPU + time = time.detach().cpu() + time = np.array(time, dtype="datetime64[ns]") + + tensor = tensor.detach().cpu() da = weather_dataset.create_dataarray_from_tensor( - tensor=tensor.cpu().numpy(), time=time, category=category + tensor=tensor, time=time, category=category ) return da diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 66165b6f..ea74a6a8 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -801,8 +801,9 @@ def _is_listlike(obj): if add_time_as_dim: coords["time"] = time + tensor = tensor.detach().cpu().numpy() da = xr.DataArray( - tensor.cpu().numpy(), + tensor, dims=dims, coords=coords, ) From 7316a0036866c64f3198035002d92930d487e08a Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 13 Jan 2025 15:02:12 +0100 Subject: [PATCH 172/190] format --- neural_lam/datastore/mdp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 2eb96a79..9494e718 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -217,7 +217,9 @@ def get_num_data_vars(self, category: 
str) -> int: """ return len(self.get_vars_names(category)) - def get_dataarray(self, category: str, split: str) -> Union[xr.DataArray, None]: + def get_dataarray( + self, category: str, split: str + ) -> Union[xr.DataArray, None]: """ Return the processed data (as a single `xr.DataArray`) for the given category of data and test/train/val-split that covers all the data (in From 66727566bef6547dd4b1566d03738dc4675dfeac Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Mon, 13 Jan 2025 15:09:39 +0100 Subject: [PATCH 173/190] bugfix indexing batch-index for time --- neural_lam/models/ar_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index fc4fa757..31378d3d 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -505,7 +505,7 @@ def plot_examples(self, batch, n_examples, split, prediction=None): prediction, target, _, _ = self.common_step(batch) target = batch[1] - time = batch[3] + time = batch[-1] # Rescale to original data scale prediction_rescaled = prediction * self.state_std + self.state_mean From 4d6bbed02dda047b85fee673cd451dfd2b762483 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Mon, 13 Jan 2025 15:55:42 +0100 Subject: [PATCH 174/190] Do not force plot extent to be global --- neural_lam/vis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_lam/vis.py b/neural_lam/vis.py index 10b84fb7..044afa7a 100644 --- a/neural_lam/vis.py +++ b/neural_lam/vis.py @@ -78,7 +78,6 @@ def plot_on_axis( """ Plot weather state on given axis """ - ax.set_global() ax.coastlines() # Add coastline outlines extent = datastore.get_xy_extent("state") From ecf05e0abbce4d68e92837295818c22fe3e70cf9 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 14 Jan 2025 12:08:01 +0100 Subject: [PATCH 175/190] Fix bug making time deltas not be multiple of state time step --- neural_lam/weather_dataset.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 2e186076..602c8702 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -456,9 +456,8 @@ def _slice_time( else: boundary_time_step = self.time_step_boundary state_time_step = self.time_step_state - time_deltas = ( - da_forcing_matched["window"] - * (boundary_time_step / state_time_step), + time_deltas = da_forcing_matched["window"] * ( + boundary_time_step / state_time_step ) else: if self.datastore.is_forecast: @@ -467,18 +466,14 @@ def _slice_time( else: forcing_time_step = self.time_step_forcing state_time_step = self.time_step_state - time_deltas = ( - da_forcing_matched["window"] - * (forcing_time_step / state_time_step), + time_deltas = da_forcing_matched["window"] * ( + forcing_time_step / state_time_step ) - time_deltas = da_forcing_matched.isel( - grid_index=0, forcing_feature=0 - ).window.values # Add time deltas as a new coordinate to concatenate to the # forcing features later as temporal embedding in the model da_forcing_matched["time_deltas"] = ( ("window"), - time_deltas, + time_deltas.values, ) return da_state_sliced, da_forcing_matched From 27061ec667a585c270c96d9fa5b7e3276fc34efb Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 14 Jan 2025 12:10:20 +0100 Subject: [PATCH 176/190] Do not add time delta features for interior forcing --- neural_lam/models/ar_model.py | 15 ++++++------- neural_lam/weather_dataset.py | 40 +++++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 21 
deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 31378d3d..9e7bf66d 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -106,13 +106,12 @@ def __init__( interior_static_dim, ) = self.interior_static_features.shape self.num_total_grid_nodes = self.num_interior_nodes - self.interior_dim = 2 * self.grid_output_dim + interior_static_dim - if num_forcing_vars > 0: - # Interior has forcing, add on forcing dimensions - # Time deltas count as one additional forcing_feature - self.interior_dim = self.interior_dim + (num_forcing_vars + 1) * ( - num_past_forcing_steps + num_future_forcing_steps + 1 - ) + self.interior_dim = ( + 2 * self.grid_output_dim + + interior_static_dim + + num_forcing_vars + * (num_past_forcing_steps + num_future_forcing_steps + 1) + ) # If datastore_boundary is given, the model is forced from the boundary self.boundary_forced = datastore_boundary is not None @@ -144,7 +143,7 @@ def __init__( num_future_boundary_steps = args.num_future_boundary_steps self.boundary_dim = ( boundary_static_dim - # Temporal Embedding counts as one additional forcing_feature + # Time delta counts as one additional forcing_feature + (num_boundary_forcing_vars + 1) * (num_past_boundary_steps + num_future_boundary_steps + 1) ) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 602c8702..a1a3d616 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -478,7 +478,9 @@ def _slice_time( return da_state_sliced, da_forcing_matched - def _process_windowed_data(self, da_windowed, da_state, da_target_times): + def _process_windowed_data( + self, da_windowed, da_state, da_target_times, add_time_deltas=True + ): """Helper function to process windowed data. This function stacks the 'forcing_feature' and 'window' dimensions and adds the time step deltas to the existing features. @@ -491,6 +493,9 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): The state dataarray. da_target_times : xr.DataArray The target times. + add_time_deltas : bool + If time deltas to each window position should be concatenated + as features Returns ------- @@ -507,16 +512,17 @@ def _process_windowed_data(self, da_windowed, da_state, da_target_times): da_windowed = da_windowed.stack( {stacked_dim: ("forcing_feature", "window")} ) - # Add the time deltas a new feature to the windowed - # data - time_deltas = da_windowed["time_deltas"].isel( - forcing_feature_windowed=slice(0, window_size) - ) - # All data variables share the same time deltas - da_windowed = xr.concat( - [da_windowed, time_deltas], - dim="forcing_feature_windowed", - ) + if add_time_deltas: + # Add the time deltas a new feature to the windowed + # data + time_deltas = da_windowed["time_deltas"].isel( + forcing_feature_windowed=slice(0, window_size) + ) + # All data variables share the same time deltas + da_windowed = xr.concat( + [da_windowed, time_deltas], + dim="forcing_feature_windowed", + ) else: # Create empty DataArray with the correct dimensions and coordinates da_windowed = xr.DataArray( @@ -633,11 +639,19 @@ def _build_item_dataarrays(self, idx): # This function handles the stacking of the forcing and boundary data # and adds the time deltas. It can handle `None` inputs for the forcing # and boundary data (and simlpy return an empty DataArray in that case). + # We don't need time delta features for interior forcing, as these + # deltas are always the same. 
da_forcing_windowed = self._process_windowed_data( - da_forcing_windowed, da_state, da_target_times + da_forcing_windowed, + da_state, + da_target_times, + add_time_deltas=False, ) da_boundary_windowed = self._process_windowed_data( - da_boundary_windowed, da_state, da_target_times + da_boundary_windowed, + da_state, + da_target_times, + add_time_deltas=True, ) return ( From 64a28c3938aefdeb7c3a34c38b2ee141a3c3f6e9 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 14 Jan 2025 15:52:37 +0100 Subject: [PATCH 177/190] Compute forcing time deltas per sample to accurately represent shift between interior and boundary time steps --- neural_lam/weather_dataset.py | 44 +++++++---------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index a1a3d616..8903f234 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -431,13 +431,15 @@ def _slice_time( forcing_time_idx + num_future_steps + 1, ), ) - + window_time_deltas = (da_window.time - state_time).values da_window = da_window.rename({"time": "window"}) # Assign 'window' coordinate da_window = da_window.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) + # Assign window time delta coordinate + da_window["window_time_deltas"] = ("window", window_time_deltas) da_window = da_window.expand_dims(dim={"time": [state_time]}) @@ -445,37 +447,6 @@ def _slice_time( da_forcing_matched = xr.concat(da_list, dim="time") - # Generate time_deltas for the forcing/boundary data. This is the time - # difference in multiples of state time steps between the - # forcing/boundary time and the state time - - if is_boundary: - if self.datastore_boundary.is_forecast: - boundary_time_step = self.forecast_step_boundary - state_time_step = self.forecast_step_state - else: - boundary_time_step = self.time_step_boundary - state_time_step = self.time_step_state - time_deltas = da_forcing_matched["window"] * ( - boundary_time_step / state_time_step - ) - else: - if self.datastore.is_forecast: - forcing_time_step = self.forecast_step_forcing - state_time_step = self.forecast_step_state - else: - forcing_time_step = self.time_step_forcing - state_time_step = self.time_step_state - time_deltas = da_forcing_matched["window"] * ( - forcing_time_step / state_time_step - ) - # Add time deltas as a new coordinate to concatenate to the - # forcing features later as temporal embedding in the model - da_forcing_matched["time_deltas"] = ( - ("window"), - time_deltas.values, - ) - return da_state_sliced, da_forcing_matched def _process_windowed_data( @@ -514,9 +485,12 @@ def _process_windowed_data( ) if add_time_deltas: # Add the time deltas a new feature to the windowed - # data - time_deltas = da_windowed["time_deltas"].isel( - forcing_feature_windowed=slice(0, window_size) + # data, as a multiple of the state time step + time_deltas = ( + da_windowed["window_time_deltas"].isel( + forcing_feature_windowed=slice(0, window_size) + ) + / self.time_step_state ) # All data variables share the same time deltas da_windowed = xr.concat( From 284a9543cc32ecdc42b7da01c0488b6de64cf94b Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Tue, 14 Jan 2025 18:02:14 +0100 Subject: [PATCH 178/190] Implement time delta encodings --- neural_lam/models/ar_model.py | 18 ++++++++- neural_lam/models/base_graph_model.py | 56 +++++++++++++++++++++++++++ neural_lam/train_model.py | 7 ++++ neural_lam/weather_dataset.py | 1 + 4 files changed, 81 insertions(+), 1 deletion(-) 
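As a companion to the diff that follows, here is a small stand-alone sketch of the sinusoidal time-delta encoding this patch introduces: each scalar time delta (in multiples of the state time step) is mapped to an even number of sin/cos features whose frequencies are derived from the largest expected delta magnitude, mirroring the `enc_freq_denom` construction below. The encoding dimension, magnitude and delta values here are illustrative only and are not taken from any configuration.

    import torch

    enc_dim = 8          # must be even: half sine, half cosine features
    max_magnitude = 6.0  # assumed largest |time delta|, in state time steps

    freq_indices = 1.0 + torch.arange(enc_dim // 2, dtype=torch.float)
    freq_denom = (2 * max_magnitude) ** (2 * freq_indices / enc_dim)

    # Hypothetical window of time deltas relative to the current state time
    time_deltas = torch.tensor([-6.0, -3.0, 0.0, 3.0])

    # Broadcast to (num_deltas, enc_dim // 2), then concatenate sin and cos parts
    angles = time_deltas.unsqueeze(-1) / freq_denom
    encoded = torch.cat((torch.sin(angles), torch.cos(angles)), dim=-1)
    print(encoded.shape)  # torch.Size([4, 8])
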
diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 9e7bf66d..c0c69bfe 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -139,14 +139,30 @@ def __init__( num_boundary_forcing_vars = datastore_boundary.get_num_data_vars( category="forcing" ) + + # Dimensionality of encoded time deltas + self.time_delta_enc_dim = ( + args.hidden_dim + if args.time_delta_enc_dim is None + else args.time_delta_enc_dim + ) + assert self.time_delta_enc_dim % 2 == 0, ( + "Number of dimensions to use for time delta encoding must be " + "even (sin and cos)" + ) + num_past_boundary_steps = args.num_past_boundary_steps num_future_boundary_steps = args.num_future_boundary_steps self.boundary_dim = ( boundary_static_dim # Time delta counts as one additional forcing_feature - + (num_boundary_forcing_vars + 1) + + (num_boundary_forcing_vars + self.time_delta_enc_dim) * (num_past_boundary_steps + num_future_boundary_steps + 1) ) + # How many of the last boundary forcing dims contain time-deltas + self.boundary_time_delta_dims = ( + num_past_boundary_steps + num_future_boundary_steps + 1 + ) self.num_total_grid_nodes += self.num_boundary_nodes diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 91e6afb4..6979426e 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -118,6 +118,25 @@ def __init__( layer_norm=False, ) # No layer norm on this one + # Compute constants for use in time_delta encoding + step_length_ratio = ( + datastore_boundary.step_length / datastore.step_length + ) + min_time_delta = -(args.num_past_boundary_steps + 1) * step_length_ratio + max_time_delta = args.num_future_boundary_steps * step_length_ratio + time_delta_magnitude = max(max_time_delta, abs(min_time_delta)) + + freq_indices = 1.0 + torch.arange( + self.time_delta_enc_dim // 2, + dtype=torch.float, + ) + self.register_buffer( + "enc_freq_denom", + (2 * time_delta_magnitude) + ** (2 * freq_indices / self.time_delta_enc_dim), + persistent=False, + ) + @property def num_mesh_nodes(self): """ @@ -177,6 +196,9 @@ def predict_step( ) if self.boundary_forced: + # sin-encode time deltas for boundary forcing + boundary_forcing = self.encode_forcing_time_deltas(boundary_forcing) + # Create full boundary node features of shape # (B, num_boundary_nodes, boundary_dim) boundary_features = torch.cat( @@ -255,3 +277,37 @@ def predict_step( # Residual connection for full state return prev_state + rescaled_delta_mean, pred_std + + def encode_forcing_time_deltas(self, boundary_forcing): + + """ + TODO + boundary_forcing: (B, num_nodes, num_forcing_dims) + """ + # Extract time delta dimensions + time_deltas = boundary_forcing[..., -self.boundary_time_delta_dims :] + # (B, num_boundary_nodes, num_time_deltas) + + # Compute sinusoidal encodings + frequencies = time_deltas.unsqueeze(-1) / self.enc_freq_denom + # (B, num_boundary_nodes, num_time_deltas, num_freq) + encodings_stacked = torch.cat( + ( + torch.sin(frequencies), + torch.cos(frequencies), + ), + dim=-1, + ) + # (B, num_boundary_nodes, num_time_deltas, 2*num_freq) + + encoded_time_deltas = encodings_stacked.flatten(-2, -1) + # (B, num_boundary_nodes, num_encoding_dims) + + # Put together encoded time deltas with rest of boundary_forcing + return torch.cat( + ( + boundary_forcing[..., : -self.boundary_time_delta_dims], + encoded_time_deltas, + ), + dim=-1, + ) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 614d6f1f..a18ae92a 100644 --- 
a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -123,6 +123,13 @@ def main(input_args=None): " grid nodes. Note that this requires the same dimensionality for " "both kinds of grid inputs. (default: False (no))", ) + parser.add_argument( + "--time_delta_enc_dim", + type=int, + help="Dimensionality of positional encoding for time deltas of boundary" + " forcing. If None, same as hidden_dim. If given, must be even " + "(default: None)", + ) # Training options parser.add_argument( diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 8903f234..9668ebf7 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -407,6 +407,7 @@ def _slice_time( da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) + # TODO Compute window_time_deltas for fc data da_sliced = da_sliced.expand_dims( dim={"time": [current_time.values]} From eb31f32659756818a93ab5626f13513b7c9a3f38 Mon Sep 17 00:00:00 2001 From: Simon Adamov Date: Wed, 15 Jan 2025 09:28:06 +0100 Subject: [PATCH 179/190] implemented correct delta_times for forecasts --- neural_lam/weather_dataset.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 9668ebf7..27621cc3 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -407,7 +407,13 @@ def _slice_time( da_sliced = da_sliced.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) - # TODO Compute window_time_deltas for fc data + # Calculate window time deltas for forecast data + window_time_deltas = ( + da_forcing.elapsed_forecast_duration[start_idx:end_idx].values + - da_forcing.elapsed_forecast_duration[step_idx].values + ) + # Assign window time delta coordinate + da_sliced["window_time_deltas"] = ("window", window_time_deltas) da_sliced = da_sliced.expand_dims( dim={"time": [current_time.values]} From a3a548c57bab931d744438aac4e0ea8f3668600b Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 17:04:47 +0100 Subject: [PATCH 180/190] Add docstring for encoding function --- neural_lam/models/base_graph_model.py | 22 +++++++++++++++++++--- neural_lam/weather_dataset.py | 6 ++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 6979426e..4d99bb17 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -279,10 +279,26 @@ def predict_step( return prev_state + rescaled_delta_mean, pred_std def encode_forcing_time_deltas(self, boundary_forcing): - """ - TODO - boundary_forcing: (B, num_nodes, num_forcing_dims) + Build sinusoidal encodings of time deltas in boundary forcing. Removes + original time delta features and replaces these with encoded sinusoidal + features, returning the full new forcing tensor. + + Parameters + ---------- + boundary_forcing : torch.Tensor + Tensor of shape (B, num_nodes, num_forcing_dims) containing boundary + forcing features. Time delta features are the last + self.boundary_time_delta_dims dimensions of the num_forcing_dims + feature dimensions. + + + Returns + ------- + encoded_forcing : torch.Tensor + Tensor of shape (B, num_nodes, num_forcing_dims'), where the + time delta features have been removed and encoded versions added. + Note that this might change the number of feature dimensions. 
""" # Extract time delta dimensions time_deltas = boundary_forcing[..., -self.boundary_time_delta_dims :] diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index 27621cc3..a6b52b88 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -409,10 +409,12 @@ def _slice_time( ) # Calculate window time deltas for forecast data window_time_deltas = ( - da_forcing.elapsed_forecast_duration[start_idx:end_idx].values + da_forcing.elapsed_forecast_duration[ + start_idx:end_idx + ].values - da_forcing.elapsed_forecast_duration[step_idx].values ) - # Assign window time delta coordinate + # Assign window time delta coordinate da_sliced["window_time_deltas"] = ("window", window_time_deltas) da_sliced = da_sliced.expand_dims( From d95c1bfde6715e25536d9f4dfc47594f793e4517 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 17:37:08 +0100 Subject: [PATCH 181/190] Add option for number of HPC nodes Squashed commit of the following: commit 20dfb3a57ebfcca358a890127c605e27fb570a49 Author: sadamov Date: Thu Jan 16 15:12:02 2025 +0100 linting commit 6bbe5fbd19e69e89acd31637dcf5840a1855e024 Author: sadamov Date: Thu Jan 16 15:08:50 2025 +0100 simplify multi-node and checkpointing commit a80921457da9ee0deed8dd8c7779663343d1b8af Author: sadamov Date: Wed Jan 15 21:22:33 2025 +0100 only keep wandb checkpoint commit c074651cee70457b046a98a0874f9e1ebc215277 Author: sadamov Date: Wed Jan 15 21:00:55 2025 +0100 fixed checkpoints commit 4c16d1d0bc227cf0d2d1945c7b4e7c78b83ffbe4 Author: sadamov Date: Wed Jan 15 19:20:26 2025 +0100 upload checkpoints to wandb commit c30c6b9ea4783a744631bd0c5b7be84d3c5adcdd Author: sadamov Date: Wed Jan 15 13:55:54 2025 +0100 multinode training --- neural_lam/train_model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index a18ae92a..356e4300 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -49,6 +49,12 @@ def main(input_args=None): default=4, help="Number of workers in data loader (default: 4)", ) + parser.add_argument( + "--num_nodes", + type=int, + default=1, + help="Number of nodes to use in DDP (default: 1)", + ) parser.add_argument( "--epochs", type=int, @@ -81,8 +87,7 @@ def main(input_args=None): "--graph_name", type=str, default="multiscale", - help="Graph to load and use in graph-based model " - "(default: multiscale)", + help="Graph to load and use in graph-based model (default: multiscale)", ) parser.add_argument( "--hidden_dim", @@ -314,6 +319,7 @@ def main(input_args=None): deterministic=True, strategy="ddp", accelerator=device_name, + num_nodes=args.num_nodes, logger=logger, log_every_n_steps=1, callbacks=[checkpoint_callback], From ee23f482d1a69e9d61f4288401259410e4ed289f Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 18:31:04 +0100 Subject: [PATCH 182/190] Standardize static features for interior Squashed commit of the following: commit 1a284599e374e041cb05adc32cfcbecdaad55889 Author: joeloskarsson Date: Thu Jan 16 18:03:06 2025 +0100 Add standardization test commit 3f53b53f26bacb1e988bab77c80cc09572e7e5a0 Author: joeloskarsson Date: Mon Dec 9 19:01:04 2024 -0800 Add changelog entry commit 97aa8dd9c4b62ee7a8904b9fc0ee177bf5d575ea Author: joeloskarsson Date: Mon Dec 9 18:58:12 2024 -0800 ... 
and docstring commit 8f78ad98aeffbd7ac3c2c2c8318297d98b34c371 Author: joeloskarsson Date: Mon Dec 9 18:55:31 2024 -0800 Add standardize keyword also to dummydatastore in tests commit 0805ff61763999bec5153a2d85ff5101c578e984 Author: joeloskarsson Date: Sat Dec 7 22:48:04 2024 +0100 Implement standardization of static features --- CHANGELOG.md | 2 ++ neural_lam/datastore/base.py | 31 +++++++++++++++++++++- neural_lam/datastore/mdp.py | 11 ++++++-- neural_lam/datastore/npyfilesmeps/store.py | 9 ++++++- neural_lam/models/ar_model.py | 2 +- tests/dummy_datastore.py | 12 +++++++-- tests/test_datastores.py | 21 +++++++++++++++ 7 files changed, 81 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32961b16..42d81149 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#66](https://github.com/mllam/neural-lam/pull/66) @leifdenby @sadamov +- Implement standardization of static features when loaded in ARModel [\#96](https://github.com/mllam/neural-lam/pull/96) @joeloskarsson + ### Fixed - Fix wandb environment variable disabling wandb during tests. Now correctly uses WANDB_MODE=disabled. [\#94](https://github.com/mllam/neural-lam/pull/94) @joeloskarsson diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index fc60208c..701dba83 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -185,9 +185,36 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ pass + def _standardize_datarray( + self, da: xr.DataArray, category: str + ) -> xr.DataArray: + """ + Helper function to standardize a dataarray before returning it. + + Parameters + ---------- + da: xr.DataArray + The dataarray to standardize + category : str + The category of the dataarray (state/forcing/static), to load + standardization statistics for. + + Returns + ------- + xr.Dataarray + The standardized dataarray + """ + + standard_da = self.get_standardization_dataarray(category=category) + + mean = standard_da[f"{category}_mean"] + std = standard_da[f"{category}_std"] + + return (da - mean) / std + @abc.abstractmethod def get_dataarray( - self, category: str, split: str + self, category: str, split: str, standardize: bool = False ) -> Union[xr.DataArray, None]: """ Return the processed data (as a single `xr.DataArray`) for the given @@ -216,6 +243,8 @@ def get_dataarray( The category of the dataset (state/forcing/static). split : str The time split to filter the dataset (train/val/test). + standardize: bool + If the dataarray should be returned standardized Returns ------- diff --git a/neural_lam/datastore/mdp.py b/neural_lam/datastore/mdp.py index 9494e718..9582c558 100644 --- a/neural_lam/datastore/mdp.py +++ b/neural_lam/datastore/mdp.py @@ -218,7 +218,7 @@ def get_num_data_vars(self, category: str) -> int: return len(self.get_vars_names(category)) def get_dataarray( - self, category: str, split: str + self, category: str, split: str, standardize: bool = False ) -> Union[xr.DataArray, None]: """ Return the processed data (as a single `xr.DataArray`) for the given @@ -246,6 +246,8 @@ def get_dataarray( The category of the dataset (state/forcing/static). split : str The time split to filter the dataset (train/val/test). 
+ standardize: bool + If the dataarray should be returned standardized Returns ------- @@ -271,7 +273,12 @@ def get_dataarray( da_category = da_category.sel(time=slice(t_start, t_end)) dim_order = self.expected_dim_order(category=category) - return da_category.transpose(*dim_order) + da_category = da_category.transpose(*dim_order) + + if standardize: + return self._standardize_datarray(da_category, category=category) + + return da_category def get_standardization_dataarray(self, category: str) -> xr.Dataset: """ diff --git a/neural_lam/datastore/npyfilesmeps/store.py b/neural_lam/datastore/npyfilesmeps/store.py index 2a6ed302..22588a06 100644 --- a/neural_lam/datastore/npyfilesmeps/store.py +++ b/neural_lam/datastore/npyfilesmeps/store.py @@ -210,7 +210,9 @@ def num_ensemble_members(self) -> int: """ return self.config.dataset.num_ensemble_members - def get_dataarray(self, category: str, split: str) -> DataArray: + def get_dataarray( + self, category: str, split: str, standardize: bool = False + ) -> DataArray: """ Get the data array for the given category and split of data. If the category is 'state', the data array will be a concatenation of the data @@ -225,6 +227,8 @@ def get_dataarray(self, category: str, split: str) -> DataArray: split : str The dataset split to load the data for. One of 'train', 'val', or 'test'. + standardize: bool + If the dataarray should be returned standardized Returns ------- @@ -314,6 +318,9 @@ def get_dataarray(self, category: str, split: str) -> DataArray: dim_order = self.expected_dim_order(category=category) da = da.transpose(*dim_order) + if standardize: + return self._standardize_datarray(da, category=category) + return da def _get_single_timeseries_dataarray( diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index c0c69bfe..30a02cb9 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -46,7 +46,7 @@ def __init__( # Load static features for interior da_static_features = datastore.get_dataarray( - category="static", split=None + category="static", split=None, standardize=True ) self.register_buffer( "interior_static_features", diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index 9c8f8289..ede2aa13 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -295,7 +295,7 @@ def get_standardization_dataarray(self, category: str) -> xr.Dataset: return ds_standardization def get_dataarray( - self, category: str, split: str + self, category: str, split: str, standardize: bool = False ) -> Union[xr.DataArray, None]: """ Return the processed data (as a single `xr.DataArray`) for the given @@ -324,6 +324,8 @@ def get_dataarray( The category of the dataset (state/forcing/static). split : str The time split to filter the dataset (train/val/test). + standardize: bool + If the dataarray should be returned standardized Returns ------- @@ -332,7 +334,13 @@ def get_dataarray( """ dim_order = self.expected_dim_order(category=category) - return self.ds[category].transpose(*dim_order) + + da_category = self.ds[category].transpose(*dim_order) + + if standardize: + return self._standardize_datarray(da_category, category=category) + + return da_category def get_xy(self, category: str, stacked: bool = True) -> ndarray: """Return the x, y coordinates of the dataset. 
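To make the standardization helper above concrete, the toy example below shows the xarray broadcasting that `_standardize_datarray` relies on: a per-feature mean and standard deviation are subtracted from and divided into the full array. The feature names and array sizes are invented for illustration and do not come from any datastore.

    import numpy as np
    import xarray as xr

    # Toy static data laid out as (grid_index, static_feature)
    da = xr.DataArray(
        np.random.rand(10, 2),
        dims=("grid_index", "static_feature"),
        coords={"static_feature": ["topography_height", "land_sea_mask"]},
    )
    mean = da.mean(dim="grid_index")
    std = da.std(dim="grid_index")

    # Same broadcasting pattern as (da - mean) / std in the datastore helper
    da_standardized = (da - mean) / std
    print(float(da_standardized.mean()), float(da_standardized.std()))
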
diff --git a/tests/test_datastores.py b/tests/test_datastores.py index a91f6245..ff7435c9 100644 --- a/tests/test_datastores.py +++ b/tests/test_datastores.py @@ -361,3 +361,24 @@ def test_plot_example_from_datastore(datastore_name): assert fig is not None assert fig.get_axes() + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +@pytest.mark.parametrize("category", ("state", "static")) +def test_get_standardized_da(datastore_name, category): + """Check that dataarray is actually standardized when calling + get_dataarray with standardize=True""" + datastore = init_datastore_example(datastore_name) + ds_stats = datastore.get_standardization_dataarray(category=category) + + mean = ds_stats[f"{category}_mean"] + std = ds_stats[f"{category}_std"] + + non_standard_da = datastore.get_dataarray( + category=category, split="train", standardize=False + ) + standard_da = datastore.get_dataarray( + category=category, split="train", standardize=True + ) + + assert np.allclose(standard_da, (non_standard_da - mean) / std, atol=1e-6) From 17065393f941090b99426e71529585a5cd1997a7 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 18:32:45 +0100 Subject: [PATCH 183/190] Standardize static boundary features --- neural_lam/models/ar_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 30a02cb9..0ce523c2 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -119,7 +119,7 @@ def __init__( if self.boundary_forced: # Load static features for boundary da_boundary_static_features = datastore_boundary.get_dataarray( - category="static", split=None + category="static", split=None, standardize=True ) self.register_buffer( "boundary_static_features", From 29fbc1d359aaca4cf7dca859cb5290f37feaaf7a Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 18:43:27 +0100 Subject: [PATCH 184/190] Add functionality for constraining values of predicted variables Squashed commit of the following: commit f10886f78f66103cd920d58dffc09575f3ec37f5 Author: Simon Kamuk Christiansen Date: Tue Jan 14 15:48:02 2025 +0000 set clamp lims as buffers. 
Updated clamping test with correct r2m limit commit bb40e8226a857bf81a76de3d7d88eeb0848c54cd Author: Simon Kamuk Christiansen Date: Tue Jan 14 15:21:30 2025 +0000 linting commit afeee02daf6bbd7cf53ef516db6ce16db5d31e49 Author: Simon Kamuk Christiansen Date: Tue Jan 14 15:16:38 2025 +0000 review suggestions commit 18cb4721ce645415f224c28a75148d7a8bcae747 Author: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Tue Jan 14 15:43:38 2025 +0100 Update neural_lam/models/base_graph_model.py Co-authored-by: Joel Oskarsson commit 59793934a59a286b0d97df8468a815849b69cf13 Author: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Tue Jan 14 15:42:24 2025 +0100 Update README.md Co-authored-by: Joel Oskarsson commit f53ae59b8198421dacca88030fc14f68b984ed41 Author: Simon Kamuk Christiansen Date: Fri Jan 10 13:07:52 2025 +0000 update docstring and name of clamping function commit 82844e3b8498130faa738bd0025e707df08cab35 Author: Simon Kamuk Christiansen Date: Tue Dec 17 08:17:20 2024 +0000 linting commit 0681a3559dbe2c7978c7d30215a38a2027a340b2 Author: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Mon Dec 16 13:57:18 2024 +0100 Update README.md Added description of clamping feature in config.yaml commit 3bc51ab8414eb7dcd18e0d98fd2faa05a69efa0f Author: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Mon Dec 16 09:50:30 2024 +0100 fix typo changed prepare_clamping_parames to prepare_clamping_params commit 5c7567d096e1d0068c7f44325fcc9d1793dc4e9a Author: Simon Kamuk Christiansen Date: Wed Dec 11 13:41:10 2024 +0000 added test commit 5ba72ceb373848fa63acdeab8c194b278f6c17e6 Merge: da1480c 1a12826 Author: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Tue Dec 10 09:39:33 2024 +0100 Merge branch 'mllam:main' into feat/prediction_constraints commit da1480c30403a3fe58c0597e997fb52ff9f25105 Author: Simon Kamuk Christiansen Date: Wed Dec 4 10:40:03 2024 +0000 prevent inverse sigmoid and softplus from returning +/- inf commit 4a27c85723ca4d22306f52808bfe5362a64d8b8c Author: Simon Kamuk Christiansen Date: Wed Dec 4 10:26:57 2024 +0000 update clamping method to use inverse method commit a247f9a849f02c2835761b3142cd29478beba269 Author: Simon Kamuk Christiansen Date: Fri Nov 29 10:19:17 2024 +0000 ensure only state delta is clamped but enforcing limits on the final outputted state commit 50ce774299e78ea04ba791795cb890712562bf49 Author: Simon Kamuk Christiansen Date: Fri Nov 29 08:57:41 2024 +0000 linting commit 4f13df76c9a6d52a0130ced0db706e051dda985e Author: Simon Kamuk Christiansen Date: Thu Nov 28 14:30:19 2024 +0000 initial commit, add ability to clamp predicted outputs to limits supplied in config --- README.md | 18 +- neural_lam/config.py | 21 ++ neural_lam/models/base_graph_model.py | 192 +++++++++++- neural_lam/utils.py | 33 ++ .../mdp/danra_100m_winds/config.yaml | 9 + .../mdp/danra_100m_winds/danra.datastore.yaml | 20 +- tests/test_clamping.py | 283 ++++++++++++++++++ 7 files changed, 570 insertions(+), 6 deletions(-) create mode 100644 tests/test_clamping.py diff --git a/README.md b/README.md index 7a5e5caf..20f09c86 100644 --- a/README.md +++ b/README.md @@ -154,11 +154,23 @@ training: weights: u100m: 1.0 v100m: 1.0 + t2m: 1.0 + r2m: 1.0 + output_clamping: + lower: + t2m: 0.0 + r2m: 0 + upper: + r2m: 1.0 ``` -For now the neural-lam config only defines two things: -1) the kind of datastores and the path to their config -2) the weighting of different features in the loss function. 
If you don't define the state feature weighting it will default to weighting all features equally. +For now the neural-lam config only defines few things: + +1. The kind of datastore and the path to its config +2. The weighting of different features in +the loss function. If you don't define the state feature weighting it will default to +weighting all features equally. +3. Valid numerical range for output of each feature.The numerical range of all features default to $]-\infty, \infty[$. (This example is taken from the `tests/datastore_examples/mdp` directory.) diff --git a/neural_lam/config.py b/neural_lam/config.py index 4b57a141..49440953 100644 --- a/neural_lam/config.py +++ b/neural_lam/config.py @@ -68,6 +68,23 @@ class UniformFeatureWeighting: pass +@dataclasses.dataclass +class OutputClamping: + """ + Configuration for clamping the output of the model. + + Attributes + ---------- + lower : Dict[str, float] + The minimum value to clamp each output feature to. + upper : Dict[str, float] + The maximum value to clamp each output feature to. + """ + + lower: Dict[str, float] = dataclasses.field(default_factory=dict) + upper: Dict[str, float] = dataclasses.field(default_factory=dict) + + @dataclasses.dataclass class TrainingConfig: """ @@ -86,6 +103,10 @@ class TrainingConfig: ManualStateFeatureWeighting, UniformFeatureWeighting ] = dataclasses.field(default_factory=UniformFeatureWeighting) + output_clamping: OutputClamping = dataclasses.field( + default_factory=OutputClamping + ) + @dataclasses.dataclass class NeuralLAMConfig(dataclass_wizard.JSONWizard, dataclass_wizard.YAMLWizard): diff --git a/neural_lam/models/base_graph_model.py b/neural_lam/models/base_graph_model.py index 4d99bb17..0e004935 100644 --- a/neural_lam/models/base_graph_model.py +++ b/neural_lam/models/base_graph_model.py @@ -137,6 +137,9 @@ def __init__( persistent=False, ) + # Compute indices and define clamping functions + self.prepare_clamping_params(config, datastore) + @property def num_mesh_nodes(self): """ @@ -144,6 +147,189 @@ def num_mesh_nodes(self): """ raise NotImplementedError("num_mesh_nodes not implemented") + def prepare_clamping_params( + self, config: NeuralLAMConfig, datastore: BaseDatastore + ): + """ + Prepare parameters for clamping predicted values to valid range + """ + + # Read configs + state_feature_names = datastore.get_vars_names(category="state") + lower_lims = config.training.output_clamping.lower + upper_lims = config.training.output_clamping.upper + + # Check that limits in config are for valid features + unknown_features_lower = set(lower_lims.keys()) - set( + state_feature_names + ) + unknown_features_upper = set(upper_lims.keys()) - set( + state_feature_names + ) + if unknown_features_lower or unknown_features_upper: + raise ValueError( + "State feature limits were provided for unknown features: " + f"{unknown_features_lower.union(unknown_features_upper)}" + ) + + # Constant parameters for clamping + sigmoid_sharpness = 1 + softplus_sharpness = 1 + sigmoid_center = 0 + softplus_center = 0 + + normalize_clamping_lim = ( + lambda x, feature_idx: (x - self.state_mean[feature_idx]) + / self.state_std[feature_idx] + ) + + # Check which clamping functions to use for each feature + sigmoid_lower_upper_idx = [] + sigmoid_lower_lims = [] + sigmoid_upper_lims = [] + + softplus_lower_idx = [] + softplus_lower_lims = [] + + softplus_upper_idx = [] + softplus_upper_lims = [] + + for feature_idx, feature in enumerate(state_feature_names): + if feature in lower_lims and feature in upper_lims: + 
assert ( + lower_lims[feature] < upper_lims[feature] + ), f'Invalid clamping limits for feature "{feature}",\ + lower: {lower_lims[feature]}, larger than\ + upper: {upper_lims[feature]}' + sigmoid_lower_upper_idx.append(feature_idx) + sigmoid_lower_lims.append( + normalize_clamping_lim(lower_lims[feature], feature_idx) + ) + sigmoid_upper_lims.append( + normalize_clamping_lim(upper_lims[feature], feature_idx) + ) + elif feature in lower_lims and feature not in upper_lims: + softplus_lower_idx.append(feature_idx) + softplus_lower_lims.append( + normalize_clamping_lim(lower_lims[feature], feature_idx) + ) + elif feature not in lower_lims and feature in upper_lims: + softplus_upper_idx.append(feature_idx) + softplus_upper_lims.append( + normalize_clamping_lim(upper_lims[feature], feature_idx) + ) + + self.register_buffer( + "sigmoid_lower_lims", torch.tensor(sigmoid_lower_lims) + ) + self.register_buffer( + "sigmoid_upper_lims", torch.tensor(sigmoid_upper_lims) + ) + self.register_buffer( + "softplus_lower_lims", torch.tensor(softplus_lower_lims) + ) + self.register_buffer( + "softplus_upper_lims", torch.tensor(softplus_upper_lims) + ) + + self.register_buffer( + "clamp_lower_upper_idx", torch.tensor(sigmoid_lower_upper_idx) + ) + self.register_buffer( + "clamp_lower_idx", torch.tensor(softplus_lower_idx) + ) + self.register_buffer( + "clamp_upper_idx", torch.tensor(softplus_upper_idx) + ) + + # Define clamping functions + self.clamp_lower_upper = lambda x: ( + self.sigmoid_lower_lims + + (self.sigmoid_upper_lims - self.sigmoid_lower_lims) + * torch.sigmoid(sigmoid_sharpness * (x - sigmoid_center)) + ) + self.clamp_lower = lambda x: ( + self.softplus_lower_lims + + torch.nn.functional.softplus( + x - softplus_center, beta=softplus_sharpness + ) + ) + self.clamp_upper = lambda x: ( + self.softplus_upper_lims + - torch.nn.functional.softplus( + softplus_center - x, beta=softplus_sharpness + ) + ) + + self.inverse_clamp_lower_upper = lambda x: ( + sigmoid_center + + utils.inverse_sigmoid( + (x - self.sigmoid_lower_lims) + / (self.sigmoid_upper_lims - self.sigmoid_lower_lims) + ) + / sigmoid_sharpness + ) + self.inverse_clamp_lower = lambda x: ( + utils.inverse_softplus( + x - self.softplus_lower_lims, beta=softplus_sharpness + ) + + softplus_center + ) + self.inverse_clamp_upper = lambda x: ( + -utils.inverse_softplus( + self.softplus_upper_lims - x, beta=softplus_sharpness + ) + + softplus_center + ) + + def get_clamped_new_state(self, state_delta, prev_state): + """ + Clamp prediction to valid range supplied in config + Returns the clamped new state after adding delta to original state + + Instead of the new state being computed as + $X_{t+1} = X_t + \\delta = X_t + model(\\{X_t,X_{t-1},...\\}, forcing)$ + The clamped values will be + $f(f^{-1}(X_t) + model(\\{X_t, X_{t-1},... 
\\}, forcing))$ + Which means the model will learn to output values in the range of the + inverse clamping function + + state_delta: (B, num_grid_nodes, feature_dim) + prev_state: (B, num_grid_nodes, feature_dim) + """ + + # Assign new state, but overwrite clamped values of each type later + new_state = prev_state + state_delta + + # Sigmoid/logistic clamps between ]a,b[ + if self.clamp_lower_upper_idx.numel() > 0: + idx = self.clamp_lower_upper_idx + + new_state[:, :, idx] = self.clamp_lower_upper( + self.inverse_clamp_lower_upper(prev_state[:, :, idx]) + + state_delta[:, :, idx] + ) + + # Softplus clamps between ]a,infty[ + if self.clamp_lower_idx.numel() > 0: + idx = self.clamp_lower_idx + + new_state[:, :, idx] = self.clamp_lower( + self.inverse_clamp_lower(prev_state[:, :, idx]) + + state_delta[:, :, idx] + ) + + # Softplus clamps between ]-infty,b[ + if self.clamp_upper_idx.numel() > 0: + idx = self.clamp_upper_idx + + new_state[:, :, idx] = self.clamp_upper( + self.inverse_clamp_upper(prev_state[:, :, idx]) + + state_delta[:, :, idx] + ) + + return new_state + @property def num_grid_connected_mesh_nodes(self): """ @@ -275,8 +461,10 @@ def predict_step( # Rescale with one-step difference statistics rescaled_delta_mean = pred_delta_mean * self.diff_std + self.diff_mean - # Residual connection for full state - return prev_state + rescaled_delta_mean, pred_std + # Clamp values to valid range (also add the delta to the previous state) + new_state = self.get_clamped_new_state(rescaled_delta_mean, prev_state) + + return new_state, pred_std def encode_forcing_time_deltas(self, boundary_forcing): """ diff --git a/neural_lam/utils.py b/neural_lam/utils.py index fb0a9e6f..3bf3f815 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -533,3 +533,36 @@ def crop_time_if_needed( ) da1 = da1.isel(time=slice(first_valid_idx, last_valid_idx_plus_one)) return da1 + + +def inverse_softplus(x, beta=1, threshold=20): + """ + Inverse of torch.nn.functional.softplus + + For x*beta above threshold, returns linear function for numerical + stability. + + Input is clamped to x > ln(1+1e-6)/beta which is approximately positive + values of x. + Note that this torch.clamp_min will make gradients 0, but this is not a + problem as values of x that are this close to 0 have gradients of 0 anyhow. + """ + non_linear_part = ( + torch.log(torch.clamp_min(torch.expm1(x * beta), 1e-6)) / beta + ) + x = torch.where(x * beta <= threshold, non_linear_part, x) + + return x + + +def inverse_sigmoid(x): + """ + Inverse of torch.sigmoid + + Sigmoid output takes values in [0,1], this makes sure input is just within + this interval. + Note that this torch.clamp will make gradients 0, but this is not a problem + as values of x that are this close to 0 or 1 have gradients of 0 anyhow. 
+ """ + x_clamped = torch.clamp(x, min=1e-6, max=1 - 1e-6) + return torch.log(x_clamped / (1 - x_clamped)) diff --git a/tests/datastore_examples/mdp/danra_100m_winds/config.yaml b/tests/datastore_examples/mdp/danra_100m_winds/config.yaml index 0bb5c5ec..8b3362e0 100644 --- a/tests/datastore_examples/mdp/danra_100m_winds/config.yaml +++ b/tests/datastore_examples/mdp/danra_100m_winds/config.yaml @@ -7,3 +7,12 @@ training: weights: u100m: 1.0 v100m: 1.0 + t2m: 1.0 + r2m: 1.0 + output_clamping: + lower: + t2m: 0.0 + r2m: 0 + upper: + r2m: 1.0 + u100m: 100.0 diff --git a/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml b/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml index 3edf1267..e601cc02 100644 --- a/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml +++ b/tests/datastore_examples/mdp/danra_100m_winds/danra.datastore.yaml @@ -55,7 +55,7 @@ inputs: dims: [x, y] target_output_variable: state - danra_surface: + danra_surface_forcing: path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr dims: [time, x, y] variables: @@ -73,6 +73,24 @@ inputs: name_format: "{var_name}" target_output_variable: forcing + danra_surface: + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr + dims: [time, x, y] + variables: + - r2m + - t2m + dim_mapping: + time: + method: rename + dim: time + grid_index: + method: stack + dims: [x, y] + state_feature: + method: stack_variables_by_var_name + name_format: "{var_name}" + target_output_variable: state + danra_lsm: path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/lsm.zarr dims: [x, y] diff --git a/tests/test_clamping.py b/tests/test_clamping.py new file mode 100644 index 00000000..f3f9365d --- /dev/null +++ b/tests/test_clamping.py @@ -0,0 +1,283 @@ +# Standard library +from pathlib import Path + +# Third-party +import torch + +# First-party +from neural_lam import config as nlconfig +from neural_lam.create_graph import create_graph_from_datastore +from neural_lam.datastore.mdp import MDPDatastore +from neural_lam.models.graph_lam import GraphLAM +from tests.conftest import init_datastore_example + + +def test_clamping(): + datastore = init_datastore_example(MDPDatastore.SHORT_NAME) + + graph_name = "1level" + + graph_dir_path = Path(datastore.root_path) / "graph" / graph_name + + if not graph_dir_path.exists(): + create_graph_from_datastore( + datastore=datastore, + output_root_path=str(graph_dir_path), + n_max_levels=1, + ) + + class ModelArgs: + output_std = False + loss = "mse" + restore_opt = False + n_example_pred = 1 + graph = graph_name + hidden_dim = 4 + hidden_layers = 1 + processor_layers = 2 + mesh_aggr = "sum" + lr = 1.0e-3 + val_steps_to_log = [1, 3] + metrics_watch = [] + num_past_forcing_steps = 1 + num_future_forcing_steps = 1 + + model_args = ModelArgs() + + config = nlconfig.NeuralLAMConfig( + datastore=nlconfig.DatastoreSelection( + kind=datastore.SHORT_NAME, config_path=datastore.root_path + ), + training=nlconfig.TrainingConfig( + output_clamping=nlconfig.OutputClamping( + lower={"t2m": 0.0, "r2m": 0.0}, + upper={"r2m": 1.0, "u100m": 100.0}, + ) + ), + ) + + model = GraphLAM( + args=model_args, + datastore=datastore, + config=config, + ) + + features = datastore.get_vars_names(category="state") + original_state = torch.zeros(1, 1, len(features)) + zero_delta = original_state.clone() + + # Get a state well within the bounds + original_state[:, :, model.clamp_lower_upper_idx] = ( + model.sigmoid_lower_lims + model.sigmoid_upper_lims + ) / 2 
+ original_state[:, :, model.clamp_lower_idx] = model.softplus_lower_lims + 10 + original_state[:, :, model.clamp_upper_idx] = model.softplus_upper_lims - 10 + + # Get a delta that tries to push the state out of bounds + delta = torch.ones_like(zero_delta) + delta[:, :, model.clamp_lower_upper_idx] = ( + model.sigmoid_upper_lims - model.sigmoid_lower_lims + ) / 3 + delta[:, :, model.clamp_lower_idx] = -5 + delta[:, :, model.clamp_upper_idx] = 5 + + # Check that a delta of 0 gives unchanged state + zero_prediction = model.get_clamped_new_state(zero_delta, original_state) + assert (abs(original_state - zero_prediction) < 1e-6).all().item() + + # Make predictions towards bounds for each feature + prediction = zero_prediction.clone() + n_loops = 100 + for i in range(n_loops): + prediction = model.get_clamped_new_state(delta, prediction) + + # check that unclamped states are as expected + # delta is 1, so they should be 1*n_loops + assert ( + ( + abs( + prediction[ + :, + :, + list( + set(range(len(features))) + - set(model.clamp_lower_upper_idx.tolist()) + - set(model.clamp_lower_idx.tolist()) + - set(model.clamp_upper_idx.tolist()) + ), + ] + - n_loops + ) + < 1e-6 + ) + .all() + .item() + ) + + # Check that clamped states are within bounds + # they should not be at the bounds but allow it due to numerical precision + assert ( + ( + model.sigmoid_lower_lims + <= prediction[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .all() + .item() + ) + assert ( + (model.softplus_lower_lims <= prediction[:, :, model.clamp_lower_idx]) + .all() + .item() + ) + assert ( + (prediction[:, :, model.clamp_upper_idx] <= model.softplus_upper_lims) + .all() + .item() + ) + + # Check that prediction is within bounds in original non-normalized space + unscaled_prediction = prediction * model.state_std + model.state_mean + features_idx = {f: i for i, f in enumerate(features)} + lower_lims = { + features_idx[f]: lim + for f, lim in config.training.output_clamping.lower.items() + } + upper_lims = { + features_idx[f]: lim + for f, lim in config.training.output_clamping.upper.items() + } + assert ( + ( + torch.tensor(list(lower_lims.values())) + <= unscaled_prediction[:, :, list(lower_lims.keys())] + ) + .all() + .item() + ) + assert ( + ( + unscaled_prediction[:, :, list(upper_lims.keys())] + <= torch.tensor(list(upper_lims.values())) + ) + .all() + .item() + ) + + # Check that a prediction from a state starting outside the bounds is also + # pushed within bounds. 
3 delta should be enough to give an initial state + # out of bounds so 5 is well outside + invalid_state = original_state + 5 * delta + assert ( + not ( + model.sigmoid_lower_lims + <= invalid_state[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .any() + .item() + ) + assert ( + not ( + model.softplus_lower_lims + <= invalid_state[:, :, model.clamp_lower_idx] + ) + .any() + .item() + ) + assert ( + not ( + invalid_state[:, :, model.clamp_upper_idx] + <= model.softplus_upper_lims + ) + .any() + .item() + ) + invalid_prediction = model.get_clamped_new_state(zero_delta, invalid_state) + assert ( + ( + model.sigmoid_lower_lims + <= invalid_prediction[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .all() + .item() + ) + assert ( + ( + model.softplus_lower_lims + <= invalid_prediction[:, :, model.clamp_lower_idx] + ) + .all() + .item() + ) + assert ( + ( + invalid_prediction[:, :, model.clamp_upper_idx] + <= model.softplus_upper_lims + ) + .all() + .item() + ) + + # Above tests only check the upper sigmoid limit. + # Repeat to check lower sigmoid limit + + # Make predictions towards bounds for each feature + prediction = zero_prediction.clone() + n_loops = 100 + for i in range(n_loops): + prediction = model.get_clamped_new_state(-delta, prediction) + + # Check that clamped states are within bounds + assert ( + ( + model.sigmoid_lower_lims + <= prediction[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .all() + .item() + ) + + # Check that prediction is within bounds in original non-normalized space + assert ( + ( + torch.tensor(list(lower_lims.values())) + <= unscaled_prediction[:, :, list(lower_lims.keys())] + ) + .all() + .item() + ) + assert ( + ( + unscaled_prediction[:, :, list(upper_lims.keys())] + <= torch.tensor(list(upper_lims.values())) + ) + .all() + .item() + ) + + # Check that a prediction from a state starting outside the bounds is also + # pushed within bounds. 
3 delta should be enough to give an initial state + # out of bounds so 5 is well outside + invalid_state = original_state - 5 * delta + assert ( + not ( + model.sigmoid_lower_lims + <= invalid_state[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .any() + .item() + ) + invalid_prediction = model.get_clamped_new_state(zero_delta, invalid_state) + assert ( + ( + model.sigmoid_lower_lims + <= invalid_prediction[:, :, model.clamp_lower_upper_idx] + <= model.sigmoid_upper_lims + ) + .all() + .item() + ) From 5f4705b5fab2e3588bbcb69baf389930026d0a79 Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 18:53:36 +0100 Subject: [PATCH 185/190] Implement optional gradient checkpointing --- neural_lam/models/ar_model.py | 19 +++++++++++++++++-- neural_lam/train_model.py | 6 ++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/neural_lam/models/ar_model.py b/neural_lam/models/ar_model.py index 0ce523c2..2bce702b 100644 --- a/neural_lam/models/ar_model.py +++ b/neural_lam/models/ar_model.py @@ -189,6 +189,17 @@ def __init__( # For storing spatial loss maps during evaluation self.spatial_loss_maps = [] + # Set if grad checkpointing function should be used during rollout + if args.grad_checkpointing: + # Perform gradient checkpointing at each unrolling step + self.unroll_ckpt_func = ( + lambda f, *args: torch.utils.checkpoint.checkpoint( + f, *args, use_reentrant=False + ) + ) + else: + self.unroll_ckpt_func = lambda f, *args: f(*args) + def _create_dataarray_from_tensor( self, tensor: torch.Tensor, @@ -282,8 +293,12 @@ def unroll_prediction(self, init_states, forcing, boundary_forcing): else: boundary_forcing_step = None - pred_state, pred_std = self.predict_step( - prev_state, prev_prev_state, forcing_step, boundary_forcing_step + pred_state, pred_std = self.unroll_ckpt_func( + self.predict_step, + prev_state, + prev_prev_state, + forcing_step, + boundary_forcing_step, ) # state: (B, num_interior_nodes, d_f) # pred_std: (B, num_interior_nodes, d_f) or None diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 356e4300..8e6424fd 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -160,6 +160,12 @@ def main(input_args=None): help="Number of epochs training between each validation run " "(default: 1)", ) + parser.add_argument( + "--grad_checkpointing", + action="store_true", + help="If gradient checkpointing should be used in-between each " + "unrolling step (default: false)", + ) # Evaluation options parser.add_argument( From 3816170bfddaebee8ee2989ceba600a23880a12c Mon Sep 17 00:00:00 2001 From: joeloskarsson Date: Thu, 16 Jan 2025 18:58:19 +0100 Subject: [PATCH 186/190] Add option to control number of steps of sanity checking --- neural_lam/train_model.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index 8e6424fd..ad419ea7 100644 --- a/neural_lam/train_model.py +++ b/neural_lam/train_model.py @@ -81,6 +81,13 @@ def main(input_args=None): default=32, help="Numerical precision to use for model (32/16/bf16) (default: 32)", ) + parser.add_argument( + "--num_sanity_steps", + type=int, + default=2, + help="Number of sanity checking validation steps to run before starting" + " training (default: 2)", + ) # Model architecture parser.add_argument( @@ -331,6 +338,7 @@ def main(input_args=None): callbacks=[checkpoint_callback], check_val_every_n_epoch=args.val_interval, precision=args.precision, + num_sanity_val_steps=args.num_sanity_steps, ) # Only 
init once, on rank 0 only From 939ddd16e186722751442dfce0acd4340be1c58e Mon Sep 17 00:00:00 2001 From: sadamov Date: Fri, 17 Jan 2025 14:33:22 +0100 Subject: [PATCH 187/190] Fixing test_time_slicing - don't expect interior forcing time deltas --- tests/test_time_slicing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index daec72f2..d1992669 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -223,7 +223,7 @@ def test_time_slicing_analysis( assert forcing.shape == ( ar_steps, 1, - total_forcing_window * 2, # Each windowed feature includes time deltas + total_forcing_window, # No time deltas for interior forcing ) # Extract the forcing values from the tensor (excluding time deltas) @@ -315,8 +315,8 @@ def test_time_slicing_forecast( expected_forcing_shape = ( ar_steps, # Number of AR steps 1, # Number of grid points - total_forcing_window # Total number of forcing steps in the window - * 2, # Each windowed feature includes time deltas + total_forcing_window, # Total number of forcing steps in the window + # no time deltas for interior forcing ) assert forcing.shape == expected_forcing_shape From 9ba275c3011b70096a1de0b413b8ec061c7e7718 Mon Sep 17 00:00:00 2001 From: sadamov Date: Fri, 17 Jan 2025 20:43:08 +0100 Subject: [PATCH 188/190] introducing subsampling of interior and boundary time step --- neural_lam/weather_dataset.py | 227 ++++++++++++++++++++++++++-------- tests/dummy_datastore.py | 2 +- tests/test_time_slicing.py | 210 +++++++++++++++++++++++++++++++ 3 files changed, 388 insertions(+), 51 deletions(-) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index a6b52b88..743f5a5e 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -53,10 +53,26 @@ class WeatherDataset(torch.utils.data.Dataset): boundary from times t, t+1, ..., t+j-1, t+j (and potentially times before t, given num_past_forcing_steps) are included as boundary inputs at time t. Default is 1. + interior_subsample_step : int, optional + The stride/step size used when sampling interior domain data points. A + value of N means only every Nth point will be sampled in the temporal + dimension. For example, if step_length=3 hours and + interior_subsample_step=2, data will be sampled every 6 hours. Default + is 1 (use every timestep). + boundary_subsample_step : int, optional + The stride/step size used when sampling boundary condition data points. + A value of N means only every Nth point will be sampled in the temporal + dimension. For example, if step_length=3 hours and + boundary_subsample_step=2, boundary conditions will be sampled every 6 + hours. Default is 1 (use every timestep). standardize : bool, optional Whether to standardize the data. Default is True. """ + # The current implementation requires at least 2 time steps for the + # initial state (see GraphCast). 
+ INIT_STEPS = 2 # Number of initial state steps needed + def __init__( self, datastore: BaseDatastore, @@ -67,6 +83,8 @@ def __init__( num_future_forcing_steps=1, num_past_boundary_steps=1, num_future_boundary_steps=1, + interior_subsample_step=1, + boundary_subsample_step=1, standardize=True, ): super().__init__() @@ -79,6 +97,37 @@ def __init__( self.num_future_forcing_steps = num_future_forcing_steps self.num_past_boundary_steps = num_past_boundary_steps self.num_future_boundary_steps = num_future_boundary_steps + self.interior_subsample_step = interior_subsample_step + self.boundary_subsample_step = boundary_subsample_step + # Scale forcing steps based on subsampling + self.effective_past_forcing_steps = ( + num_past_forcing_steps * interior_subsample_step + ) + self.effective_future_forcing_steps = ( + num_future_forcing_steps * interior_subsample_step + ) + self.effective_past_boundary_steps = ( + num_past_boundary_steps * boundary_subsample_step + ) + self.effective_future_boundary_steps = ( + num_future_boundary_steps * boundary_subsample_step + ) + + # Validate subsample steps + if ( + not isinstance(interior_subsample_step, int) + or interior_subsample_step < 1 + ): + raise ValueError( + "interior_subsample_step must be a positive integer" + ) + if ( + not isinstance(boundary_subsample_step, int) + or boundary_subsample_step < 1 + ): + raise ValueError( + "boundary_subsample_step must be a positive integer" + ) self.da_state = self.datastore.get_dataarray( category="state", split=self.split @@ -144,8 +193,6 @@ def __init__( ) i_ensemble = 0 self.da_state = self.da_state.isel(ensemble_member=i_ensemble) - else: - self.da_state = self.da_state # Check time step consistency in state data and determine time steps # for state, forcing and boundary forcing data @@ -168,6 +215,16 @@ def __init__( else: forcing_times = self.da_forcing.time self.time_step_forcing = get_time_step(forcing_times.values) + # inform user about the original and the subsampled time step + if self.interior_subsample_step != 1: + print( + f"Subsampling interior data with step size " + f"{self.interior_subsample_step} from original time step " + f"{self.time_step_state}" + ) + else: + print(f"Using original time step {self.time_step_state} for data") + # BOUNDARY FORCING if self.da_boundary_forcing is not None: if self.datastore_boundary.is_forecast: @@ -179,13 +236,24 @@ def __init__( boundary_times = self.da_boundary_forcing.time self.time_step_boundary = get_time_step(boundary_times.values) + if self.boundary_subsample_step != 1: + print( + f"Subsampling boundary data with step size " + f"{self.boundary_subsample_step} from original time step " + f"{self.time_step_boundary}" + ) + else: + print( + f"Using original time step {self.time_step_boundary} for " + "boundary data" + ) + # Forcing data is part of the same datastore as state data. During # creation, the time dimension of the forcing data is matched to the # state data. # Boundary data is part of a separate datastore The boundary data is # allowed to have a different time_step Checks that the boundary data # covers the required time range is required. 
- # Crop interior data if boundary coverage is insufficient if self.da_boundary_forcing is not None: self.da_state = crop_time_if_needed( @@ -255,32 +323,47 @@ def __len__(self): # This means that for each analysis time we get a single sample # check that there are enough forecast steps available to create # samples given the number of autoregressive steps requested - n_forecast_steps = self.da_state.elapsed_forecast_duration.size - if n_forecast_steps < 2 + self.ar_steps: + required_steps = self.INIT_STEPS + self.ar_steps + required_span = (required_steps - 1) * self.interior_subsample_step + + # Calculate available forecast steps + n_forecast_steps = len(self.da_state.elapsed_forecast_duration) + + if n_forecast_steps < required_span: raise ValueError( - "The number of forecast steps available " - f"({n_forecast_steps}) is less than the required " - f"2+ar_steps (2+{self.ar_steps}={2 + self.ar_steps}) for " - "creating a sample with initial and target states." + f"Not enough forecast steps ({n_forecast_steps}) for " + f"required span of {required_span} steps with " + f"subsample_step={self.interior_subsample_step}" ) return self.da_state.analysis_time.size else: - # Calculate the number of samples in the dataset n_samples = total - # time steps - (autoregressive steps + past forcing + future - # forcing) - #: + # Calculate the number of samples in the dataset as: + # total_samples = total_timesteps - required_time_span - + # required_past_steps - effective_future_forcing_steps # Where: - # - total time steps: len(self.da_state.time) - # - autoregressive steps: self.ar_steps - # - past forcing: max(2, self.num_past_forcing_steps) (at least 2 - # time steps are required for the initial state) - # - future forcing: self.num_future_forcing_steps + # - total_timesteps: total number of timesteps in the state data + # - required_time_span: number of continuous timesteps needed for + # initial state + autoregressive steps, accounting for subsampling + # - required_past_steps: additional past timesteps needed for + # forcing data beyond initial state + # - effective_future_forcing_steps: number of future timesteps + # needed for forcing data with subsampling + required_continuous_steps = self.INIT_STEPS + self.ar_steps + required_time_span = ( + required_continuous_steps * self.interior_subsample_step + ) + required_past_steps = max( + 0, + self.effective_past_forcing_steps + - self.INIT_STEPS * self.interior_subsample_step, + ) + return ( len(self.da_state.time) - - self.ar_steps - - max(2, self.num_past_forcing_steps) - - self.num_future_forcing_steps + - required_time_span + - required_past_steps + - self.effective_future_forcing_steps ) def _slice_time( @@ -333,24 +416,39 @@ def _slice_time( 'forcing/boundary_feature_windowed'). If no forcing/boundary data is provided, this will be `None`. """ - # The current implementation requires at least 2 time steps for the - # initial state (see GraphCast). - init_steps = 2 + init_steps = self.INIT_STEPS + subsample_step = ( + self.boundary_subsample_step + if is_boundary + else self.interior_subsample_step + ) # slice the dataarray to include the required number of time steps if self.datastore.is_forecast: - start_idx = max(0, self.num_past_forcing_steps - init_steps) - end_idx = max(init_steps, self.num_past_forcing_steps) + n_steps # this implies that the data will have both `analysis_time` and # `elapsed_forecast_duration` dimensions for forecasts. We for now # simply select a analysis time and the first `n_steps` forecast # times (given no offset). 
Note that this means that we get one # sample per forecast, always starting at forecast time 2. + + # Calculate base offset and indices with subsampling + offset = ( + max(0, num_past_steps - init_steps) if num_past_steps else 0 + ) + + # Calculate initial and target indices + init_indices = [ + offset + i * subsample_step for i in range(init_steps) + ] + target_indices = [ + offset + (init_steps + i) * subsample_step + for i in range(n_steps) + ] + all_indices = init_indices + target_indices + da_state_sliced = da_state.isel( analysis_time=idx, - elapsed_forecast_duration=slice(start_idx, end_idx), + elapsed_forecast_duration=all_indices, ) - # create a new time dimension so that the produced sample has a - # `time` dimension, similarly to the analysis only data da_state_sliced["time"] = ( da_state_sliced.analysis_time + da_state_sliced.elapsed_forecast_duration @@ -360,12 +458,15 @@ def _slice_time( ) else: - # For analysis data we slice the time dimension directly. The offset - # is only relevant for the very first (and last) samples in the - # dataset. - start_idx = idx + max(0, num_past_steps - init_steps) - end_idx = idx + max(init_steps, num_past_steps) + n_steps - da_state_sliced = da_state.isel(time=slice(start_idx, end_idx)) + # Analysis data slicing, already correctly modified + start_idx = idx + ( + max(0, num_past_steps - init_steps) if num_past_steps else 0 + ) + all_indices = [ + start_idx + i * subsample_step + for i in range(init_steps + n_steps) + ] + da_state_sliced = da_state.isel(time=all_indices) if da_forcing is None: return da_state_sliced, None @@ -385,20 +486,33 @@ def _slice_time( forcing_analysis_time_idx = da_forcing.analysis_time.get_index( "analysis_time" ).get_indexer([state_time], method="pad")[0] + + # Adjust window indices for subsampled steps for step_idx in range(init_steps, len(state_times)): - start_idx = offset + step_idx - num_past_steps - end_idx = offset + step_idx + num_future_steps + 1 + window_start = ( + offset + + step_idx * subsample_step + - num_past_steps * subsample_step + ) + window_end = ( + offset + + step_idx * subsample_step + + (num_future_steps + 1) * subsample_step + ) current_time = ( forcing_analysis_time_idx - + da_forcing.elapsed_forecast_duration[step_idx] + + da_forcing.elapsed_forecast_duration[ + step_idx * subsample_step + ] ) da_sliced = da_forcing.isel( analysis_time=forcing_analysis_time_idx, - elapsed_forecast_duration=slice(start_idx, end_idx), + elapsed_forecast_duration=slice( + window_start, window_end, subsample_step + ), ) - da_sliced = da_sliced.rename( {"elapsed_forecast_duration": "window"} ) @@ -410,9 +524,11 @@ def _slice_time( # Calculate window time deltas for forecast data window_time_deltas = ( da_forcing.elapsed_forecast_duration[ - start_idx:end_idx + window_start:window_end:subsample_step + ].values + - da_forcing.elapsed_forecast_duration[ + step_idx * subsample_step ].values - - da_forcing.elapsed_forecast_duration[step_idx].values ) # Assign window time delta coordinate da_sliced["window_time_deltas"] = ("window", window_time_deltas) @@ -433,21 +549,32 @@ def _slice_time( "time" ).get_indexer([state_time], method="pad")[0] - # Use isel to select the window + window_start = ( + forcing_time_idx - num_past_steps * subsample_step + ) + window_end = ( + forcing_time_idx + (num_future_steps + 1) * subsample_step + ) + da_window = da_forcing.isel( - time=slice( - forcing_time_idx - num_past_steps, - forcing_time_idx + num_future_steps + 1, - ), + time=slice(window_start, window_end, subsample_step) ) - 
window_time_deltas = (da_window.time - state_time).values + + # Rename the time dimension to window for consistency da_window = da_window.rename({"time": "window"}) - # Assign 'window' coordinate + # Assign the 'window' coordinate to be relative positions da_window = da_window.assign_coords( window=np.arange(-num_past_steps, num_future_steps + 1) ) - # Assign window time delta coordinate + + # Calculate window time deltas for analysis data + window_time_deltas = ( + da_forcing.time[ + window_start:window_end:subsample_step + ].values + - da_forcing.time[forcing_time_idx].values + ) da_window["window_time_deltas"] = ("window", window_time_deltas) da_window = da_window.expand_dims(dim={"time": [state_time]}) diff --git a/tests/dummy_datastore.py b/tests/dummy_datastore.py index ede2aa13..6b050e6b 100644 --- a/tests/dummy_datastore.py +++ b/tests/dummy_datastore.py @@ -37,7 +37,7 @@ class DummyDatastore(BaseRegularGridDatastore): bbox_size_km = [500, 500] # km def __init__( - self, config_path=None, n_grid_points=10000, n_timesteps=10 + self, config_path=None, n_grid_points=10000, n_timesteps=15 ) -> None: """ Create a dummy datastore with random data. diff --git a/tests/test_time_slicing.py b/tests/test_time_slicing.py index d1992669..9345c04b 100644 --- a/tests/test_time_slicing.py +++ b/tests/test_time_slicing.py @@ -4,8 +4,14 @@ import xarray as xr # First-party +from neural_lam.datastore import DATASTORES from neural_lam.datastore.base import BaseDatastore from neural_lam.weather_dataset import WeatherDataset +from tests.conftest import ( + DATASTORES_BOUNDARY_EXAMPLES, + init_datastore_boundary_example, + init_datastore_example, +) class SinglePointDummyDatastore(BaseDatastore): @@ -103,6 +109,12 @@ def get_vars_long_names(self, category): raise NotImplementedError() +class BoundaryDummyDatastore(SinglePointDummyDatastore): + """Dummy datastore with 6h timesteps for testing boundary conditions""" + + step_length = 6 # 6 hour timesteps + + INIT_STEPS = 2 STATE_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] @@ -328,3 +340,201 @@ def test_time_slicing_forecast( np.testing.assert_array_equal( forcing_values[i], expected_forcing_values[i] ) + + +@pytest.mark.parametrize("datastore_name", DATASTORES.keys()) +@pytest.mark.parametrize( + "datastore_boundary_name", DATASTORES_BOUNDARY_EXAMPLES.keys() +) +@pytest.mark.parametrize( + "subsample_config", + [ + # (interior_subsample, boundary_subsample, ar_steps) + (1, 1, 1), # Base case - no subsampling + (2, 1, 1), # Interior subsampling only + (1, 2, 1), # Boundary subsampling only + (2, 2, 1), # Equal subsampling + (2, 2, 2), # More AR steps + ], +) +def test_dataset_subsampling( + datastore_name, datastore_boundary_name, subsample_config +): + """Test that WeatherDataset handles different subsample steps correctly for + interior and boundary data. + + The test checks: + 1. Dataset creation succeeds with different subsample configurations + 2. Time differences between consecutive states match subsample steps + 3. Shapes of returned tensors are correct + 4. 
We can access the last item without errors + """ + interior_subsample, boundary_subsample, ar_steps = subsample_config + + datastore = init_datastore_example(datastore_name) + datastore_boundary = init_datastore_boundary_example( + datastore_boundary_name + ) + + # Configure dataset with subsampling + dataset = WeatherDataset( + datastore=datastore, + datastore_boundary=datastore_boundary, + split="train", + ar_steps=ar_steps, + num_past_forcing_steps=1, + num_future_forcing_steps=1, + num_past_boundary_steps=1, + num_future_boundary_steps=1, + interior_subsample_step=interior_subsample, + boundary_subsample_step=boundary_subsample, + ) + + # Get first sample + init_states, target_states, forcing, boundary, target_times = dataset[0] + + # Check shapes + assert init_states.shape[0] == 2 # Always 2 initial states + assert target_states.shape[0] == ar_steps + + # Check time differences + times = target_times.numpy() + for i in range(1, len(times)): + time_delta = np.timedelta64(times[i] - times[i - 1], "ns") + expected_hours = interior_subsample * datastore.step_length + np.testing.assert_equal( + time_delta.astype("timedelta64[h]").astype(int), expected_hours + ) + + # Verify boundary data timesteps if present + if boundary is not None: + assert boundary.shape[0] == ar_steps + # Each boundary window should have: + # (num_past + num_future + 1) timesteps * features * 2 (for time deltas) + expected_boundary_features = ( + datastore_boundary.get_num_data_vars("forcing") + 1 + ) * ( + 1 + 1 + 1 + ) # past + future + current + assert boundary.shape[2] == expected_boundary_features + + # Verify we can access the last item + dataset[len(dataset) - 1] + + +@pytest.mark.parametrize( + "num_past_steps,num_future_steps,interior_step,boundary_step", + [ + (1, 1, 1, 1), # Base case, no subsampling + (2, 1, 1, 1), # More past steps, no subsampling + (1, 2, 1, 1), # More future steps, no subsampling + (2, 2, 1, 1), # Equal past/future, no subsampling + (1, 1, 1, 2), # Basic case with boundary subsampling + (2, 2, 1, 2), # Equal past/future with boundary subsampling + (1, 1, 2, 1), # Basic case with interior subsampling + (2, 2, 2, 1), # Equal past/future with interior subsampling + (1, 1, 2, 2), # Both subsamplings + ], +) +def test_time_deltas_in_boundary_data( + num_past_steps, num_future_steps, interior_step, boundary_step +): + """Test that time deltas are correctly calculated for boundary data. + + This test verifies: + 1. Time deltas are included in boundary data + 2. Time deltas are in units of state timesteps + 3. Time deltas are correctly calculated relative to current timestep + 4. 
Time steps scale correctly with subsampling + """ + # Create dummy data with known timesteps (3 hour intervals for interior) + time_values_interior = np.datetime64("2020-01-01") + np.arange( + 20 + ) * np.timedelta64(3, "h") + # 6 hour intervals for boundary + time_values_boundary = np.datetime64("2020-01-01") + np.arange( + 10 + ) * np.timedelta64(6, "h") + + time_step_ratio = ( + 6 / 3 + ) # Boundary step is 6 hours, interior step is 3 hours + + state_data = np.arange(20) + forcing_data = np.arange(20, 40) + boundary_data = np.arange(10) # Fewer points due to larger time step + + interior_datastore = SinglePointDummyDatastore( + state_data=state_data, + forcing_data=forcing_data, + time_values=time_values_interior, + is_forecast=False, + ) + + boundary_datastore = BoundaryDummyDatastore( + state_data=boundary_data, + forcing_data=boundary_data + 10, + time_values=time_values_boundary, + is_forecast=False, + ) + + dataset = WeatherDataset( + datastore=interior_datastore, + datastore_boundary=boundary_datastore, + split="train", + ar_steps=2, + num_past_boundary_steps=num_past_steps, + num_future_boundary_steps=num_future_steps, + interior_subsample_step=interior_step, + boundary_subsample_step=boundary_step, + standardize=False, + ) + + # Get first sample + _, _, _, boundary, target_times = dataset[0] + + # Extract time deltas from boundary data + # Time deltas are the last features in the boundary tensor + window_size = num_past_steps + num_future_steps + 1 + time_deltas = boundary[0, 0, -window_size:].numpy() + + # Expected time deltas in state timesteps, adjusted for boundary subsampling + # For each window position, calculate expected offset from current time + expected_deltas = ( + np.arange(-num_past_steps, num_future_steps + 1) + * boundary_step + * time_step_ratio + ) + + # Verify time deltas match expected values + np.testing.assert_array_equal(time_deltas, expected_deltas) + + # Calculate expected hours offset from current time + # Each state timestep is 3 hours, scale by boundary step + expected_hours = expected_deltas * boundary_datastore.step_length + time_delta_hours = time_deltas * boundary_datastore.step_length + + # Verify time delta hours match expected values + np.testing.assert_array_equal(time_delta_hours, expected_hours) + + # Verify relative hour differences between timesteps + expected_hour_diff = ( + boundary_step * boundary_datastore.step_length * time_step_ratio + ) + hour_diffs = np.diff(time_delta_hours) + np.testing.assert_array_equal( + hour_diffs, [expected_hour_diff] * (len(time_delta_hours) - 1) + ) + + # Extract boundary times and verify they match expected hours + for i in range(len(target_times)): + window_start_idx = i * (window_size * 2) + window_end_idx = window_start_idx + window_size + boundary_times = boundary[i, 0, window_start_idx:window_end_idx].numpy() + boundary_time_diffs = ( + np.diff(boundary_times) * boundary_datastore.step_length + ) + expected_diff = boundary_step * boundary_datastore.step_length + np.testing.assert_array_equal( + boundary_time_diffs, [expected_diff] * (len(boundary_times) - 1) + ) From 1d443d623a6c268b256bced5d1b29b542a1b0c3d Mon Sep 17 00:00:00 2001 From: sadamov Date: Fri, 17 Jan 2025 20:55:04 +0100 Subject: [PATCH 189/190] added train_model flags and Module args --- neural_lam/train_model.py | 14 ++++++++++++++ neural_lam/weather_dataset.py | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/neural_lam/train_model.py b/neural_lam/train_model.py index ad419ea7..2313fda4 100644 --- 
a/neural_lam/train_model.py
+++ b/neural_lam/train_model.py
@@ -247,6 +247,18 @@ def main(input_args=None):
         default=1,
         help="Number of future time steps to use as input for boundary data",
     )
+    parser.add_argument(
+        "--interior_subsample_step",
+        type=int,
+        default=1,
+        help="Temporal subsample step for interior data (default: 1)",
+    )
+    parser.add_argument(
+        "--boundary_subsample_step",
+        type=int,
+        default=1,
+        help="Temporal subsample step for boundary data (default: 1)",
+    )
     args = parser.parse_args(input_args)
     args.var_leads_metrics_watch = {
         int(k): v for k, v in json.loads(args.var_leads_metrics_watch).items()
@@ -285,6 +297,8 @@ def main(input_args=None):
         num_future_forcing_steps=args.num_future_forcing_steps,
         num_past_boundary_steps=args.num_past_boundary_steps,
         num_future_boundary_steps=args.num_future_boundary_steps,
+        interior_subsample_step=args.interior_subsample_step,
+        boundary_subsample_step=args.boundary_subsample_step,
         batch_size=args.batch_size,
         num_workers=args.num_workers,
     )
diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py
index 743f5a5e..9bd2067a 100644
--- a/neural_lam/weather_dataset.py
+++ b/neural_lam/weather_dataset.py
@@ -955,6 +955,8 @@ def __init__(
         num_future_forcing_steps=1,
         num_past_boundary_steps=1,
         num_future_boundary_steps=1,
+        interior_subsample_step=1,
+        boundary_subsample_step=1,
         batch_size=4,
         num_workers=16,
     ):
@@ -965,6 +967,8 @@ def __init__(
         self.num_future_forcing_steps = num_future_forcing_steps
         self.num_past_boundary_steps = num_past_boundary_steps
         self.num_future_boundary_steps = num_future_boundary_steps
+        self.interior_subsample_step = interior_subsample_step
+        self.boundary_subsample_step = boundary_subsample_step
         self.ar_steps_train = ar_steps_train
         self.ar_steps_eval = ar_steps_eval
         self.standardize = standardize
@@ -994,6 +998,8 @@ def setup(self, stage=None):
             num_future_forcing_steps=self.num_future_forcing_steps,
             num_past_boundary_steps=self.num_past_boundary_steps,
             num_future_boundary_steps=self.num_future_boundary_steps,
+            interior_subsample_step=self.interior_subsample_step,
+            boundary_subsample_step=self.boundary_subsample_step,
         )
         self.val_dataset = WeatherDataset(
             datastore=self._datastore,
@@ -1005,6 +1011,8 @@ def setup(self, stage=None):
             num_future_forcing_steps=self.num_future_forcing_steps,
             num_past_boundary_steps=self.num_past_boundary_steps,
             num_future_boundary_steps=self.num_future_boundary_steps,
+            interior_subsample_step=self.interior_subsample_step,
+            boundary_subsample_step=self.boundary_subsample_step,
         )
 
         if stage == "test" or stage is None:
@@ -1018,6 +1026,8 @@ def setup(self, stage=None):
             num_future_forcing_steps=self.num_future_forcing_steps,
             num_past_boundary_steps=self.num_past_boundary_steps,
             num_future_boundary_steps=self.num_future_boundary_steps,
+            interior_subsample_step=self.interior_subsample_step,
+            boundary_subsample_step=self.boundary_subsample_step,
         )
 
     def train_dataloader(self):

From aae85530a1cc4bb95ccccd7eaae5f69c9fbf6711 Mon Sep 17 00:00:00 2001
From: joeloskarsson
Date: Tue, 21 Jan 2025 17:07:45 +0100
Subject: [PATCH 190/190] Small fix for when adjusting level_refinement_factor
 in graph creation

---
 neural_lam/build_rectangular_graph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_lam/build_rectangular_graph.py b/neural_lam/build_rectangular_graph.py
index fea578c5..b22eaae8 100644
--- a/neural_lam/build_rectangular_graph.py
+++ b/neural_lam/build_rectangular_graph.py
@@ -61,7 +61,7 @@ def main(input_args=None):
     )
     parser.add_argument(
         "--level_refinement_factor",
-        type=float,
+        type=int,
         default=3,
         help="Refinement factor between grid points and bottom level of "
         "mesh hierarchy",