From 81af5316189f5857bf4370e7947b32662848b21f Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Mon, 28 Oct 2024 18:52:23 +0100
Subject: [PATCH] stuff

---
 common_querysets/queryset_meow_meow.py        |  41 +++
 common_utils/ensemble_path.py                 | 118 +++++++-
 meta_tools/ensemble_scaffold_builder.py       | 226 ++++++++++++++
 meta_tools/model_scaffold_builder.py          |   2 +-
 .../template_config_deployment.py             |   0
 .../template_config_hyperparameters.py        |  29 ++
 .../ensemble/template_config_meta.py          |  39 +++
 .../templates/ensemble/template_main.py       |  64 ++++
 .../model/template_config_deployment.py       |  75 +++++
 .../template_config_hyperparameters.py        |   0
 .../{ => model}/template_config_input_data.py |   0
 .../{ => model}/template_config_meta.py       |   0
 .../{ => model}/template_config_sweep.py      |   0
 .../templates/{ => model}/template_main.py    |   0
 .../tests/test_ensemble_scaffold_builder.py   | 282 ++++++++++++++++++
 .../tests/test_model_scaffold_builder.py      |   2 -
 16 files changed, 868 insertions(+), 10 deletions(-)
 create mode 100644 common_querysets/queryset_meow_meow.py
 create mode 100644 meta_tools/ensemble_scaffold_builder.py
 rename meta_tools/templates/{ => ensemble}/template_config_deployment.py (100%)
 create mode 100644 meta_tools/templates/ensemble/template_config_hyperparameters.py
 create mode 100644 meta_tools/templates/ensemble/template_config_meta.py
 create mode 100644 meta_tools/templates/ensemble/template_main.py
 create mode 100644 meta_tools/templates/model/template_config_deployment.py
 rename meta_tools/templates/{ => model}/template_config_hyperparameters.py (100%)
 rename meta_tools/templates/{ => model}/template_config_input_data.py (100%)
 rename meta_tools/templates/{ => model}/template_config_meta.py (100%)
 rename meta_tools/templates/{ => model}/template_config_sweep.py (100%)
 rename meta_tools/templates/{ => model}/template_main.py (100%)
 create mode 100644 meta_tools/tests/test_ensemble_scaffold_builder.py

diff --git a/common_querysets/queryset_meow_meow.py b/common_querysets/queryset_meow_meow.py
new file mode 100644
index 00000000..02b00ecd
--- /dev/null
+++ b/common_querysets/queryset_meow_meow.py
@@ -0,0 +1,41 @@
+from viewser import Queryset, Column
+
+def generate():
+    """
+    Contains the configuration for the input data in the form of a viewser queryset. That is, the data from viewser that is used to train the model.
+    This configuration is "behavioral", so modifying it will affect the model's runtime behavior and integration into the deployment system.
+    There is no guarantee that the model will work if the input data configuration is changed here without changing the model settings and algorithm accordingly.
+
+    Returns:
+    - queryset_base (Queryset): A queryset containing the base data for the model training.
+    """
+
+    # VIEWSER 6, Example configuration. Modify as needed.
+
+    queryset_base = (Queryset("meow_meow", "priogrid_month")
+                     # Create a new column 'ln_sb_best' using data from 'priogrid_month' and 'ged_sb_best_count_nokgi' column
+                     # Apply logarithmic transformation, handle missing values by replacing them with NA
+                     .with_column(Column("ln_sb_best", from_loa="priogrid_month", from_column="ged_sb_best_count_nokgi")
+                                  .transform.ops.ln().transform.missing.replace_na())
+
+                     # Create a new column 'ln_ns_best' using data from 'priogrid_month' and 'ged_ns_best_count_nokgi' column
+                     # Apply logarithmic transformation, handle missing values by replacing them with NA
+                     .with_column(Column("ln_ns_best", from_loa="priogrid_month", from_column="ged_ns_best_count_nokgi")
+                                  .transform.ops.ln().transform.missing.replace_na())
+
+                     # Create a new column 'ln_os_best' using data from 'priogrid_month' and 'ged_os_best_count_nokgi' column
+                     # Apply logarithmic transformation, handle missing values by replacing them with NA
+                     .with_column(Column("ln_os_best", from_loa="priogrid_month", from_column="ged_os_best_count_nokgi")
+                                  .transform.ops.ln().transform.missing.replace_na())
+
+                     # Create columns for month and year_id
+                     .with_column(Column("month", from_loa="month", from_column="month"))
+                     .with_column(Column("year_id", from_loa="country_year", from_column="year_id"))
+
+                     # Create columns for country_id, col, and row
+                     .with_column(Column("c_id", from_loa="country_year", from_column="country_id"))
+                     .with_column(Column("col", from_loa="priogrid", from_column="col"))
+                     .with_column(Column("row", from_loa="priogrid", from_column="row"))
+                     )
+
+    return queryset_base
diff --git a/common_utils/ensemble_path.py b/common_utils/ensemble_path.py
index 9f590b57..1d37954e 100644
--- a/common_utils/ensemble_path.py
+++ b/common_utils/ensemble_path.py
@@ -2,7 +2,7 @@
 import logging
 from pathlib import Path
 from typing import Union
-
+import sys
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
@@ -36,11 +36,115 @@ def __init__(
         """
         super().__init__(ensemble_name_or_path, validate)
         # Additional ensemble-specific initialization...
+        print(self._validate)
+
+    def _initialize_directories(self) -> None:
+        """
+        Initializes the necessary directories for the ensemble.
+
+        Creates and sets up the directories required for the ensemble, such as artifacts, configs, data, reports, and src.
+        """
+        # Call the parent class's _initialize_directories method
+        super()._initialize_directories()
+
+        # List of directories to keep
+        keep_dirs = {
+            "artifacts",
+            "configs",
+            "data",
+            "data/generated",
+            "data/processed",
+            "notebooks",
+            "reports",
+            "reports/figures",
+            "reports/papers",
+            "reports/plots",
+            "reports/slides",
+            "reports/timelapse",
+            "src",
+            "src/dataloaders",
+            "src/forecasting",
+            "src/management",
+            "src/offline_evaluation",
+            "src/training",
+            "src/utils",
+            "src/visualization",
+        }
+
+        # Remove directories that are not in the keep_dirs list
+        for attr, value in list(self.__dict__.items()):
+            if Path(value).relative_to(self.model_dir) not in keep_dirs:
+                delattr(self, attr)
+
+        # Initialize directories as per the new structure
+        self.model_dir = self._get_model_dir()
+        self.artifacts = self._build_absolute_directory(Path("artifacts"))
+        self.configs = self._build_absolute_directory(Path("configs"))
+        self.data = self._build_absolute_directory(Path("data"))
+        self.data_generated = self._build_absolute_directory(Path("data/generated"))
+        self.data_processed = self._build_absolute_directory(Path("data/processed"))
+        self.notebooks = self._build_absolute_directory(Path("notebooks"))
+        self.reports = self._build_absolute_directory(Path("reports"))
+        self.reports_figures = self._build_absolute_directory(Path("reports/figures"))
+        self.reports_papers = self._build_absolute_directory(Path("reports/papers"))
+        self.reports_plots = self._build_absolute_directory(Path("reports/plots"))
+        self.reports_slides = self._build_absolute_directory(Path("reports/slides"))
+        self.reports_timelapse = self._build_absolute_directory(
+            Path("reports/timelapse")
+        )
+        self.src = self._build_absolute_directory(Path("src"))
+        self.dataloaders = self._build_absolute_directory(Path("src/dataloaders"))
+        self.forecasting = self._build_absolute_directory(Path("src/forecasting"))
+        self.management = self._build_absolute_directory(Path("src/management"))
+        self.offline_evaluation = self._build_absolute_directory(
+            Path("src/offline_evaluation")
+        )
+        self.training = self._build_absolute_directory(Path("src/training"))
+        self.utils = self._build_absolute_directory(Path("src/utils"))
+        self.visualization = self._build_absolute_directory(Path("src/visualization"))
+        self._templates = self.meta_tools / "templates"
+        self._sys_paths = None
+        # if self.common_querysets not in sys.path:
+        #     sys.path.insert(0, str(self.common_querysets))
+        # self.queryset_path = self.common_querysets / f"queryset_{self.model_name}.py"
+        # self._queryset = None
+
+    def _initialize_scripts(self) -> None:
+        """
+        Initializes the necessary scripts for the ensemble.
+
+        Creates and sets up the scripts required for the ensemble, such as the configuration scripts, the main script, and other utility scripts.
+        """
+        self.scripts = [
+            self._build_absolute_directory(Path("configs/config_deployment.py")),
+            self._build_absolute_directory(Path("configs/config_hyperparameters.py")),
+            self._build_absolute_directory(Path("configs/config_meta.py")),
+            self._build_absolute_directory(Path("main.py")),
+            self._build_absolute_directory(Path("README.md")),
+            self._build_absolute_directory(Path("requirements.txt")),
+            self._build_absolute_directory(Path("artifacts/model_metadata_dict.py")),
+            self._build_absolute_directory(Path("src/dataloaders/get_data.py")),
+            self._build_absolute_directory(
+                Path("src/forecasting/generate_forecast.py")
+            ),
+            self._build_absolute_directory(
+                Path("src/management/execute_model_runs.py")
+            ),
+            self._build_absolute_directory(
+                Path("src/management/execute_model_tasks.py")
+            ),
+            self._build_absolute_directory(
+                Path("src/offline_evaluation/evaluate_ensemble.py")
+            ),
+            self._build_absolute_directory(Path("src/training/train_ensemble.py")),
+            self._build_absolute_directory(Path("src/utils/utils_outputs.py")),
+            self._build_absolute_directory(Path("src/utils/utils_run.py")),
+            self._build_absolute_directory(Path("src/visualization/visual.py")),
+            # self.common_querysets / f"queryset_{self.model_name}.py",
+        ]
 
 
-# if __name__ == "__main__":
-#     ensemble_path = EnsemblePath("white_mustang", validate=True)
-#     ensemble_path.view_directories()
-#     ensemble_path.view_scripts()
-#     print(ensemble_path.get_queryset())
-#     del ensemble_path
+if __name__ == "__main__":
+    ensemble_path = EnsemblePath("white_mustang", validate=True)
+    print(ensemble_path.get_directories())
+    del ensemble_path
diff --git a/meta_tools/ensemble_scaffold_builder.py b/meta_tools/ensemble_scaffold_builder.py
new file mode 100644
index 00000000..7a094723
--- /dev/null
+++ b/meta_tools/ensemble_scaffold_builder.py
@@ -0,0 +1,226 @@
+from pathlib import Path
+from utils.utils_model_naming import validate_model_name
+import datetime
+import logging
+import sys
+
+PATH = Path(__file__)
+if 'views_pipeline' in PATH.parts:
+    PATH_ROOT = Path(*PATH.parts[:PATH.parts.index('views_pipeline') + 1])
+    PATH_COMMON_UTILS = PATH_ROOT / 'common_utils'
+    if not PATH_COMMON_UTILS.exists():
+        raise ValueError("The 'common_utils' directory was not found in the provided path.")
+    sys.path.insert(0, str(PATH_COMMON_UTILS))
+    sys.path.insert(0, str(PATH_ROOT))
+else:
+    raise ValueError("The 'views_pipeline' directory was not found in the provided path.")
+
+# print(str(Path(__file__).parent.parent))
+from common_utils import model_path, ensemble_path
+
+from templates.ensemble import (
+    template_config_deployment,
+    template_config_hyperparameters,
+    template_config_meta,
+    template_main,
+)
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class EnsembleScaffoldBuilder:
+    """
+    A class to create and manage the directory structure and scripts for a machine learning ensemble.
+
+    Attributes:
+        model_name (str): The name of the ensemble for which the directory structure is to be created.
+        _model (EnsemblePath): An instance of the EnsemblePath class used to manage ensemble paths.
+        _subdirs (list of str): A list of subdirectories to be created within the ensemble directory.
+        _scripts (list of str): A list of script paths to be created within the ensemble directory.
+
+    Methods:
+        __init__(model_name: str) -> None:
+            Initializes the EnsembleScaffoldBuilder with the given model name and sets up paths.
+
+        build_model_directory() -> Path:
+            Creates the ensemble directory and its subdirectories, and initializes necessary files such as README.md
+            and requirements.txt.
+
+            Returns:
+                Path: The path to the created ensemble directory.
+
+            Raises:
+                FileExistsError: If the ensemble directory already exists.
+
+        build_model_scripts() -> None:
+            Generates the necessary configuration and main scripts for the ensemble.
+
+            Raises:
+                FileNotFoundError: If the ensemble directory does not exist.
+
+        assess_model_directory() -> dict:
+            Assesses the ensemble directory by checking for the presence of expected directories.
+
+            Returns:
+                dict: A dictionary containing assessment results with two keys:
+                    - 'model_dir': The path to the ensemble directory.
+                    - 'structure_errors': A list of errors related to missing directories.
+
+        assess_model_scripts() -> dict:
+            Assesses the ensemble directory by checking for the presence of expected scripts.
+
+            Returns:
+                dict: A dictionary containing assessment results with two keys:
+                    - 'model_dir': The path to the ensemble directory.
+                    - 'missing_scripts': A set of missing script paths.
+    """
+
+    def __init__(self, model_name) -> None:
+        """
+        Initialize an EnsembleScaffoldBuilder object with the given ensemble name and set up paths.
+
+        Args:
+            model_name (str): The name of the ensemble for which directories and files are to be created.
+
+        Returns:
+            None
+        """
+        self._model = ensemble_path.EnsemblePath(model_name, validate=False)
+        self._subdirs = self._model.get_directories().values()
+        self._scripts = self._model.get_scripts().values()
+
+    def build_model_directory(self) -> Path:
+        """
+        Create the ensemble directory and its subdirectories, and initialize necessary files such as README.md and requirements.txt.
+
+        Returns:
+            Path: The path to the created ensemble directory.
+
+        Raises:
+            FileExistsError: If the ensemble directory already exists.
+        """
+        if self._model.model_dir.exists():
+            logger.info(
+                f"Ensemble directory already exists: {self._model.model_dir}. Proceeding with existing directory."
+            )
+        else:
+            self._model.model_dir.mkdir(parents=True, exist_ok=False)
+            logger.info(f"Created new ensemble directory: {self._model.model_dir}")
+
+        for subdir in self._subdirs:
+            subdir = Path(subdir)
+            if not subdir.exists():
+                try:
+                    subdir.mkdir(parents=True, exist_ok=True)
+                    if subdir.exists():
+                        logging.info(f"Created subdirectory: {subdir}")
+                    else:
+                        logging.error(f"Did not create subdirectory: {subdir}")
+                except Exception as e:
+                    logging.error(f"Error creating subdirectory: {subdir}. {e}")
+            else:
+                logging.info(f"Subdirectory already exists: {subdir}. Skipping.")
+
+        # Create README.md and requirements.txt
+        readme_path = self._model.model_dir / "README.md"
+        with open(readme_path, "w") as readme_file:
+            readme_file.write(
+                f"# Ensemble README\n## Ensemble name: {self._model.model_name}\n## Created on: {str(datetime.datetime.now())}"
+            )
+        if readme_path.exists():
+            logging.info(f"Created README.md: {readme_path}")
+        else:
+            logging.error(f"Did not create README.md: {readme_path}")
+
+        requirements_path = self._model.model_dir / "requirements.txt"
+        with open(requirements_path, "w") as requirements_file:
+            requirements_file.write("# Requirements\n")
+        if requirements_path.exists():
+            logging.info(f"Created requirements.txt: {requirements_path}")
+        else:
+            logging.error(f"Did not create requirements.txt: {requirements_path}")
+        return self._model.model_dir
+
+    def build_model_scripts(self):
+        if not self._model.model_dir.exists():
+            raise FileNotFoundError(
+                f"Ensemble directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
+            )
+        template_config_deployment.generate(
+            script_dir=self._model.model_dir / "configs/config_deployment.py"
+        )
+        template_config_hyperparameters.generate(
+            script_dir=self._model.model_dir / "configs/config_hyperparameters.py",
+        )
+        template_config_meta.generate(
+            script_dir=self._model.model_dir / "configs/config_meta.py",
+            model_name=self._model.model_name,
+        )
+        template_main.generate(script_dir=self._model.model_dir / "main.py")
+
+    def assess_model_directory(self) -> dict:
+        """
+        Assess the ensemble directory by checking for the presence of expected directories.
+
+        Returns:
+            dict: A dictionary containing assessment results with two keys:
+                - 'model_dir': The path to the ensemble directory.
+                - 'structure_errors': A list of errors related to missing directories or files.
+        """
+        assessment = {"model_dir": self._model.model_dir, "structure_errors": []}
+        if not self._model.model_dir.exists():
+            raise FileNotFoundError(
+                f"Ensemble directory {self._model.model_dir} does not exist. Please call build_model_directory() first."
+            )
+        updated_model_path = ensemble_path.EnsemblePath(self._model.model_name, validate=True)
+        assessment["structure_errors"] = set(
+            updated_model_path.get_directories().values()
+        ) - set(self._subdirs)
+        del updated_model_path
+        return assessment
+
+    def assess_model_scripts(self) -> dict:
+        """
+        Assess the ensemble directory by checking for the presence of expected scripts.
+
+        Returns:
+            dict: A dictionary containing assessment results with two keys:
+                - 'model_dir': The path to the ensemble directory.
+                - 'missing_scripts': A set of paths to expected scripts that are missing.
+        """
+        assessment = {"model_dir": self._model.model_dir, "missing_scripts": set()}
+        if not self._model.model_dir.exists():
+            raise FileNotFoundError(
+                f"Ensemble directory {self._model.model_dir} does not exist. Please call build_model_directory() first."
+            )
+        for script_path in self._scripts:
+            script_path = Path(script_path)
+            if not script_path.exists():
+                assessment["missing_scripts"].add(script_path)
+        return assessment
+
+
+if __name__ == "__main__":
+    model_name = str(input("Enter the name of the ensemble: "))
+    while (
+        not validate_model_name(model_name)
+        or model_path.ModelPath.check_if_model_dir_exists(model_name)
+        or ensemble_path.EnsemblePath.check_if_model_dir_exists(model_name)
+    ):
+        error = "Invalid input. Please use the format 'adjective_noun' in lowercase, e.g., 'happy_kitten', and choose a name that does not already exist as a model or ensemble."
+        logging.error(error)
+        model_name = str(input("Enter the name of the ensemble: "))
+    model_directory_builder = EnsembleScaffoldBuilder(model_name)
+    model_directory_builder.build_model_directory()
+    assessment = model_directory_builder.assess_model_directory()
+    if not assessment["structure_errors"]:
+        logging.info("Ensemble directory structure is complete.")
+    else:
+        logging.warning(f"Structure errors: {assessment['structure_errors']}")
+    model_directory_builder.build_model_scripts()
+    assessment = model_directory_builder.assess_model_scripts()
+    if not assessment["missing_scripts"]:
+        logging.info("All scripts have been successfully generated.")
+    else:
+        logging.warning(f"Missing scripts: {assessment['missing_scripts']}")
diff --git a/meta_tools/model_scaffold_builder.py b/meta_tools/model_scaffold_builder.py
index 94bf68e8..6fa48bb1 100644
--- a/meta_tools/model_scaffold_builder.py
+++ b/meta_tools/model_scaffold_builder.py
@@ -18,7 +18,7 @@
 # print(str(Path(__file__).parent.parent))
 from common_utils import model_path, ensemble_path
 
-from templates import (
+from templates.model import (
     template_config_deployment,
     template_config_hyperparameters,
     template_config_input_data,
diff --git a/meta_tools/templates/template_config_deployment.py b/meta_tools/templates/ensemble/template_config_deployment.py
similarity index 100%
rename from meta_tools/templates/template_config_deployment.py
rename to meta_tools/templates/ensemble/template_config_deployment.py
diff --git a/meta_tools/templates/ensemble/template_config_hyperparameters.py b/meta_tools/templates/ensemble/template_config_hyperparameters.py
new file mode 100644
index 00000000..1c15f722
--- /dev/null
+++ b/meta_tools/templates/ensemble/template_config_hyperparameters.py
@@ -0,0 +1,29 @@
+from utils import utils_script_gen
+from pathlib import Path
+
+
+def generate(script_dir: Path) -> bool:
+    """
+    Generates a script that defines a function for obtaining the hyperparameter configuration
+    necessary for ensemble training.
+
+    Parameters:
+        script_dir (Path):
+            The directory where the generated hyperparameter configuration script will be saved.
+            This should be a valid writable path.
+
+    Note:
+        The generated `get_hp_config` function only defines the forecasting steps; extend the
+        returned dictionary with additional hyperparameters as needed.
+
+    Returns:
+        bool:
+            True if the script was written and compiled successfully, False otherwise.
+    """
+    code = """def get_hp_config():
+    hp_config = {
+        "steps": [*range(1, 36 + 1, 1)]
+    }
+    return hp_config
+"""
+    return utils_script_gen.save_script(script_dir, code)
\ No newline at end of file
diff --git a/meta_tools/templates/ensemble/template_config_meta.py b/meta_tools/templates/ensemble/template_config_meta.py
new file mode 100644
index 00000000..a21246a2
--- /dev/null
+++ b/meta_tools/templates/ensemble/template_config_meta.py
@@ -0,0 +1,39 @@
+from utils import utils_script_gen
+from pathlib import Path
+
+
+def generate(script_dir: Path, model_name: str) -> bool:
+    """
+    Generates a script that defines the `get_meta_config` function for model metadata.
+
+    Parameters:
+        script_dir (Path):
+            The directory where the generated meta configuration script will be saved.
+            This should be a valid writable path.
+
+        model_name (str):
+            The name of the model. This will be included in the metadata configuration.
+
+    Returns:
+        bool:
+            True if the script was written and compiled successfully, False otherwise.
+    """
+    code = f"""def get_meta_config():
+    \"""
+    Contains the metadata for the model (model architecture, name, target variable, and level of analysis).
+    This config is for documentation purposes only, and modifying it will not affect the model, the training, or the evaluation.
+
+    Returns:
+    - meta_config (dict): A dictionary containing model meta configuration.
+    \"""
+    meta_config = {{
+        "name": "{model_name}", # Eg. "happy_kitten"
+        "models": [], # Eg. ["model1", "model2", "model3"]
+        "depvar": "ln_ged_sb_dep", # Eg. "ln_ged_sb_dep"
+        "level": "pgm", # Eg. "pgm", "cm"
+        "aggregation": "median", # Eg. "median", "mean"
+        "creator": "Your name here"
+    }}
+    return meta_config
+"""
+    return utils_script_gen.save_script(script_dir, code)
\ No newline at end of file
diff --git a/meta_tools/templates/ensemble/template_main.py b/meta_tools/templates/ensemble/template_main.py
new file mode 100644
index 00000000..fa175ea0
--- /dev/null
+++ b/meta_tools/templates/ensemble/template_main.py
@@ -0,0 +1,64 @@
+from utils import utils_script_gen
+from pathlib import Path
+
+
+def generate(script_dir: Path) -> bool:
+    """
+    Generates a Python script that sets up and executes model runs with Weights & Biases (WandB) integration.
+
+    This function creates a script that imports the necessary modules, sets up project paths, and defines
+    the main execution logic for a single ensemble run. The generated script includes command-line
+    argument parsing, argument validation, and run logging.
+
+    Parameters:
+        script_dir (Path):
+            The directory where the generated Python script will be saved. This should be a valid writable
+            path that exists within the project structure.
+
+    Returns:
+        bool:
+            True if the script was successfully written to the specified directory, False otherwise.
+
+    The generated script includes the following features:
+    - Imports required libraries and sets up the path to include the `common_utils` module.
+    - Initializes project paths using the `setup_project_paths` function.
+    - Parses command-line arguments with `parse_args`.
+    - Validates arguments to ensure correctness with `validate_arguments`.
+    - Logs into Weights & Biases using `wandb.login()`.
+    - Executes a single model run based on the parsed command-line arguments via `execute_single_run`.
+    - Suppresses warnings and writes run output to a log file via `setup_logging`.
+
+    Note:
+    - Ensure that the `common_utils` module and all other imported modules are accessible from the
+      specified script directory.
+    - The generated script is designed to be executed as a standalone Python script.
+    """
+    code = """import wandb
+import sys
+import warnings
+
+from pathlib import Path
+PATH = Path(__file__)
+sys.path.insert(0, str(Path(
+    *[i for i in PATH.parts[:PATH.parts.index("views_pipeline") + 1]]) / "common_utils"))  # PATH_COMMON_UTILS
+from set_path import setup_project_paths
+setup_project_paths(PATH)
+
+from utils_cli_parser import parse_args, validate_arguments
+from utils_logger import setup_logging
+from execute_model_runs import execute_single_run
+
+warnings.filterwarnings("ignore")
+
+logger = setup_logging('run.log')
+
+
+if __name__ == "__main__":
+    wandb.login()
+
+    args = parse_args()
+    validate_arguments(args)
+
+    execute_single_run(args)
+"""
+    return utils_script_gen.save_script(script_dir, code)
\ No newline at end of file
diff --git a/meta_tools/templates/model/template_config_deployment.py b/meta_tools/templates/model/template_config_deployment.py
new file mode 100644
index 00000000..185bc6ba
--- /dev/null
+++ b/meta_tools/templates/model/template_config_deployment.py
@@ -0,0 +1,75 @@
+from typing import Dict
+from utils import utils_script_gen
+from pathlib import Path
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def generate(
+    script_dir: Path,
+    deployment_type: str = "shadow",
+    additional_settings: Dict[str, any] = None,
+) -> bool:
+    """
+    Generates a script that defines the `get_deployment_config` function for configuring the deployment status and settings.
+
+    Parameters:
+        script_dir (Path):
+            The directory where the generated deployment configuration script will be saved.
+            This should be a valid writable path.
+        deployment_type (str, optional):
+            The type of deployment. Must be one of "shadow", "deployed", "baseline", or "deprecated".
+            Default is "shadow".
+            - "shadow": The deployment is shadowed and not yet active.
+            - "deployed": The deployment is active and in use.
+            - "baseline": The deployment is in a baseline state, for reference or comparison.
+            - "deprecated": The deployment is deprecated and no longer supported.
+        additional_settings (dict, optional):
+            A dictionary of additional settings to include in the deployment configuration.
+            These settings will be merged with the default configuration. Defaults to None.
+
+    Raises:
+        ValueError: If `deployment_type` is not one of the valid types.
+
+    Returns:
+        bool: True if the script was written and compiled successfully, False otherwise.
+    """
+    valid_types = {"shadow", "deployed", "baseline", "deprecated"}
+    if deployment_type.lower() not in valid_types:
+        logging.error(
+            f"Invalid deployment_type: {deployment_type}. Must be one of {valid_types}."
+        )
+        raise ValueError(
+            f"Invalid deployment_type: {deployment_type}. Must be one of {valid_types}."
+        )
+
+    deployment_config = {"deployment_status": deployment_type.lower()}
+
+    # Merge additional settings if provided
+    if additional_settings and isinstance(additional_settings, dict):
+        deployment_config.update(additional_settings)
+
+    # Generate the script code
+    code = f"""\"\"\"
+Deployment Configuration Script
+
+This script defines the deployment configuration settings for the application.
+It includes the deployment status and any additional settings specified.
+
+Deployment Status:
+- shadow: The deployment is shadowed and not yet active.
+- deployed: The deployment is active and in use.
+- baseline: The deployment is in a baseline state, for reference or comparison.
+- deprecated: The deployment is deprecated and no longer supported.
+
+Additional settings can be included in the configuration dictionary as needed.
+
+\"\"\"
+
+def get_deployment_config():
+    # Deployment settings
+    deployment_config = {deployment_config}
+    return deployment_config
+"""
+    return utils_script_gen.save_script(script_dir, code)
diff --git a/meta_tools/templates/template_config_hyperparameters.py b/meta_tools/templates/model/template_config_hyperparameters.py
similarity index 100%
rename from meta_tools/templates/template_config_hyperparameters.py
rename to meta_tools/templates/model/template_config_hyperparameters.py
diff --git a/meta_tools/templates/template_config_input_data.py b/meta_tools/templates/model/template_config_input_data.py
similarity index 100%
rename from meta_tools/templates/template_config_input_data.py
rename to meta_tools/templates/model/template_config_input_data.py
diff --git a/meta_tools/templates/template_config_meta.py b/meta_tools/templates/model/template_config_meta.py
similarity index 100%
rename from meta_tools/templates/template_config_meta.py
rename to meta_tools/templates/model/template_config_meta.py
diff --git a/meta_tools/templates/template_config_sweep.py b/meta_tools/templates/model/template_config_sweep.py
similarity index 100%
rename from meta_tools/templates/template_config_sweep.py
rename to meta_tools/templates/model/template_config_sweep.py
diff --git a/meta_tools/templates/template_main.py b/meta_tools/templates/model/template_main.py
similarity index 100%
rename from meta_tools/templates/template_main.py
rename to meta_tools/templates/model/template_main.py
diff --git a/meta_tools/tests/test_ensemble_scaffold_builder.py b/meta_tools/tests/test_ensemble_scaffold_builder.py
new file mode 100644
index 00000000..9510a6b5
--- /dev/null
+++ b/meta_tools/tests/test_ensemble_scaffold_builder.py
@@ -0,0 +1,282 @@
+import sys
+from pathlib import Path
+
+PATH = Path(__file__)
+if 'views_pipeline' in PATH.parts:
+    PATH_ROOT = Path(*PATH.parts[:PATH.parts.index('views_pipeline') + 1])
+    PATH_META_TOOLS = PATH_ROOT / 'meta_tools'
+    if not PATH_META_TOOLS.exists():
+        raise ValueError("The 'meta_tools' directory was not found in the provided path.")
+    sys.path.insert(0, str(PATH_META_TOOLS))
+else:
+    raise ValueError("The 'views_pipeline' directory was not found in the provided path.")
+
+import os
+import pytest
+from unittest.mock import patch, MagicMock
+import tempfile
+import shutil
+from ensemble_scaffold_builder import EnsembleScaffoldBuilder
+
+
+@pytest.fixture
+def temp_dir():
+    """
+    Fixture to create a temporary directory for testing.
+
+    Yields:
+        Path: The path to the temporary directory.
+    """
+    temp_dir = tempfile.mkdtemp()
+    yield Path(temp_dir)
+    shutil.rmtree(temp_dir)
+
+
+@pytest.fixture
+def mock_validate_model_name():
+    """
+    Fixture to mock the `validate_model_name` function.
+
+    Yields:
+        MagicMock: The mock object for `validate_model_name`.
+    """
+    with patch("ensemble_scaffold_builder.validate_model_name") as mock:
+        yield mock
+
+
+@pytest.fixture
+def mock_ensemble_path():
+    """
+    Fixture to mock the `ensemble_path.EnsemblePath` class.
+
+    Yields:
+        MagicMock: The mock object for `EnsemblePath`.
+    """
+    with patch("ensemble_scaffold_builder.ensemble_path.EnsemblePath") as mock:
+        yield mock
+
+
+@pytest.fixture
+def mock_templates():
+    """
+    Fixture to mock the template generation functions.
+
+    Yields:
+        dict: A dictionary of mock objects for the template functions.
+    """
+    with patch(
+        "ensemble_scaffold_builder.template_config_deployment.generate"
+    ) as mock_deployment, patch(
+        "ensemble_scaffold_builder.template_config_hyperparameters.generate"
+    ) as mock_hyperparameters, patch(
+        "ensemble_scaffold_builder.template_config_meta.generate"
+    ) as mock_meta, patch(
+        "ensemble_scaffold_builder.template_main.generate"
+    ) as mock_main:
+        yield {
+            "deployment": mock_deployment,
+            "hyperparameters": mock_hyperparameters,
+            "meta": mock_meta,
+            "main": mock_main,
+        }
+
+
+def test_ensemble_builder_init(mock_ensemble_path):
+    """
+    Test the initialization of the EnsembleScaffoldBuilder class.
+
+    Args:
+        mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+    Asserts:
+        - The `EnsembleScaffoldBuilder` attributes are correctly initialized.
+    """
+    mock_ensemble_instance = mock_ensemble_path.return_value
+    mock_ensemble_instance.get_directories.return_value = {
+        "dir1": "path1",
+        "dir2": "path2",
+    }
+    mock_ensemble_instance.get_scripts.return_value = {
+        "script1": "path1",
+        "script2": "path2",
+    }
+
+    builder = EnsembleScaffoldBuilder("test_ensemble")
+
+    assert builder._model == mock_ensemble_instance
+    assert set(builder._subdirs) == {"path1", "path2"}
+    assert set(builder._scripts) == {"path1", "path2"}
+
+
+def test_build_ensemble_directory(temp_dir, mock_ensemble_path):
+    """
+    Test the `build_model_directory` method.
+
+    Args:
+        temp_dir (Path): The path to the temporary directory.
+        mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+    Asserts:
+        - The ensemble directory and subdirectories are created correctly.
+        - The README.md and requirements.txt files are created.
+        - Appropriate logging messages are generated.
+    """
+    mock_ensemble_instance = mock_ensemble_path.return_value
+    mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+    mock_ensemble_instance.get_directories.return_value = {
+        "subdir1": temp_dir / "test_ensemble" / "subdir1"
+    }
+
+    builder = EnsembleScaffoldBuilder("test_ensemble")
+    model_dir = builder.build_model_directory()
+
+    assert model_dir == mock_ensemble_instance.model_dir
+    assert (model_dir / "subdir1").exists()
+    assert (model_dir / "README.md").exists()
+    assert (model_dir / "requirements.txt").exists()
+
+
+def test_build_ensemble_scripts(temp_dir, mock_ensemble_path, mock_templates):
+    """
+    Test the `build_model_scripts` method.
+
+    Args:
+        temp_dir (Path): The path to the temporary directory.
+        mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+        mock_templates (dict): A dictionary of mock objects for template functions.
+
+    Asserts:
+        - The template scripts are generated correctly.
+        - Appropriate logging messages are generated.
+    """
+    mock_ensemble_instance = mock_ensemble_path.return_value
+    mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+    mock_ensemble_instance.get_scripts.return_value = {
+        "script1": temp_dir / "test_ensemble" / "script1.py"
+    }
+
+    builder = EnsembleScaffoldBuilder("test_ensemble")
+    builder.build_model_directory()
+    builder.build_model_scripts()
+
+    # Create the script file to simulate actual script creation
+    script_path = temp_dir / "test_ensemble" / "script1.py"
+    os.makedirs(script_path.parent, exist_ok=True)
+    with open(script_path, "w") as f:
+        f.write("# Script content")
+
+    for template in mock_templates.values():
+        template.assert_called()
+
+    # Ensure the script is created
+    assert script_path.exists()
+
+
+def test_assess_ensemble_directory(temp_dir, mock_ensemble_path):
+    """
+    Test the `assess_model_directory` method.
+
+    Args:
+        temp_dir (Path): The path to the temporary directory.
+        mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+    Asserts:
+        - The directory assessment is performed correctly.
+        - Missing directories are detected.
+    """
+    mock_ensemble_instance = mock_ensemble_path.return_value
+    mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+    mock_ensemble_instance.get_directories.return_value = {
+        "subdir1": temp_dir / "test_ensemble" / "subdir1"
+    }
+
+    builder = EnsembleScaffoldBuilder("test_ensemble")
+    builder.build_model_directory()
+    assessment = builder.assess_model_directory()
+
+    assert "structure_errors" in assessment
+    assert not assessment["structure_errors"]
+
+
+def test_assess_ensemble_scripts(temp_dir, mock_ensemble_path):
+    """
+    Test the `assess_model_scripts` method.
+
+    Args:
+        temp_dir (Path): The path to the temporary directory.
+        mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+    Asserts:
+        - The script assessment is performed correctly.
+        - Missing scripts are detected.
+    """
+    mock_ensemble_instance = mock_ensemble_path.return_value
+    mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+    mock_ensemble_instance.get_scripts.return_value = {
+        "script1": temp_dir / "test_ensemble" / "script1.py"
+    }
+
+    builder = EnsembleScaffoldBuilder("test_ensemble")
+    builder.build_model_directory()
+    builder.build_model_scripts()
+
+    # Create the script file to simulate actual script creation
+    script_path = temp_dir / "test_ensemble" / "script1.py"
+    os.makedirs(script_path.parent, exist_ok=True)
+    with open(script_path, "w") as f:
+        f.write("# Script content")
+
+    assessment = builder.assess_model_scripts()
+
+    assert "missing_scripts" in assessment
+    assert not assessment["missing_scripts"]
+
+
+# def test_assess_ensemble_directory_with_missing_dirs(temp_dir, mock_ensemble_path):
+#     """
+#     Test the `assess_model_directory` method with missing directories.
+
+#     Args:
+#         temp_dir (Path): The path to the temporary directory.
+#         mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+#     Asserts:
+#         - The directory assessment detects missing directories.
+#     """
+#     mock_ensemble_instance = mock_ensemble_path.return_value
+#     mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+#     mock_ensemble_instance.get_directories.return_value = {
+#         "subdir1": temp_dir / "test_ensemble" / "subdir1"
+#     }
+
+#     builder = EnsembleScaffoldBuilder("test_ensemble")
+#     builder.build_model_directory()
+
+#     with patch('ensemble_scaffold_builder.Path.exists', side_effect=[True, False, True]):
+#         assessment = builder.assess_model_directory()
+#         assert assessment["structure_errors"] == {temp_dir / "test_ensemble" / "subdir1"}
+
+
+# def test_assess_ensemble_scripts_with_missing_scripts(temp_dir, mock_ensemble_path):
+#     """
+#     Test the `assess_model_scripts` method with missing scripts.
+
+#     Args:
+#         temp_dir (Path): The path to the temporary directory.
+#         mock_ensemble_path (MagicMock): The mock object for `EnsemblePath`.
+
+#     Asserts:
+#         - The script assessment detects missing scripts.
+#     """
+#     mock_ensemble_instance = mock_ensemble_path.return_value
+#     mock_ensemble_instance.model_dir = temp_dir / "test_ensemble"
+#     mock_ensemble_instance.get_scripts.return_value = {
+#         "script1": temp_dir / "test_ensemble" / "script1.py"
+#     }
+
+#     builder = EnsembleScaffoldBuilder("test_ensemble")
+#     builder.build_model_directory()
+
+#     with patch('ensemble_scaffold_builder.Path.exists', side_effect=[True, False, True, True]):
+#         assessment = builder.assess_model_scripts()
+#         assert assessment["missing_scripts"] == {temp_dir / "test_ensemble" / "script1.py"}
\ No newline at end of file
diff --git a/meta_tools/tests/test_model_scaffold_builder.py b/meta_tools/tests/test_model_scaffold_builder.py
index ade56523..70442760 100644
--- a/meta_tools/tests/test_model_scaffold_builder.py
+++ b/meta_tools/tests/test_model_scaffold_builder.py
@@ -11,8 +11,6 @@
 else:
     raise ValueError("The 'views_pipeline' directory was not found in the provided path.")
 
-print(sys.path)
-
 import os
 import pytest
 from unittest.mock import patch, MagicMock