diff --git a/common_utils/ensemble_path.py b/common_utils/ensemble_path.py
index 1de5e446..6ee76a2c 100644
--- a/common_utils/ensemble_path.py
+++ b/common_utils/ensemble_path.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Union
 import sys
+
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
@@ -46,45 +47,16 @@ def _initialize_directories(self) -> None:
         """
         # Call the parent class's _initialize_directories method
         super()._initialize_directories()
+        # Initialize ensemble-specific directories only if the class is EnsemblePath
+        if self.__class__.__name__ == "EnsemblePath":
+            self._initialize_ensemble_specific_directories()

-        # List of directories to keep
-        keep_dirs = {
-            "artifacts",
-            "configs",
-            "data",
-            "data/generated",
-            "data/processed",
-            "notebooks",
-            "reports",
-            "reports/figures",
-            "reports/papers",
-            "reports/plots",
-            "reports/slides",
-            "reports/timelapse",
-            "src",
-            "src/dataloaders",
-            "src/forecasting",
-            "src/management",
-            "src/offline_evaluation",
-            "src/training",
-            "src/utils",
-            "src/visualization",
-        }
-
-        # Remove directories that are not in the keep_dirs list
-        for attr, value in list(self.__dict__.items()):
-            if Path(value).relative_to(self.model_dir) not in keep_dirs:
-                delattr(self, attr)
-
-        # Initialize directories as per the new structure
-        self.model_dir = self._get_model_dir()
-        self.artifacts = self._build_absolute_directory(Path("artifacts"))
-        self.configs = self._build_absolute_directory(Path("configs"))
-        self.data = self._build_absolute_directory(Path("data"))
-        self.data_generated = self._build_absolute_directory(Path("data/generated"))
-        self.data_processed = self._build_absolute_directory(Path("data/processed"))
-        self.notebooks = self._build_absolute_directory(Path("notebooks"))
-        self.reports = self._build_absolute_directory(Path("reports"))
+    def _initialize_ensemble_specific_directories(self) -> None:
+        """
+        Initializes the report sub-directories used by the ensemble.
+
+        Only called when the class name is "EnsemblePath".
+        """
         self.reports_figures = self._build_absolute_directory(Path("reports/figures"))
         self.reports_papers = self._build_absolute_directory(Path("reports/papers"))
         self.reports_plots = self._build_absolute_directory(Path("reports/plots"))
@@ -92,22 +64,6 @@ def _initialize_directories(self) -> None:
         self.reports_timelapse = self._build_absolute_directory(
             Path("reports/timelapse")
         )
-        self.src = self._build_absolute_directory(Path("src"))
-        self.dataloaders = self._build_absolute_directory(Path("src/dataloaders"))
-        self.forecasting = self._build_absolute_directory(Path("src/forecasting"))
-        self.management = self._build_absolute_directory(Path("src/management"))
-        self.offline_evaluation = self._build_absolute_directory(
-            Path("src/offline_evaluation")
-        )
-        self.training = self._build_absolute_directory(Path("src/training"))
-        self.utils = self._build_absolute_directory(Path("src/utils"))
-        self.visualization = self._build_absolute_directory(Path("src/visualization"))
-        self._templates = self.meta_tools / "templates"
-        self._sys_paths = None
-        # if self.common_querysets not in sys.path:
-        #     sys.path.insert(0, str(self.common_querysets))
-        # self.queryset_path = self.common_querysets / f"queryset_{self.model_name}.py"
-        # self._queryset = None

     def _initialize_scripts(self) -> None:
         """
@@ -115,23 +71,23 @@ def _initialize_scripts(self) -> None:

         Creates and sets up various scripts required for the ensemble, such as configuration scripts, main script, and other utility scripts.
         """
-        self.scripts = [
-            self._build_absolute_directory(Path("configs/config_deployment.py")),
-            self._build_absolute_directory(Path("configs/config_hyperparameters.py")),
-            self._build_absolute_directory(Path("configs/config_meta.py")),
-            self._build_absolute_directory(Path("main.py")),
-            self._build_absolute_directory(Path("README.md")),
-            self._build_absolute_directory(Path("requirements.txt")),
+        super()._initialize_scripts()
+        # Initialize ensemble-specific scripts only if the class is EnsemblePath
+        if self.__class__.__name__ == "EnsemblePath":
+            self._initialize_ensemble_specific_scripts()
+
+    def _initialize_ensemble_specific_scripts(self) -> None:
+        """
+        Initializes the ensemble-specific scripts by appending their absolute paths
+        to the `self.scripts` list.
+
+        The paths are built using the `_build_absolute_directory` method.
+
+        Returns:
+            None
+        """
+        self.scripts += [
             self._build_absolute_directory(Path("artifacts/model_metadata_dict.py")),
-            self._build_absolute_directory(
-                Path("src/forecasting/generate_forecast.py")
-            ),
-            self._build_absolute_directory(
-                Path("src/management/execute_model_runs.py")
-            ),
-            self._build_absolute_directory(
-                Path("src/management/execute_model_tasks.py")
-            ),
             self._build_absolute_directory(
                 Path("src/offline_evaluation/evaluate_ensemble.py")
             ),
@@ -140,7 +96,6 @@ def _initialize_scripts(self) -> None:
             self._build_absolute_directory(Path("src/utils/utils_check.py")),
             self._build_absolute_directory(Path("src/utils/utils_run.py")),
             self._build_absolute_directory(Path("src/visualization/visual.py")),
-            # self.common_querysets / f"queryset_{self.model_name}.py",
         ]


diff --git a/common_utils/model_path.py b/common_utils/model_path.py
index 51ae9c9b..3a17d1fe 100644
--- a/common_utils/model_path.py
+++ b/common_utils/model_path.py
@@ -6,11 +6,13 @@
 from typing import Union, Optional, List, Dict

 PATH = Path(__file__)
-if 'views_pipeline' in PATH.parts:
-    PATH_ROOT = Path(*PATH.parts[:PATH.parts.index('views_pipeline') + 1])
+if "views_pipeline" in PATH.parts:
+    PATH_ROOT = Path(*PATH.parts[: PATH.parts.index("views_pipeline") + 1])
     sys.path.insert(0, str(PATH_ROOT))
 else:
-    raise ValueError("The 'views_pipeline' directory was not found in the provided path.")
+    raise ValueError(
+        "The 'views_pipeline' directory was not found in the provided path."
+    )
 from meta_tools.utils import utils_model_naming, utils_model_paths

 logging.basicConfig(
@@ -123,14 +125,14 @@ def get_meta_tools(cls) -> Path:
         if cls._meta_tools is None:
             cls._initialize_class_paths()
         return cls._meta_tools
-    
+
     @classmethod
     def get_common_logs(cls) -> Path:
         """Get the common logs path."""
         if cls._common_logs is None:
             cls._initialize_class_paths()
         return cls._common_logs
-    
+
     @classmethod
     def check_if_model_dir_exists(cls, model_name: str) -> bool:
         """
@@ -314,6 +316,7 @@ def _handle_global_cache(self) -> None:
         """
         try:
             from global_cache import GlobalCache
+
             cached_instance = GlobalCache[self._instance_hash]
             if cached_instance and not self._force_cache_overwrite:
                 logger.info(
@@ -324,7 +327,7 @@ def _handle_global_cache(self) -> None:
             logger.error(
                 f"Error adding model {self.model_name} to cache: {e}. Initializing new ModelPath instance."
             )
-    
+
     def _write_to_global_cache(self) -> None:
         """
         Writes the current model instance to the global cache if it doesn't exist.
@@ -332,13 +335,17 @@ def _write_to_global_cache(self) -> None:
         Adds the model instance to the global cache using the instance hash as the key.
         """
         from global_cache import GlobalCache
-        
+
         if GlobalCache[self._instance_hash] is None:
-            logger.info(f"Writing {self.target.title}Path object to cache for model {self.model_name}.")
+            logger.info(
+                f"Writing {self.target.title()}Path object to cache for model {self.model_name}."
+            )
             GlobalCache[self._instance_hash] = self
         else:
             if self._force_cache_overwrite:
-                logger.info(f"Overwriting {self.target.title}Path object in cache for model {self.model_name}. (_force_cache_overwrite is set to True)")
+                logger.info(
+                    f"Overwriting {self.target.title()}Path object in cache for model {self.model_name}. (_force_cache_overwrite is set to True)"
+                )
                 GlobalCache[self._instance_hash] = self

     def _initialize_directories(self) -> None:
@@ -348,23 +355,19 @@ def _initialize_directories(self) -> None:
         Creates and sets up various directories required for the model, such as architectures, artifacts, configs, data, etc.
         """
         self.model_dir = self._get_model_dir()
-        self.architectures = self._build_absolute_directory(Path("src/architectures"))
+
         self.artifacts = self._build_absolute_directory(Path("artifacts"))
         self.configs = self._build_absolute_directory(Path("configs"))
         self.data = self._build_absolute_directory(Path("data"))
         self.data_generated = self._build_absolute_directory(Path("data/generated"))
         self.data_processed = self._build_absolute_directory(Path("data/processed"))
-        self.data_raw = self._build_absolute_directory(Path("data/raw"))
+
         self.dataloaders = self._build_absolute_directory(Path("src/dataloaders"))
         self.forecasting = self._build_absolute_directory(Path("src/forecasting"))
         self.management = self._build_absolute_directory(Path("src/management"))
-        self.notebooks = self._build_absolute_directory(Path("notebooks"))
         self.offline_evaluation = self._build_absolute_directory(
             Path("src/offline_evaluation")
         )
-        self.online_evaluation = self._build_absolute_directory(
-            Path("src/online_evaluation")
-        )
         self.reports = self._build_absolute_directory(Path("reports"))
         self.src = self._build_absolute_directory(Path("src"))
         self._templates = self.meta_tools / "templates"
@@ -377,6 +380,23 @@ def _initialize_directories(self) -> None:
         self.queryset_path = self.common_querysets / f"queryset_{self.model_name}.py"
         self._queryset = None

+        # Initialize model-specific directories only if the class is ModelPath
+        if self.__class__.__name__ == "ModelPath":
+            self._initialize_model_specific_directories()
+
+    def _initialize_model_specific_directories(self) -> None:
+        """
+        Initializes the directories that are specific to ModelPath.
+
+        Only called when the class name is "ModelPath".
+        """
+        self.architectures = self._build_absolute_directory(Path("src/architectures"))
+        self.data_raw = self._build_absolute_directory(Path("data/raw"))
+        self.notebooks = self._build_absolute_directory(Path("notebooks"))
+        self.online_evaluation = self._build_absolute_directory(
+            Path("src/online_evaluation")
+        )
+
     def _initialize_scripts(self) -> None:
         """
         Initializes the necessary scripts for the model.
@@ -387,10 +407,8 @@ def _initialize_scripts(self) -> None:
             self._build_absolute_directory(Path("configs/config_deployment.py")),
             self._build_absolute_directory(Path("configs/config_hyperparameters.py")),
             self._build_absolute_directory(Path("configs/config_meta.py")),
-            self._build_absolute_directory(Path("configs/config_sweep.py")),
             self._build_absolute_directory(Path("main.py")),
             self._build_absolute_directory(Path("README.md")),
-            self._build_absolute_directory(Path("src/dataloaders/get_data.py")),
             self._build_absolute_directory(
                 Path("src/forecasting/generate_forecast.py")
             ),
@@ -400,11 +418,28 @@ def _initialize_scripts(self) -> None:
             self._build_absolute_directory(
                 Path("src/management/execute_model_tasks.py")
             ),
+        ]
+        # Initialize model-specific scripts only if the class is ModelPath
+        if self.__class__.__name__ == "ModelPath":
+            self._initialize_model_specific_scripts()
+
+    def _initialize_model_specific_scripts(self) -> None:
+        """
+        Initializes and appends model-specific script paths to the `scripts` attribute.
+
+        Returns:
+            None
+        """
+        self.scripts += [
+            self._build_absolute_directory(Path("configs/config_sweep.py")),
+            self._build_absolute_directory(Path("src/dataloaders/get_data.py")),
             self._build_absolute_directory(
                 Path("src/offline_evaluation/evaluate_model.py")
             ),
-            self._build_absolute_directory(Path(f"src/training/train_{self.target}.py")),
-            self.common_querysets / f"queryset_{self.model_name}.py"
+            self._build_absolute_directory(
+                Path(f"src/training/train_{self.target}.py")
+            ),
+            self.common_querysets / f"queryset_{self.model_name}.py",
         ]

     def _is_path(self, path_input: Union[str, Path]) -> bool: