Commit

Merge pull request #137 from prio-data/update_generel_logger
Update generel logger
Polichinel authored Oct 31, 2024
2 parents 9ac24f0 + 41e9591 commit b20115c
Showing 14 changed files with 414 additions and 73 deletions.
62 changes: 62 additions & 0 deletions common_configs/config_log.yaml
@@ -0,0 +1,62 @@
version: 1
disable_existing_loggers: False

formatters:
  detailed:
    format: '%(asctime)s %(pathname)s [%(filename)s:%(lineno)d] [%(process)d] [%(threadName)s] - %(levelname)s - %(message)s'

handlers:
  console:
    class: logging.StreamHandler
    level: INFO
    formatter: detailed
    stream: ext://sys.stdout

  info_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: INFO
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_INFO.log"
    when: "midnight"
    backupCount: 30
    encoding: "utf8"

  debug_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: DEBUG
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_DEBUG.log"
    when: "midnight"
    backupCount: 10
    encoding: "utf8"

  warning_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: WARNING
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_WARNING.log"
    when: "midnight"
    backupCount: 20
    encoding: "utf8"

  error_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: ERROR
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_ERROR.log"
    when: "midnight"
    backupCount: 60
    encoding: "utf8"

  critical_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: CRITICAL
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_CRITICAL.log"
    when: "midnight"
    backupCount: 90
    encoding: "utf8"

root:
  level: DEBUG
  handlers: [console, info_file_handler, debug_file_handler, warning_file_handler, error_file_handler, critical_file_handler]
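
Note that `{LOG_PATH}` is not a placeholder that `logging.config.dictConfig` expands on its own; the loader has to substitute it with the actual log directory before applying the configuration, which is what the updated `common_utils/utils_logger.py` below does. A minimal sketch of that substitution (the paths here are illustrative assumptions; the pipeline resolves them through `ModelPath`):

```python
import logging.config
from pathlib import Path

import yaml

# Illustrative paths; utils_logger.py resolves the real ones via ModelPath.
config_path = Path("common_configs/config_log.yaml")
log_dir = Path("common_logs")

config = yaml.safe_load(config_path.read_text())

# Swap the {LOG_PATH} placeholder for a real directory and make sure it exists.
for handler in config.get("handlers", {}).values():
    if "filename" in handler:
        handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(log_dir))
        Path(handler["filename"]).parent.mkdir(parents=True, exist_ok=True)

logging.config.dictConfig(config)
logging.getLogger(__name__).info("Logging configured from config_log.yaml")
```
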
Empty file added common_logs/.gitkeep
Empty file.
10 changes: 5 additions & 5 deletions common_utils/global_cache.py
@@ -113,12 +113,12 @@ def ensure_cache_file_exists(self):
         Ensures that the cache file exists. If it does not exist, creates a new cache file.
         """
         if not self.filepath.exists():
-            logging.info(
+            logging.warning(
                 f"Cache file: {self.filepath} does not exist. Creating new cache file..."
             )
             with open(self.filepath, "wb") as f:
                 pickle.dump({}, f)
-            logging.info(f"Created new cache file: {self.filepath}")
+            logging.debug(f"Created new cache file: {self.filepath}")

     def set(self, key, value):
         """
@@ -167,7 +167,7 @@ def save_cache(self):
         """
         with open(self.filepath, "wb") as f:
             pickle.dump(self.cache, f)
-        logging.info(f"Cache saved to file: {self.filepath}")
+        logging.debug(f"Cache saved to file: {self.filepath}")

     def load_cache(self):
         """
@@ -179,7 +179,7 @@ def load_cache(self):
                     loaded_cache = pickle.loads(f.read())
                     if isinstance(loaded_cache, dict):
                         self.cache = loaded_cache
-                        logging.info(f"Cache loaded from file: {self.filepath}")
+                        logging.debug(f"Cache loaded from file: {self.filepath}")
                     else:
                         logging.error(
                             f"Loaded cache is not a dictionary. Initializing empty cache."
@@ -192,7 +192,7 @@ def load_cache(self):
                 self.cache = {}
         else:
             self.cache = {}
-            logging.info(f"Cache file does not exist. Initialized empty cache.")
+            logging.debug(f"Cache file does not exist. Initialized empty cache.")


 def cleanup_cache_file():
60 changes: 14 additions & 46 deletions common_utils/model_path.py
@@ -61,46 +61,6 @@ class ModelPath:
         _ignore_attributes (list): A list of paths to ignore.
     """

-    __slots__ = (
-        "_validate",
-        "target",
-        "use_global_cache",
-        "_force_cache_overwrite",
-        "root",
-        "models",
-        "common_utils",
-        "common_configs",
-        "_ignore_attributes",
-        "model_name",
-        "_instance_hash",
-        "_queryset",
-        "model_dir",
-        "architectures",
-        "artifacts",
-        "configs",
-        "data",
-        "data_generated",
-        "data_processed",
-        "data_raw",
-        "dataloaders",
-        "forecasting",
-        "management",
-        "notebooks",
-        "offline_evaluation",
-        "online_evaluation",
-        "reports",
-        "src",
-        "_templates",
-        "training",
-        "utils",
-        "visualization",
-        "_sys_paths",
-        "common_querysets",
-        "queryset_path",
-        "scripts",
-        "meta_tools",
-    )
-
     _target = "model"
     _use_global_cache = True
     __instances__ = 0
@@ -120,6 +80,7 @@ def _initialize_class_paths(cls):
         cls._common_configs = cls._root / "common_configs"
         cls._common_querysets = cls._root / "common_querysets"
         cls._meta_tools = cls._root / "meta_tools"
+        cls._common_logs = cls._root / "common_logs"

     @classmethod
     def get_root(cls) -> Path:
@@ -163,6 +124,13 @@ def get_meta_tools(cls) -> Path:
             cls._initialize_class_paths()
         return cls._meta_tools

+    @classmethod
+    def get_common_logs(cls) -> Path:
+        """Get the common logs path."""
+        if cls._common_logs is None:
+            cls._initialize_class_paths()
+        return cls._common_logs
+
     @classmethod
     def check_if_model_dir_exists(cls, model_name: str) -> bool:
         """
@@ -575,8 +543,8 @@ def add_paths_to_sys(self) -> List[str]:
         )
         if self._sys_paths is None:
             self._sys_paths = []
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+            # value = getattr(self, attr)
             if str(attr) not in self._ignore_attributes:
                 if (
                     isinstance(value, Path)
@@ -641,8 +609,8 @@ def view_directories(self) -> None:
         """
         print("\n{:<20}\t{:<50}".format("Name", "Path"))
         print("=" * 72)
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+            # value = getattr(self, attr)
             if attr not in self._ignore_attributes and isinstance(value, Path):
                 print("{:<20}\t{:<50}".format(str(attr), str(value)))

@@ -687,8 +655,8 @@ def get_directories(self) -> Dict[str, Optional[str]]:
         # ]
         directories = {}
         relative = False
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+
             if str(attr) not in [
                 "model_name",
                 "root",
86 changes: 66 additions & 20 deletions common_utils/utils_logger.py
@@ -1,33 +1,79 @@
 import logging
+import logging.config
+import yaml
+import os
 from pathlib import Path
+from model_path import ModelPath
+from global_cache import GlobalCache

+_split_by_model = True  # Only works for lavender_haze
+
-def setup_logging(log_file: str, log_level=logging.INFO) -> logging.Logger:
+def ensure_log_directory(log_path: str) -> None:
     """
-    Sets up logging to both a specified file and the terminal (console).
+    Ensure the log directory exists for file-based logging handlers.
-    Args:
-        log_file (str): The file where logs should be written.
-        log_level (int): The logging level. Default is logging.INFO.
+    Parameters:
+        log_path (str): The full path to the log file for which the directory should be verified.
     """
+    log_dir = os.path.dirname(log_path)
+    if log_dir and not os.path.exists(log_dir):
+        os.makedirs(log_dir)

-    basic_logger = logging.getLogger()
-    basic_logger.setLevel(log_level)

-    file_handler = logging.FileHandler(log_file)
-    console_handler = logging.StreamHandler()
+def setup_logging(
+        default_level: int = logging.INFO, env_key: str = 'LOG_CONFIG') -> logging.Logger:

-    file_handler.setLevel(log_level)
-    console_handler.setLevel(log_level)
+    """
+    Set up the logging configuration from a YAML file and return the root logger.
+    Parameters:
+        default_level (int): The default logging level if the configuration file is not found
+            or cannot be loaded. Default is logging.INFO.
+        env_key (str): Environment variable key to override the default path to the logging
+            configuration file. Default is 'LOG_CONFIG'.
+    Returns:
+        logging.Logger: The root logger configured based on the loaded configuration.
+    Example Usage:
+        >>> logger = setup_logging()
+        >>> logger.info("Logging setup complete.")
+    """

+    CONFIG_LOGS_PATH = ModelPath.get_common_configs() / 'config_log.yaml'
+    if _split_by_model:
+        try:
+            COMMON_LOGS_PATH = ModelPath.get_common_logs() / GlobalCache["current_model"]
+        except:
+            # Protection in case model name is not available or GlobalCache fails.
+            COMMON_LOGS_PATH = ModelPath.get_common_logs()
+    else:
+        COMMON_LOGS_PATH = ModelPath.get_common_logs()

-    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
-    file_handler.setFormatter(formatter)
-    console_handler.setFormatter(formatter)
+    # Load YAML configuration
+    path = os.getenv(env_key, CONFIG_LOGS_PATH)

-    # Clear previous handlers if they exist
-    if basic_logger.hasHandlers():
-        basic_logger.handlers.clear()
+    if os.path.exists(path):
+        try:
+            with open(path, 'rt') as f:
+                config = yaml.safe_load(f.read())

+            # Replace placeholder with actual log directory path
+            for handler in config.get("handlers", {}).values():
+                if "filename" in handler and "{LOG_PATH}" in handler["filename"]:
+                    handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(COMMON_LOGS_PATH))
+                    ensure_log_directory(handler["filename"])

+            # Apply logging configuration
+            logging.config.dictConfig(config)

-    basic_logger.addHandler(file_handler)
-    basic_logger.addHandler(console_handler)
+        except Exception as e:
+            logging.basicConfig(level=default_level)
+            logging.error(f"Failed to load logging configuration from {path}. Using basic configuration. Error: {e}")
+    else:
+        logging.basicConfig(level=default_level)
+        logging.warning(f"Logging configuration file not found at {path}. Using basic configuration.")

+    return logging.getLogger()

-    return basic_logger
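
A possible call site for the new `setup_logging` (hypothetical; the entry-point wiring is not part of this diff). It assumes `GlobalCache` accepts item assignment for `"current_model"`, mirroring the `GlobalCache["current_model"]` lookup above, so that logs are split per model:

```python
# Hypothetical usage from a model's entry-point script.
from global_cache import GlobalCache
from utils_logger import setup_logging

GlobalCache["current_model"] = "lavender_haze"  # assumed setter; enables per-model log folders
logger = setup_logging()

logger.info("Pipeline run started.")    # console + views_pipeline_INFO.log
logger.debug("Configuration loaded.")   # captured by views_pipeline_DEBUG.log only
```
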
84 changes: 84 additions & 0 deletions documentation/ADRs/015_log_files_general_strategy.md
@@ -0,0 +1,84 @@
# log files general strategy

| ADR Info | Details |
|---------------------|------------------------------|
| Subject | log files general strategy |
| ADR Number | 015 |
| Status | Accepted |
| Author | Simon |
| Date | 28.10.2024 |


## Context

Effective logging is essential for maintaining data integrity, monitoring model behavior, and troubleshooting issues within the model pipeline. A cohesive, centralized logging strategy ensures that logs are structured and accessible, enhancing transparency, auditability, and reliability across the model deployment lifecycle. The main goals of this logging strategy are to:

1. **Enable Reproducibility and Traceability**: Log details such as timestamps, script paths, and process IDs are standardized to help trace model behavior and system states effectively across different environments.
2. **Support Monitoring and Real-Time Alerts**: Logs will provide data for monitoring tools, enabling real-time alerting on critical errors and pipeline health checks.
3. **Align with MLOps Best Practices**: This strategy follows MLOps standards for consistent error handling, observability, scalability, and storage management, preparing the pipeline for scalable deployment and future monitoring enhancements.

For additional information, see also:
- [009_log_file_for_generated_data.md](009_log_file_for_generated_data.md)
- [016_log_files_for_input_data.md](016_log_files_for_input_data.md)
- [017_log_files_for_offline_evaluation.md](017_log_files_for_offline_evaluation.md)
- [018_log_files_for_online_evaluation.md](018_log_files_for_online_evaluation.md)
- [019_log_files_for_model_training.md](019_log_files_for_model_training.md)
- [020_log_files_realtime_alerts.md](020_log_files_realtime_alerts.md)

## Decision

To implement a robust and unified logging strategy, we have decided on the following practices:

### Overview

1. **Standardized Log Configuration**: All logs will follow a centralized structure defined in the configurable `common_configs/config_log.yaml` file. This configuration file controls logging levels, file rotation schedules, log output formats, and target log destinations. By centralizing log settings, all models within the pipeline will have a consistent logging structure, making the setup easier to maintain and adapt across environments.

2. **Daily Rotation and Retention Policy**: Logs will rotate daily, keeping the last 30 days of logs by default. This policy provides sufficient historical data for troubleshooting and auditing without excessive storage usage. Rotation is achieved using a `TimedRotatingFileHandler`, with daily timestamped log filenames for easy access.

3. **Log Separation by Level**: Logs are separated into `INFO`, `DEBUG`, `WARNING`, `ERROR`, and `CRITICAL` files and stored under `views_pipeline/common_logs`. This separation improves monitoring and helps maintain focus on the desired logging level when troubleshooting (e.g., reviewing only errors or detailed debugging information). Each log file captures messages at or above its configured level, keeping the logs modular and readable.

4. **Inclusion of Path and Process Details**: Log messages include additional context such as script path (`%(pathname)s`), filename (`%(filename)s`), line number (`%(lineno)d`), process ID (`%(process)d`), and thread name (`%(threadName)s`). This information aids in tracing logs back to their source, supporting traceability and aiding debugging.

5. **Error Handling and Alerts**: Real-time alerting will be implemented for critical errors and unmet conditions. Integration with alerting tools (such as Slack or email) will provide immediate notifications of key pipeline issues. Alerts will include relevant metadata like timestamps, log level, and error specifics to support rapid troubleshooting.

6. **Dual Logging to Local Storage and Weights & Biases (W&B)**:
- **Local Storage**: Logs will be stored locally on a rotating basis for easy access and immediate troubleshooting.
- **Weights & Biases (W&B) Integration**: Model training and evaluation logs will also be sent to W&B, which allows for centralized logging of metrics, model performance tracking, and experiment comparison. The W&B integration supports MLOps best practices by making logs easily searchable, taggable (e.g., by model or pipeline stage), and accessible for experiment analysis and auditing (a dual-logging sketch follows this list).

7. **Access Control and Data Sensitivity**: Logs will avoid capturing sensitive data (such as configuration secrets or personally identifiable information) to align with data governance standards. While access controls for log files are not implemented at this stage, we may restrict log access in the future as the project scales, ensuring that sensitive log data is adequately protected.

8. **Testing and Validation**: Automated tests will validate that logs are created accurately and that rotation and level-specific separation operate as expected (a test sketch follows this list). These tests will cover:
- Log creation and rotation validation.
- Level-specific log file checks to confirm appropriate separation (e.g., that `INFO` logs do not include `DEBUG` messages).
- Functional testing of real-time alerts to verify that notifications trigger as configured.
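
The dual-logging approach in item 6 pairs the rotating local files above with Weights & Biases. A minimal, hypothetical sketch of what that could look like in a training script (the project name, run name, and metric values are illustrative assumptions, not part of this commit):

```python
import logging

import wandb

logger = logging.getLogger(__name__)

# Illustrative project/run names; the pipeline's actual W&B setup lives elsewhere.
run = wandb.init(project="views_pipeline", name="lavender_haze_training", tags=["training"])

for epoch in range(3):
    train_loss = 1.0 / (epoch + 1)  # placeholder metric
    logger.info(f"Epoch {epoch}: train_loss={train_loss:.4f}")   # local rotating log files
    wandb.log({"epoch": epoch, "train_loss": train_loss})        # centralized W&B history

run.finish()
```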

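For the automated checks in item 8, a level-separation test could look roughly like the sketch below (pytest-style, assuming the test runs from the repository root and that `common_configs/config_log.yaml` is the file added in this commit):

```python
import logging
import logging.config
from pathlib import Path

import yaml


def test_info_log_excludes_debug_messages(tmp_path: Path) -> None:
    """The INFO log file should not capture DEBUG records."""
    config = yaml.safe_load(Path("common_configs/config_log.yaml").read_text())
    for handler in config["handlers"].values():
        if "filename" in handler:
            handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(tmp_path))
    logging.config.dictConfig(config)

    logger = logging.getLogger("views_pipeline.test")
    logger.debug("debug-only message")
    logger.info("info message")
    logging.shutdown()  # flush and close the file handlers

    info_log = (tmp_path / "views_pipeline_INFO.log").read_text()
    assert "info message" in info_log
    assert "debug-only message" not in info_log
```
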
## Consequences

**Positive Effects:**
- Provides a consistent and structured logging framework, improving troubleshooting, auditability, and compliance.
- Supports MLOps best practices by establishing robust monitoring, traceability, and data governance standards.
- Facilitates scalability and onboarding by providing a standardized, centralized approach to logging across all pipeline models.

**Negative Effects:**
- Additional storage resources are required for log retention and rotation, and periodic monitoring of storage usage is needed.
- Initial setup and adjustment period may add complexity as team members adapt to the standardized logging and alerting practices.
- Some refactoring of the current codebase will be needed now that this ADR has been accepted.

## Rationale

The unified logging strategy aligns with MLOps best practices by combining flexibility, scalability, and robustness. This approach ensures that logging configurations are adaptable, reproducible, and traceable across the model pipeline. By establishing standardized configuration files and integrating alerting, this logging strategy proactively supports system monitoring and provides a foundation for future observability and security enhancements.

## Considerations

1. **Future Alerting Integrations**: Additional alerting tools, such as W&B alerts, Slack, and email notifications, will be incorporated as the project matures to ensure real-time visibility into pipeline states and failures.

2. **Centralized Logging Platform**: In future updates, the logging system may transition to a centralized platform (e.g., ELK Stack, Grafana) to improve scalability, visualization, and monitoring. This would require adjusting the current setup to work seamlessly with a logging infrastructure, which could involve additional configurations or external services.

3. **Access Control Expansion**: As the project scales, access control measures will be considered to ensure data protection. Log files should avoid sensitive information to comply with best practices in data governance and avoid potential data exposure risks.

4. **Testing Resource Allocation**: Implementing automated tests for logging mechanisms may require resources such as mock environments or testing frameworks to ensure the system functions as expected under different scenarios and that alert conditions trigger correctly.

## Additional Notes

Future updates may involve enhancing logging with a centralized platform, providing a more scalable and observable solution for monitoring and auditability. Access control measures and security protocols will also be revisited as the project scales to protect data integrity and confidentiality. Team members are encouraged to provide feedback on specific logging configuration details or suggest improvements to the alerting and monitoring system.

Empty file.
Empty file.
1 change: 1 addition & 0 deletions documentation/ADRs/018_log_files_for_online_evaluation.md
@@ -0,0 +1 @@
TODO
1 change: 1 addition & 0 deletions documentation/ADRs/019_log_files_for_model_training.md
@@ -0,0 +1 @@
TODO
