Commit

Merge pull request #137 from prio-data/update_generel_logger
Update generel logger
Polichinel authored Oct 31, 2024
2 parents 9ac24f0 + 41e9591 commit b20115c
Showing 14 changed files with 414 additions and 73 deletions.
62 changes: 62 additions & 0 deletions common_configs/config_log.yaml
@@ -0,0 +1,62 @@
version: 1
disable_existing_loggers: False

formatters:
  detailed:
    format: '%(asctime)s %(pathname)s [%(filename)s:%(lineno)d] [%(process)d] [%(threadName)s] - %(levelname)s - %(message)s'

handlers:
  console:
    class: logging.StreamHandler
    level: INFO
    formatter: detailed
    stream: ext://sys.stdout

  info_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: INFO
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_INFO.log"
    when: "midnight"
    backupCount: 30
    encoding: "utf8"

  debug_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: DEBUG
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_DEBUG.log"
    when: "midnight"
    backupCount: 10
    encoding: "utf8"

  warning_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: WARNING
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_WARNING.log"
    when: "midnight"
    backupCount: 20
    encoding: "utf8"

  error_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: ERROR
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_ERROR.log"
    when: "midnight"
    backupCount: 60
    encoding: "utf8"

  critical_file_handler:
    class: logging.handlers.TimedRotatingFileHandler
    level: CRITICAL
    formatter: detailed
    filename: "{LOG_PATH}/views_pipeline_CRITICAL.log"
    when: "midnight"
    backupCount: 90
    encoding: "utf8"

root:
  level: DEBUG
  handlers: [console, info_file_handler, debug_file_handler, warning_file_handler, error_file_handler, critical_file_handler]
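
Note that `{LOG_PATH}` is not a placeholder that `logging.config.dictConfig` expands on its own; the loader has to substitute it with the actual log directory before applying the configuration, which is what the updated `common_utils/utils_logger.py` below does. A minimal sketch of that substitution (the paths here are illustrative assumptions; the pipeline resolves them through `ModelPath`):

```python
import logging.config
from pathlib import Path

import yaml

# Illustrative paths; utils_logger.py resolves the real ones via ModelPath.
config_path = Path("common_configs/config_log.yaml")
log_dir = Path("common_logs")

config = yaml.safe_load(config_path.read_text())

# Swap the {LOG_PATH} placeholder for a real directory and make sure it exists.
for handler in config.get("handlers", {}).values():
    if "filename" in handler:
        handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(log_dir))
        Path(handler["filename"]).parent.mkdir(parents=True, exist_ok=True)

logging.config.dictConfig(config)
logging.getLogger(__name__).info("Logging configured from config_log.yaml")
```
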
Empty file added common_logs/.gitkeep
Empty file.
10 changes: 5 additions & 5 deletions common_utils/global_cache.py
@@ -113,12 +113,12 @@ def ensure_cache_file_exists(self):
         Ensures that the cache file exists. If it does not exist, creates a new cache file.
         """
         if not self.filepath.exists():
-            logging.info(
+            logging.warning(
                 f"Cache file: {self.filepath} does not exist. Creating new cache file..."
             )
             with open(self.filepath, "wb") as f:
                 pickle.dump({}, f)
-            logging.info(f"Created new cache file: {self.filepath}")
+            logging.debug(f"Created new cache file: {self.filepath}")

     def set(self, key, value):
         """
@@ -167,7 +167,7 @@ def save_cache(self):
         """
         with open(self.filepath, "wb") as f:
             pickle.dump(self.cache, f)
-        logging.info(f"Cache saved to file: {self.filepath}")
+        logging.debug(f"Cache saved to file: {self.filepath}")

     def load_cache(self):
         """
@@ -179,7 +179,7 @@ def load_cache(self):
                     loaded_cache = pickle.loads(f.read())
                     if isinstance(loaded_cache, dict):
                         self.cache = loaded_cache
-                        logging.info(f"Cache loaded from file: {self.filepath}")
+                        logging.debug(f"Cache loaded from file: {self.filepath}")
                     else:
                         logging.error(
                             f"Loaded cache is not a dictionary. Initializing empty cache."
@@ -192,7 +192,7 @@ def load_cache(self):
                 self.cache = {}
         else:
             self.cache = {}
-            logging.info(f"Cache file does not exist. Initialized empty cache.")
+            logging.debug(f"Cache file does not exist. Initialized empty cache.")


 def cleanup_cache_file():
60 changes: 14 additions & 46 deletions common_utils/model_path.py
@@ -61,46 +61,6 @@ class ModelPath:
         _ignore_attributes (list): A list of paths to ignore.
     """

-    __slots__ = (
-        "_validate",
-        "target",
-        "use_global_cache",
-        "_force_cache_overwrite",
-        "root",
-        "models",
-        "common_utils",
-        "common_configs",
-        "_ignore_attributes",
-        "model_name",
-        "_instance_hash",
-        "_queryset",
-        "model_dir",
-        "architectures",
-        "artifacts",
-        "configs",
-        "data",
-        "data_generated",
-        "data_processed",
-        "data_raw",
-        "dataloaders",
-        "forecasting",
-        "management",
-        "notebooks",
-        "offline_evaluation",
-        "online_evaluation",
-        "reports",
-        "src",
-        "_templates",
-        "training",
-        "utils",
-        "visualization",
-        "_sys_paths",
-        "common_querysets",
-        "queryset_path",
-        "scripts",
-        "meta_tools",
-    )
-
     _target = "model"
     _use_global_cache = True
     __instances__ = 0
@@ -120,6 +80,7 @@ def _initialize_class_paths(cls):
         cls._common_configs = cls._root / "common_configs"
         cls._common_querysets = cls._root / "common_querysets"
         cls._meta_tools = cls._root / "meta_tools"
+        cls._common_logs = cls._root / "common_logs"

     @classmethod
     def get_root(cls) -> Path:
@@ -163,6 +124,13 @@ def get_meta_tools(cls) -> Path:
             cls._initialize_class_paths()
         return cls._meta_tools

+    @classmethod
+    def get_common_logs(cls) -> Path:
+        """Get the common logs path."""
+        if cls._common_logs is None:
+            cls._initialize_class_paths()
+        return cls._common_logs
+
     @classmethod
     def check_if_model_dir_exists(cls, model_name: str) -> bool:
         """
@@ -575,8 +543,8 @@ def add_paths_to_sys(self) -> List[str]:
         )
         if self._sys_paths is None:
             self._sys_paths = []
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+            # value = getattr(self, attr)
             if str(attr) not in self._ignore_attributes:
                 if (
                     isinstance(value, Path)
@@ -641,8 +609,8 @@ def view_directories(self) -> None:
         """
         print("\n{:<20}\t{:<50}".format("Name", "Path"))
         print("=" * 72)
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+            # value = getattr(self, attr)
             if attr not in self._ignore_attributes and isinstance(value, Path):
                 print("{:<20}\t{:<50}".format(str(attr), str(value)))

@@ -687,8 +655,8 @@ def get_directories(self) -> Dict[str, Optional[str]]:
         # ]
         directories = {}
         relative = False
-        for attr in self.__slots__:
-            value = getattr(self, attr)
+        for attr, value in self.__dict__.items():
+
             if str(attr) not in [
                 "model_name",
                 "root",
86 changes: 66 additions & 20 deletions common_utils/utils_logger.py
@@ -1,33 +1,79 @@
 import logging
+import logging.config
+import yaml
+import os
 from pathlib import Path
+from model_path import ModelPath
+from global_cache import GlobalCache

+_split_by_model = True  # Only works for lavender_haze
+
-def setup_logging(log_file: str, log_level=logging.INFO) -> logging.Logger:
+def ensure_log_directory(log_path: str) -> None:
     """
-    Sets up logging to both a specified file and the terminal (console).
+    Ensure the log directory exists for file-based logging handlers.
-    Args:
-        log_file (str): The file where logs should be written.
-        log_level (int): The logging level. Default is logging.INFO.
+    Parameters:
+        log_path (str): The full path to the log file for which the directory should be verified.
     """
+    log_dir = os.path.dirname(log_path)
+    if log_dir and not os.path.exists(log_dir):
+        os.makedirs(log_dir)

-    basic_logger = logging.getLogger()
-    basic_logger.setLevel(log_level)

-    file_handler = logging.FileHandler(log_file)
-    console_handler = logging.StreamHandler()
+def setup_logging(
+        default_level: int = logging.INFO, env_key: str = 'LOG_CONFIG') -> logging.Logger:

-    file_handler.setLevel(log_level)
-    console_handler.setLevel(log_level)
+    """
+    Set up the logging configuration from a YAML file and return the root logger.
+    Parameters:
+        default_level (int): The default logging level if the configuration file is not found
+            or cannot be loaded. Default is logging.INFO.
+        env_key (str): Environment variable key to override the default path to the logging
+            configuration file. Default is 'LOG_CONFIG'.
+    Returns:
+        logging.Logger: The root logger configured based on the loaded configuration.
+    Example Usage:
+        >>> logger = setup_logging()
+        >>> logger.info("Logging setup complete.")
+    """

+    CONFIG_LOGS_PATH = ModelPath.get_common_configs() / 'config_log.yaml'
+    if _split_by_model:
+        try:
+            COMMON_LOGS_PATH = ModelPath.get_common_logs() / GlobalCache["current_model"]
+        except:
+            # Protection in case model name is not available or GlobalCache fails.
+            COMMON_LOGS_PATH = ModelPath.get_common_logs()
+    else:
+        COMMON_LOGS_PATH = ModelPath.get_common_logs()

-    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
-    file_handler.setFormatter(formatter)
-    console_handler.setFormatter(formatter)
+    # Load YAML configuration
+    path = os.getenv(env_key, CONFIG_LOGS_PATH)

-    # Clear previous handlers if they exist
-    if basic_logger.hasHandlers():
-        basic_logger.handlers.clear()
+    if os.path.exists(path):
+        try:
+            with open(path, 'rt') as f:
+                config = yaml.safe_load(f.read())

+            # Replace placeholder with actual log directory path
+            for handler in config.get("handlers", {}).values():
+                if "filename" in handler and "{LOG_PATH}" in handler["filename"]:
+                    handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(COMMON_LOGS_PATH))
+                    ensure_log_directory(handler["filename"])

+            # Apply logging configuration
+            logging.config.dictConfig(config)

-    basic_logger.addHandler(file_handler)
-    basic_logger.addHandler(console_handler)
+        except Exception as e:
+            logging.basicConfig(level=default_level)
+            logging.error(f"Failed to load logging configuration from {path}. Using basic configuration. Error: {e}")
+    else:
+        logging.basicConfig(level=default_level)
+        logging.warning(f"Logging configuration file not found at {path}. Using basic configuration.")

+    return logging.getLogger()

-    return basic_logger
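
A possible call site for the new `setup_logging` (hypothetical; the entry-point wiring is not part of this diff). It assumes `GlobalCache` accepts item assignment for `"current_model"`, mirroring the `GlobalCache["current_model"]` lookup above, so that logs are split per model:

```python
# Hypothetical usage from a model's entry-point script.
from global_cache import GlobalCache
from utils_logger import setup_logging

GlobalCache["current_model"] = "lavender_haze"  # assumed setter; enables per-model log folders
logger = setup_logging()

logger.info("Pipeline run started.")    # console + views_pipeline_INFO.log
logger.debug("Configuration loaded.")   # captured by views_pipeline_DEBUG.log only
```
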
84 changes: 84 additions & 0 deletions documentation/ADRs/015_log_files_general_strategy.md
@@ -0,0 +1,84 @@
# log files general strategy

| ADR Info | Details |
|---------------------|------------------------------|
| Subject | log files general strategy |
| ADR Number | 015 |
| Status | Accepted |
| Author | Simon |
| Date | 28.10.2024 |


## Context

Effective logging is essential for maintaining data integrity, monitoring model behavior, and troubleshooting issues within the model pipeline. A cohesive, centralized logging strategy ensures that logs are structured and accessible, enhancing transparency, auditability, and reliability across the model deployment lifecycle. The main goals of this logging strategy are to:

1. **Enable Reproducibility and Traceability**: Log details such as timestamps, script paths, and process IDs are standardized to help trace model behavior and system states effectively across different environments.
2. **Support Monitoring and Real-Time Alerts**: Logs will provide data for monitoring tools, enabling real-time alerting on critical errors and pipeline health checks.
3. **Align with MLOps Best Practices**: This strategy follows MLOps standards for consistent error handling, observability, scalability, and storage management, preparing the pipeline for scalable deployment and future monitoring enhancements.

For additional information, see also:
- [009_log_file_for_generated_data.md](009_log_file_for_generated_data.md)
- [016_log_files_for_input_data.md](016_log_files_for_input_data.md)
- [017_log_files_for_offline_evaluation.md](017_log_files_for_offline_evaluation.md)
- [018_log_files_for_online_evaluation.md](018_log_files_for_online_evaluation.md)
- [019_log_files_for_model_training.md](019_log_files_for_model_training.md)
- [020_log_files_realtime_alerts.md](020_log_files_realtime_alerts.md)

## Decision

To implement a robust and unified logging strategy, we have decided on the following practices:

### Overview

1. **Standardized Log Configuration**: All logs will follow a centralized structure defined in the configurable `common_configs/config_log.yaml` file. This configuration file controls logging levels, file rotation schedules, log output formats, and target log destinations. By centralizing log settings, all models within the pipeline will have a consistent logging structure, making the setup easier to maintain and adapt across environments.

2. **Daily Rotation and Retention Policy**: Logs will rotate daily, keeping the last 30 days of logs by default. This policy provides sufficient historical data for troubleshooting and auditing without excessive storage usage. Rotation is achieved using a `TimedRotatingFileHandler`, with daily timestamped log filenames for easy access.

3. **Log Separation by Level**: Logs are separated into `INFO`, `DEBUG`, `WARNING`, `ERROR`, and `CRITICAL` files and stored under `views_pipeline/common_logs`. This separation improves monitoring and helps maintain focus on the desired logging level when troubleshooting (e.g., reviewing only errors or detailed debugging information). Each log file captures messages at or above its configured level, keeping the logs modular and readable.

4. **Inclusion of Path and Process Details**: Log messages include additional context such as script path (`%(pathname)s`), filename (`%(filename)s`), line number (`%(lineno)d`), process ID (`%(process)d`), and thread name (`%(threadName)s`). This information aids in tracing logs back to their source, supporting traceability and aiding debugging.

5. **Error Handling and Alerts**: Real-time alerting will be implemented for critical errors and unmet conditions. Integration with alerting tools (such as Slack or email) will provide immediate notifications of key pipeline issues. Alerts will include relevant metadata like timestamps, log level, and error specifics to support rapid troubleshooting.

6. **Dual Logging to Local Storage and Weights & Biases (W&B)**:
- **Local Storage**: Logs will be stored locally on a rotating basis for easy access and immediate troubleshooting.
- **Weights & Biases (W&B) Integration**: Model training and evaluation logs will also be sent to W&B, which allows for centralized logging of metrics, model performance tracking, and experiment comparison. The W&B integration supports MLOps best practices by making logs easily searchable, taggable (e.g., by model or pipeline stage), and accessible for experiment analysis and auditing (a dual-logging sketch follows this list).

7. **Access Control and Data Sensitivity**: Logs will avoid capturing sensitive data (such as configuration secrets or personally identifiable information) to align with data governance standards. While access controls for log files are not implemented at this stage, we may restrict log access in the future as the project scales, ensuring that sensitive log data is adequately protected.

8. **Testing and Validation**: Automated tests will validate that logs are created accurately and that rotation and level-specific separation operate as expected (a test sketch follows this list). These tests will cover:
- Log creation and rotation validation.
- Level-specific log file checks to confirm appropriate separation (e.g., that `INFO` logs do not include `DEBUG` messages).
- Functional testing of real-time alerts to verify that notifications trigger as configured.
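
The dual-logging approach in item 6 pairs the rotating local files above with Weights & Biases. A minimal, hypothetical sketch of what that could look like in a training script (the project name, run name, and metric values are illustrative assumptions, not part of this commit):

```python
import logging

import wandb

logger = logging.getLogger(__name__)

# Illustrative project/run names; the pipeline's actual W&B setup lives elsewhere.
run = wandb.init(project="views_pipeline", name="lavender_haze_training", tags=["training"])

for epoch in range(3):
    train_loss = 1.0 / (epoch + 1)  # placeholder metric
    logger.info(f"Epoch {epoch}: train_loss={train_loss:.4f}")   # local rotating log files
    wandb.log({"epoch": epoch, "train_loss": train_loss})        # centralized W&B history

run.finish()
```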

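For the automated checks in item 8, a level-separation test could look roughly like the sketch below (pytest-style, assuming the test runs from the repository root and that `common_configs/config_log.yaml` is the file added in this commit):

```python
import logging
import logging.config
from pathlib import Path

import yaml


def test_info_log_excludes_debug_messages(tmp_path: Path) -> None:
    """The INFO log file should not capture DEBUG records."""
    config = yaml.safe_load(Path("common_configs/config_log.yaml").read_text())
    for handler in config["handlers"].values():
        if "filename" in handler:
            handler["filename"] = handler["filename"].replace("{LOG_PATH}", str(tmp_path))
    logging.config.dictConfig(config)

    logger = logging.getLogger("views_pipeline.test")
    logger.debug("debug-only message")
    logger.info("info message")
    logging.shutdown()  # flush and close the file handlers

    info_log = (tmp_path / "views_pipeline_INFO.log").read_text()
    assert "info message" in info_log
    assert "debug-only message" not in info_log
```
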
## Consequences

**Positive Effects:**
- Provides a consistent and structured logging framework, improving troubleshooting, auditability, and compliance.
- Supports MLOps best practices by establishing robust monitoring, traceability, and data governance standards.
- Facilitates scalability and onboarding by providing a standardized, centralized approach to logging across all pipeline models.

**Negative Effects:**
- Additional storage resources are required for log retention and rotation, and periodic monitoring of storage usage is needed.
- Initial setup and adjustment period may add complexity as team members adapt to the standardized logging and alerting practices.
- Some refactoring of the current codebase will be needed now that this ADR has been accepted.

## Rationale

The unified logging strategy aligns with MLOps best practices by combining flexibility, scalability, and robustness. This approach ensures that logging configurations are adaptable, reproducible, and traceable across the model pipeline. By establishing standardized configuration files and integrating alerting, this logging strategy proactively supports system monitoring and provides a foundation for future observability and security enhancements.

## Considerations

1. **Future Alerting Integrations**: Additional alerting tools, such as W&B alerts, Slack, and email notifications, will be incorporated as the project matures to ensure real-time visibility into pipeline states and failures.

2. **Centralized Logging Platform**: In future updates, the logging system may transition to a centralized platform (e.g., ELK Stack, Grafana) to improve scalability, visualization, and monitoring. This would require adjusting the current setup to work seamlessly with a logging infrastructure, which could involve additional configurations or external services.

3. **Access Control Expansion**: As the project scales, access control measures will be considered to ensure data protection. Log files should avoid sensitive information to comply with best practices in data governance and avoid potential data exposure risks.

4. **Testing Resource Allocation**: Implementing automated tests for logging mechanisms may require resources such as mock environments or testing frameworks to ensure the system functions as expected under different scenarios and that alert conditions trigger correctly.

## Additional Notes

Future updates may involve enhancing logging with a centralized platform, providing a more scalable and observable solution for monitoring and auditability. Access control measures and security protocols will also be revisited as the project scales to protect data integrity and confidentiality. Team members are encouraged to provide feedback on specific logging configuration details or suggest improvements to the alerting and monitoring system.

Empty file.
Empty file.
1 change: 1 addition & 0 deletions documentation/ADRs/018_log_files_for_online_evaluation.md
@@ -0,0 +1 @@
TODO
1 change: 1 addition & 0 deletions documentation/ADRs/019_log_files_for_model_training.md
@@ -0,0 +1 @@
TODO
