Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

General logging script #94

Merged
merged 14 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 19 additions & 35 deletions common_utils/set_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@ def setup_model_paths(PATH):
PATH_model: The path (pathlib path object) including the "models" directory and its immediate subdirectory.
"""

if "models" in PATH.parts:
PATH_MODEL = Path(*[i for i in PATH.parts[:PATH.parts.index("models") + 2]])
return PATH_MODEL
else:
# error_message = "The 'models' directory was not found in the provided path."
# logger.warning(error_message)
# raise ValueError(error_message)
return None
PATH_MODEL = Path(*[i for i in PATH.parts[:PATH.parts.index("models") + 2]])

return PATH_MODEL



def setup_ensemble_paths(PATH):
Expand All @@ -66,15 +62,11 @@ def setup_ensemble_paths(PATH):
Returns:
PATH_ENSEMBLE: The path (pathlib path object) including the "ensembles" directory and its immediate subdirectory.
"""
if "ensembles" in PATH.parts:
PATH_ENSEMBLE = Path(*[i for i in PATH.parts[:PATH.parts.index("ensembles") + 2]])
return PATH_ENSEMBLE

else:
# error_message = "The 'ensembles' directory was not found in the provided path."
# logger.warning(error_message)
# raise ValueError(error_message)
return None

PATH_ENSEMBLE = Path(*[i for i in PATH.parts[:PATH.parts.index("ensembles") + 2]])

return PATH_ENSEMBLE


def setup_project_paths(PATH) -> None:
"""
Expand Down Expand Up @@ -108,19 +100,15 @@ def setup_project_paths(PATH) -> None:
# PATH_MODEL = Path(*[i for i in PATH.parts[:PATH.parts.index("models")+2]]) # The +2 is to include the "models" and the individual model name in the path

PATH_ROOT = setup_root_paths(PATH)


try:
if "models" in PATH.parts:
PATH_MODEL = setup_model_paths(PATH)
except ValueError as e:
PATH_ENSEMBLE = None
elif "ensembles" in PATH.parts:
PATH_MODEL = None
logger.warning(e)

try:
PATH_ENSEMBLE = setup_ensemble_paths(PATH)
except ValueError as e:
PATH_ENSEMBLE = None
logger.warning(e)
else:
logger.error("The provided path does not contain a model or ensemble directory.")

# print(f"Root path: {PATH_ROOT}") # debug
# print(f"Model path: {PATH_MODEL}") # debug
Expand Down Expand Up @@ -199,18 +187,14 @@ def setup_data_paths(PATH) -> Path:

"""

# PATH_MODEL = Path(*[i for i in PATH.parts[:PATH.parts.index("models")+2]]) # The +2 is to include the "models" and the individual model name in the path
try:
if "models" in PATH.parts:
PATH_MODEL = setup_model_paths(PATH)
except ValueError as e:
PATH_ENSEMBLE = None
elif "ensembles" in PATH.parts:
PATH_MODEL = None
logger.warning(e)

try:
PATH_ENSEMBLE = setup_ensemble_paths(PATH)
except ValueError as e:
PATH_ENSEMBLE = None
logger.warning(e)
else:
logger.error("The provided path does not contain a model or ensemble directory.")

PATH_DATA = PATH_MODEL / "data" if PATH_MODEL else PATH_ENSEMBLE / "data"
PATH_RAW = PATH_DATA / "raw"
Expand Down
7 changes: 5 additions & 2 deletions common_utils/utils_artifacts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def get_artifact_files(PATH, run_type):
"""
Retrieve artifact files from a directory that match the given run type and common extensions.
Expand Down Expand Up @@ -51,7 +54,7 @@ def get_latest_model_artifact(PATH, run_type):

#print statements for debugging
# print(f"artifacts availible: {model_files}")
print(f"artifact used: {model_files[0]}")
logger.info(f"artifact used: {model_files[0]}")

# Return the latest model file
#PATH_MODEL_ARTIFACT = os.path.join(path, model_files[0])
Expand Down
27 changes: 15 additions & 12 deletions common_utils/utils_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import numpy as np
import pandas as pd
import logging

# from config_partitioner import get_partitioner_dict
from set_partition import get_partitioner_dict
Expand All @@ -10,6 +11,8 @@
from utils_df_to_vol_conversion import df_to_vol
from viewser import Queryset, Column

logger = logging.getLogger(__name__)


def fetch_data_from_viewser(month_first, month_last, drift_config_dict, self_test):
"""
Expand All @@ -20,7 +23,7 @@ def fetch_data_from_viewser(month_first, month_last, drift_config_dict, self_tes
Returns:
pd.DataFrame: The prepared DataFrame with initial processing done.
"""
print(f'Beginning file download through viewser with month range {month_first},{month_last}')
logger.info(f'Beginning file download through viewser with month range {month_first},{month_last}')
queryset_base = get_input_data_config() # just used here..
df, alerts = queryset_base.publish().fetch_with_drift_detection(start_date=month_first,
end_date=month_last - 1,
Expand Down Expand Up @@ -167,7 +170,7 @@ def get_views_df(partition, override_month=None, self_test=False):

if partition == 'forecasting' and override_month is not None:
month_last = override_month
print(f'\n ***Warning: overriding end month in forecasting partition to {month_last} ***\n')
logger.warning(f'Overriding end month in forecasting partition to {month_last} ***\n')

df, alerts = fetch_data_from_viewser(month_first, month_last, drift_config_dict, self_test)

Expand Down Expand Up @@ -203,15 +206,15 @@ def fetch_or_load_views_df(partition, PATH_RAW, self_test=False, use_saved=False
# Check if the VIEWSER data file exists
try:
df = pd.read_pickle(path_viewser_df)
print(f'Reading saved data from {path_viewser_df}')
logger.info(f'Reading saved data from {path_viewser_df}')

except:
raise RuntimeError(f'Use of saved data was specified but {path_viewser_df} not found')

else:
print(f'Fetching file...')
logger.info(f'Fetching file...')
df, alerts = get_views_df(partition, override_month, self_test) # which is then used here
print(f'Saving file to {path_viewser_df}')
logger.info(f'Saving file to {path_viewser_df}')
df.to_pickle(path_viewser_df)

if validate_df_partition(df, partition, override_month):
Expand Down Expand Up @@ -249,17 +252,17 @@ def create_or_load_views_vol(partition, PATH_PROCESSED, PATH_RAW):

# Check if the volume exists
if os.path.isfile(path_vol):
print('Volume already created')
logger.info('Volume already created')
vol = np.load(path_vol)
else:
print('Creating volume...')
logger.info('Creating volume...')
path_raw = os.path.join(str(PATH_RAW), f'{partition}_viewser_df.pkl')
vol = df_to_vol(pd.read_pickle(path_raw))
print(f'shape of volume: {vol.shape}')
print(f'Saving volume to {path_vol}')
logger.info(f'shape of volume: {vol.shape}')
logger.info(f'Saving volume to {path_vol}')
np.save(path_vol, vol)

print('Done')
logger.info('Done')

return vol

Expand Down Expand Up @@ -337,8 +340,8 @@ def ensure_float64(df):
df.select_dtypes(include=['number']).dtypes != np.float64]

if len(non_float64_cols) > 0:
print(
f"Warning: DataFrame contains non-np.float64 numeric columns. Converting the following columns: {', '.join(non_float64_cols)}")
logger.warning(
f"DataFrame contains non-np.float64 numeric columns. Converting the following columns: {', '.join(non_float64_cols)}")

for col in non_float64_cols:
df[col] = df[col].astype(np.float64)
Expand Down
33 changes: 33 additions & 0 deletions common_utils/utils_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import logging


def setup_logging(log_file: str, log_level: int = logging.INFO) -> logging.Logger:
    """
    Sets up logging to both a specified file and the terminal (console).

    Args:
        log_file (str): The file where logs should be written.
        log_level (int): The logging level. Default is logging.INFO.

    Returns:
        logging.Logger: The configured root logger, with exactly one file
        handler and one console handler attached.
    """

    basic_logger = logging.getLogger()
    basic_logger.setLevel(log_level)

    # Close and remove any previous handlers BEFORE creating new ones.
    # The original code cleared handlers without closing them, which leaks
    # the open log-file descriptor of any stale FileHandler each time this
    # function is called (e.g. once per pipeline entry point).
    if basic_logger.hasHandlers():
        for old_handler in list(basic_logger.handlers):
            old_handler.close()
            basic_logger.removeHandler(old_handler)

    file_handler = logging.FileHandler(log_file)
    console_handler = logging.StreamHandler()

    file_handler.setLevel(log_level)
    console_handler.setLevel(log_level)

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    basic_logger.addHandler(file_handler)
    basic_logger.addHandler(console_handler)

    return basic_logger
28 changes: 10 additions & 18 deletions ensembles/cruel_summer/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import sys
import time
import wandb

import logging
logging.basicConfig(filename='run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
import sys
import warnings

from pathlib import Path
PATH = Path(__file__)
Expand All @@ -14,22 +9,19 @@
from set_path import setup_project_paths
setup_project_paths(PATH)

from execute_model_runs import execute_single_run
from utils_cli_parser import parse_args, validate_arguments
from utils_logger import setup_logging
from execute_model_runs import execute_single_run

warnings.filterwarnings("ignore")

logger = setup_logging('run.log')

if __name__ == "__main__":
args = parse_args()
validate_arguments(args)

# wandb login
if __name__ == "__main__":
wandb.login()

start_t = time.time()
args = parse_args()
validate_arguments(args)

execute_single_run(args)

end_t = time.time()
minutes = (end_t - start_t) / 60
logger.info(f'Done. Runtime: {minutes:.3f} minutes.\n')

6 changes: 2 additions & 4 deletions ensembles/cruel_summer/src/forecasting/generate_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from datetime import datetime
import pandas as pd
import pickle

import logging
logging.basicConfig(filename='../../run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

from pathlib import Path
PATH = Path(__file__)
Expand All @@ -21,6 +17,8 @@
from utils_run import get_standardized_df, get_aggregated_df
from utils_artifacts import get_latest_model_artifact

logger = logging.getLogger(__name__)


def forecast_ensemble(config):
run_type = config['run_type']
Expand Down
13 changes: 9 additions & 4 deletions ensembles/cruel_summer/src/management/execute_model_tasks.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import sys
import wandb

import logging
logging.basicConfig(filename='../../run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
import time

from pathlib import Path
PATH = Path(__file__)
Expand All @@ -17,6 +14,8 @@
from generate_forecast import forecast_ensemble
from utils_wandb import add_wandb_monthly_metrics

logger = logging.getLogger(__name__)


def execute_model_tasks(config=None, project=None, eval=None, forecast=None):
"""
Expand All @@ -34,6 +33,8 @@ def execute_model_tasks(config=None, project=None, eval=None, forecast=None):
artifact_name (optional): Specific names of the model artifact to load for evaluation or forecasting.
"""

start_t = time.time()

# Initialize WandB
with wandb.init(project=project, entity="views_pipeline",
config=config): # project and config ignored when running a sweep
Expand All @@ -52,3 +53,7 @@ def execute_model_tasks(config=None, project=None, eval=None, forecast=None):
if forecast:
logger.info(f"Forecasting ensemble model {config['name']}...")
forecast_ensemble(config)

end_t = time.time()
minutes = (end_t - start_t) / 60
logger.info(f'Done. Runtime: {minutes:.3f} minutes.\n')
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
import sys
import warnings
warnings.filterwarnings("ignore")

import logging
logging.basicConfig(filename='../../run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

from pathlib import Path
PATH = Path(__file__)
Expand All @@ -23,6 +17,8 @@
from utils_wandb import log_wandb_log_dict
from views_forecasts.extensions import *

logger = logging.getLogger(__name__)


def evaluate_ensemble(config):
run_type = config['run_type']
Expand Down
7 changes: 3 additions & 4 deletions ensembles/cruel_summer/src/utils/utils_checks.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import sys
from datetime import datetime

import logging
logging.basicConfig(filename='../../run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

from pathlib import Path
PATH = Path(__file__)
Expand All @@ -15,6 +11,9 @@

from utils_log_files import read_log_file

logger = logging.getLogger(__name__)


def check_model_conditions(PATH_GENERATED, config):
"""
Checks if the model meets the required conditions based on the log file.
Expand Down
4 changes: 2 additions & 2 deletions ensembles/cruel_summer/src/utils/utils_log_files.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
import logging
logging.basicConfig(filename='../../run.log', encoding='utf-8', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)


def create_log_file(PATH_GENERATED,
config,
model_timestamp,
Expand Down
Loading
Loading