configure lr scheduler (#41)
* update configs

* fix HF datasets tests, add mocking (#42)

* add mocking

* rename

* update configs

* get_scheduler

* refactor

---------

Co-authored-by: Karina Zadorozhny <karina.zadorozhny@gmail.com>
Co-authored-by: freyn6 <freyn6@gene.com>
3 people authored Mar 6, 2025
1 parent 2fd64d3 commit 6cf2d1c
Showing 17 changed files with 276 additions and 83 deletions.
7 changes: 7 additions & 0 deletions src/lobster/hydra_config/lr_scheduler/default.yaml
@@ -0,0 +1,7 @@
defaults:
- _self_

scheduler:
_target_: "transformers.optimization.get_linear_schedule_with_warmup"
num_warmup_steps: ${model.num_warmup_steps}
num_training_steps: ${model.num_training_steps}
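For orientation, here is a minimal sketch (not part of this commit) of how a config like the one above is consumed at runtime, mirroring the `instantiate(self.scheduler_cfg, optimizer=optimizer)` calls that appear in the model files below. The step counts and the toy model/optimizer are illustrative placeholders for the Hydra-resolved `${model.*}` values.

import torch
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Stand-in for the resolved `scheduler` node of lr_scheduler/default.yaml.
scheduler_cfg = OmegaConf.create(
    {
        "_target_": "transformers.optimization.get_linear_schedule_with_warmup",
        "num_warmup_steps": 1_000,
        "num_training_steps": 10_000,
    }
)

model = torch.nn.Linear(8, 8)  # placeholder module
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Hydra builds the target from the config kwargs and forwards the optimizer at call time.
scheduler = instantiate(scheduler_cfg, optimizer=optimizer)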
11 changes: 11 additions & 0 deletions src/lobster/hydra_config/model/clm.yaml
@@ -9,3 +9,14 @@ max_length: 512
num_training_steps: ${trainer.max_steps}
num_key_value_heads: null
attention_bias: false

# Model-specific configuration parameters
model_kwargs:
embedding_layer: linear_pos
hidden_act: gelu

# Scheduler-specific configuration parameters
scheduler_kwargs:
# Any specific scheduler parameters would go here
# For example:
# min_lr: 1e-7 # For cosine_with_min_lr scheduler
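To show where `model_kwargs` ends up, here is a brief sketch under stated assumptions: in `_clm.py` below these keys are unpacked into `LlamaConfig(...)`, so entries such as `hidden_act` override the config defaults while unrecognized keys are kept as extra config attributes. The sizes are placeholders, not values from this commit.

from transformers import LlamaConfig

model_kwargs = {"hidden_act": "gelu", "embedding_layer": "linear_pos"}  # from the block above

config = LlamaConfig(
    vocab_size=32,          # placeholder values stand in for the Hydra-resolved ones
    hidden_size=64,
    num_hidden_layers=2,
    num_attention_heads=4,
    **model_kwargs,         # hidden_act overrides the default; embedding_layer is stored as an attribute
)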
11 changes: 11 additions & 0 deletions src/lobster/hydra_config/model/mlm.yaml
@@ -8,3 +8,14 @@ num_warmup_steps: 10_000
tokenizer_dir: pmlm_tokenizer
max_length: 512
num_training_steps: ${trainer.max_steps}

# Model-specific configuration parameters
model_kwargs:
embedding_layer: linear_pos
hidden_act: gelu

# Scheduler-specific configuration parameters
scheduler_kwargs:
# Any specific scheduler parameters would go here
# For example:
# min_lr: 1e-7 # For cosine_with_min_lr scheduler
16 changes: 14 additions & 2 deletions src/lobster/hydra_config/model/modern_bert.yaml
@@ -1,12 +1,24 @@
_target_: lobster.model.modern_bert.FlexBERT

# Base model parameters
lr: 1e-3
num_training_steps: ${trainer.max_steps}
model_name: UME_mini
num_warmup_steps: 10_000
max_length: 512
tokenizer_dir: pmlm_tokenizer
embedding_layer: linear_pos
hidden_act: gelu
mask_percentage: 0.25
scheduler: "constant_with_warmup"
ckpt_path: null

# Model-specific configuration parameters
model_kwargs:
embedding_layer: linear_pos
hidden_act: gelu

# Scheduler-specific configuration parameters
scheduler_kwargs:
# Any specific scheduler parameters would go here
# For example:
# min_lr: 1e-7 # For cosine_with_min_lr scheduler
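The commented-out `min_lr` hint above only applies to schedules such as `cosine_with_min_lr`. As a hedged illustration of the `scheduler` / `scheduler_kwargs` pairing, the sketch below routes such options through `get_scheduler`'s `scheduler_specific_kwargs` argument, which is assumed to be available in the installed transformers version; this is one possible wiring, not the exact code added by this commit, and the numbers are placeholders.

import torch
from transformers import get_scheduler

model = torch.nn.Linear(8, 8)  # placeholder module
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

scheduler = get_scheduler(
    name="cosine_with_min_lr",                   # would come from the `scheduler:` field
    optimizer=optimizer,
    num_warmup_steps=10_000,                     # num_warmup_steps
    num_training_steps=100_000,                  # num_training_steps / trainer.max_steps
    scheduler_specific_kwargs={"min_lr": 1e-7},  # would come from `scheduler_kwargs`
)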

1 change: 1 addition & 0 deletions src/lobster/hydra_config/train.yaml
@@ -7,6 +7,7 @@ defaults:
- trainer: default.yaml
- setup: default.yaml
- paths: default.yaml
- lr_scheduler: default.yaml
- plugins: null
- experiment: null

8 changes: 2 additions & 6 deletions src/lobster/model/_cbmlm.py
@@ -9,8 +9,8 @@
import torch
import torch.nn.functional as F
from huggingface_hub import hf_hub_download
from hydra.utils import instantiate
from transformers.configuration_utils import PretrainedConfig
from transformers.optimization import get_linear_schedule_with_warmup

from lobster.tokenization import CUSTOM_TOKENIZER, PmlmConceptTokenizerTransform, PmlmTokenizer

@@ -291,11 +291,7 @@ def configure_optimizers(self):
optimizer = torch.optim.AdamW(
self.model.parameters(), lr=self._lr, betas=(self._beta1, self._beta2), eps=self._eps
)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
scheduler = instantiate(self.scheduler_cfg, optimizer=optimizer)
scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
return {"optimizer": optimizer, "lr_scheduler": scheduler}

48 changes: 37 additions & 11 deletions src/lobster/model/_clm.py
@@ -1,11 +1,10 @@
import importlib.resources
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Literal, Optional, Tuple, Union

import lightning.pytorch as pl
import torch
from torch.nn import CrossEntropyLoss
from transformers import LlamaConfig, LlamaForCausalLM, pipeline
from transformers.optimization import get_linear_schedule_with_warmup
from transformers import LlamaConfig, LlamaForCausalLM, get_scheduler, pipeline

from lobster.tokenization import PmlmTokenizer, PmlmTokenizerTransform
from lobster.transforms import Transform
@@ -29,6 +28,20 @@ def __init__(
max_length: int = 512,
num_key_value_heads: int = None,
attention_bias: bool = False,
scheduler: Literal[
"linear",
"cosine",
"cosine_with_restarts",
"polynomial",
"constant",
"constant_with_warmup",
"inverse_sqrt",
"reduce_lr_on_plateau",
"cosine_with_min_lr",
"warmup_stable_decay",
] = "constant_with_warmup",
model_kwargs: dict = None,
scheduler_kwargs: dict = None,
):
"""
Prescient Protein Causal Language Model.
@@ -40,6 +53,12 @@ def __init__(
Grouped Query Attention. If `num_key_value_heads=num_attention_heads`, the model will
use Multi Head Attention (MHA); if `num_key_value_heads=1`, the model will use
Multi Query Attention (MQA); otherwise GQA is used.
scheduler: str, optional
The type of learning rate scheduler.
model_kwargs: dict, optional
Additional keyword arguments to pass to the model.
scheduler_kwargs: dict, optional
Additional keyword arguments to pass to the scheduler.
"""
super().__init__()
@@ -55,6 +74,9 @@ def __init__(
self._tokenizer_dir = tokenizer_dir
self._max_length = max_length
self._attention_bias = attention_bias
self.scheduler = scheduler
self.scheduler_kwargs = scheduler_kwargs or {}
model_kwargs = model_kwargs or {}

if self._tokenizer_dir is not None:
path = importlib.resources.files("lobster") / "assets" / self._tokenizer_dir
@@ -73,6 +95,7 @@ def __init__(
self._num_key_value_heads = num_key_value_heads

config = LlamaConfig(
**config_args,
mask_token_id=self.tokenizer.mask_token_id,
pad_token_id=self.tokenizer.pad_token_id,
cls_token_id=self.tokenizer.cls_token_id,
@@ -81,7 +104,7 @@
max_position_embeddings=self._max_length,
num_key_value_heads=self._num_key_value_heads,
attention_bias=self._attention_bias,
**config_args,
**model_kwargs,
)
self.model = LlamaForCausalLM(config)
self.config = self.model.config
@@ -95,8 +118,6 @@ def training_step(self, batch, batch_idx):
self.log("train_loss", loss, sync_dist=True)
self.log("train_perplexity", ppl, sync_dist=True)

# self.log("loss", loss, batch_size=len(batch["input_ids"]), sync_dist=True)

return {"loss": loss}

def validation_step(self, batch, batch_idx):
@@ -115,11 +136,16 @@ def configure_optimizers(self):
eps=self._eps,
)

scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
# Create base kwargs for the scheduler
scheduler_params = {
"num_warmup_steps": self._num_warmup_steps,
"num_training_steps": self._num_training_steps,
}

# Add any additional scheduler kwargs from initialization
scheduler_params.update(self.scheduler_kwargs)

scheduler = get_scheduler(self.scheduler, optimizer, **scheduler_params)

scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
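To make the returned `lr_scheduler` dict concrete: with `"interval": "step"` and `"frequency": 1`, Lightning advances the scheduler once per optimizer step, which is equivalent to the plain PyTorch loop sketched below. The schedule name and step counts are illustrative placeholders, not values from this commit.

import torch
from transformers import get_scheduler

model = torch.nn.Linear(8, 8)  # placeholder module
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = get_scheduler(
    "constant_with_warmup", optimizer, num_warmup_steps=100, num_training_steps=1_000
)

for _ in range(1_000):
    loss = model(torch.randn(4, 8)).sum()
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    scheduler.step()  # what Lightning does for interval="step", frequency=1

print(scheduler.get_last_lr())  # lr ramps from 0 to 1e-3 over the first 100 steps, then stays constant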

8 changes: 2 additions & 6 deletions src/lobster/model/_conditioanalclassifiermlm.py
@@ -5,8 +5,8 @@
import pandas as pd
import torch
import torch.nn.functional as F
from hydra.utils import instantiate
from transformers.configuration_utils import PretrainedConfig
from transformers.optimization import get_linear_schedule_with_warmup

from lobster.tokenization import CUSTOM_TOKENIZER, PmlmConceptTokenizerTransform, PmlmTokenizer
from lobster.transforms import Transform
@@ -218,11 +218,7 @@ def configure_optimizers(self):
optimizer = torch.optim.AdamW(
self.model.parameters(), lr=self._lr, betas=(self._beta1, self._beta2), eps=self._eps
)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
scheduler = instantiate(self.scheduler_cfg, optimizer=optimizer)

scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}

8 changes: 2 additions & 6 deletions src/lobster/model/_conditioanalmlm.py
@@ -4,8 +4,8 @@
import lightning.pytorch as pl
import pandas as pd
import torch
from hydra.utils import instantiate
from transformers.configuration_utils import PretrainedConfig
from transformers.optimization import get_linear_schedule_with_warmup

from lobster.tokenization import CUSTOM_TOKENIZER, PmlmConceptTokenizerTransform, PmlmTokenizer
from lobster.transforms import Transform
@@ -180,11 +180,7 @@ def configure_optimizers(self):
optimizer = torch.optim.AdamW(
self.model.parameters(), lr=self._lr, betas=(self._beta1, self._beta2), eps=self._eps
)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
scheduler = instantiate(self.scheduler_cfg, optimizer=optimizer)

scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}

3 changes: 3 additions & 0 deletions src/lobster/model/_dyab.py
@@ -6,6 +6,7 @@
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from omegaconf import DictConfig
from torchmetrics import (
MeanAbsoluteError,
R2Score,
@@ -34,6 +35,7 @@ def __init__(
diff_channel_0: Literal["diff", "add", "mul", "div"] = "diff",
diff_channel_1: Optional[Literal["sub", "add", "mul", "div"]] = None,
diff_channel_2: Optional[Literal["diff", "add", "mul", "div"]] = None,
scheduler_cfg: DictConfig = None,
):
"""
DyAb head.
@@ -78,6 +80,7 @@ def __init__(
self._diff_channel_0 = diff_channel_0
self._diff_channel_1 = diff_channel_1
self._diff_channel_2 = diff_channel_2
self.scheduler_cfg = scheduler_cfg

if model_name is None and checkpoint is None:
model_name = "esm2_t6_8M_UR50D"
11 changes: 5 additions & 6 deletions src/lobster/model/_lobster_fold.py
@@ -4,9 +4,10 @@

import lightning.pytorch as pl
import torch
from hydra.utils import instantiate
from omegaconf import DictConfig
from transformers import AutoTokenizer, EsmForProteinFolding
from transformers.configuration_utils import PretrainedConfig
from transformers.optimization import get_linear_schedule_with_warmup

from lobster.extern.openfold_utils import atom14_to_atom37, backbone_loss
from lobster.transforms import AutoTokenizerTransform, Transform
@@ -33,6 +34,7 @@ def __init__(
tokenizer_dir: Optional[str] = "pmlm_tokenizer",
max_length: int = 512,
cache_dir: str = None,
scheduler_cfg: DictConfig = None,
):
"""
Prescient Protein Language Model for Folding.
@@ -62,6 +64,7 @@ def __init__(
self._num_warmup_steps = num_warmup_steps
self._tokenizer_dir = tokenizer_dir
self._max_length = max_length
self.scheduler_cfg = scheduler_cfg

cache_dir = cache_dir or "~/.cache/huggingface/datasets"
self._cache_dir = cache_dir
@@ -176,11 +179,7 @@ def configure_optimizers(self):
betas=(self._beta1, self._beta2),
eps=self._eps,
)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
scheduler = instantiate(self.scheduler_cfg, optimizer=optimizer)

scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}

11 changes: 5 additions & 6 deletions src/lobster/model/_mgm.py
@@ -4,9 +4,10 @@
import lightning.pytorch as pl
import pandas as pd
import torch
from hydra.utils import instantiate
from omegaconf import DictConfig
from transformers import AutoTokenizer, EsmForMaskedLM
from transformers.configuration_utils import PretrainedConfig
from transformers.optimization import get_linear_schedule_with_warmup

from lobster.tokenization import MgmTokenizer, MgmTokenizerTransform, PmlmTokenizer, PmlmTokenizerTransform
from lobster.transforms import AutoTokenizerTransform, Transform
@@ -34,6 +35,7 @@ def __init__(
tokenizer_dir: Optional[str] = "mgm_tokenizer",
max_length: int = 512,
position_embedding_type: Literal["rotary", "absolute"] = "rotary",
scheduler_cfg: DictConfig = None,
):
"""
Multi-granularity model (MGM).
@@ -68,6 +70,7 @@ def __init__(
self._tokenizer_dir = tokenizer_dir
self._max_length = max_length
self._position_embedding_type = position_embedding_type
self.scheduler_cfg = scheduler_cfg

if model_name and "esm2" in model_name:
self.tokenizer = AutoTokenizer.from_pretrained(f"facebook/{model_name}", do_lower_case=False)
@@ -239,11 +242,7 @@ def configure_optimizers(self):
betas=(self._beta1, self._beta2),
eps=self._eps,
)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self._num_warmup_steps,
num_training_steps=self._num_training_steps,
)
scheduler = instantiate(self.scheduler_cfg, optimizer=optimizer)

scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}

