@@ -1,38 +1,42 @@
 # @package _global_
 
-# lobster_train experiment=train_ume logger.entity=zadorozk logger.project=lobster logger.group=ume-dev ++trainer.accelerator=gpu ++trainer.devices=auto
+# lobster_train experiment=train_ume ++trainer.devices=auto
 
 defaults:
   - override /model: modern_bert.yaml
   - override /data: ume
-  - override /callbacks: [base, tokens_per_second, moleculeace_linear_probe_fast, calm_linear_probe_fast]
+  - override /callbacks: [base, moleculeace_linear_probe_fast, throughput, tokens_per_second]
+
+compile: true
 
 data:
   _target_: lobster.data.UmeLightningDataModule
   root: ${paths.root_dir}/data
   datasets: ["M320M", "Calm", "AMPLIFY"]
-  batch_size: 16
+  batch_size: 128
   tokenizer_max_length: ${model.max_length}
   pin_memory: true
-  shuffle_buffer_size: 10_000
-  num_workers: 32
+  shuffle_buffer_size: 1000
+  num_workers: 8
   seed: 0
 
 paths:
-  root_dir: dev
+  root_dir: ./runs
 
 trainer:
   max_steps: 50_000
-  val_check_interval: 1_000
+  val_check_interval: 2000
   precision: 16-mixed
   gradient_clip_val: 0.5
   accumulate_grad_batches: 8
   max_time: "00:24:00:00"
-
+  limit_val_batches: 20_000
+  # limit_train_batches: 1000 # for debugging
+  # profiler: simple
 
 model:
-  model_name: UME_mini
-  vocab_size: 627
+  model_name: UME_medium
+  vocab_size: 640
   pad_token_id: 1
   cls_token_id: 0
   mask_token_id: 4
@@ -51,10 +55,8 @@ model:
 callbacks:
   moleculeace_linear_probe:
     max_length: ${model.max_length}
-  calm_linear_probe:
-    max_length: ${model.max_length}
 
 logger:
   name: ume_amplify_m320m_calm_${model.model_name}_${now:%Y-%m-%d_%H-%M-%S}
   project: lobster
-  group: ume-dev
+  group: ume-dev-${now:%Y-%m-%d-%H-%M-%S}
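For sanity-checking a change like this without launching a run, a minimal sketch using Hydra's compose API is below. The only pieces taken from this diff are the `experiment=train_ume ++trainer.devices=auto` overrides from the launch comment; `config_path="hydra_config"` and `config_name="train"` are hypothetical guesses at the repo layout, not confirmed by this commit.

```python
# Minimal sketch (not part of this commit): compose the experiment config
# offline and print the resolved trainer block. Assumes hydra-core and
# omegaconf are installed; config_path/config_name below are hypothetical.
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(version_base=None, config_path="hydra_config"):
    cfg = compose(
        config_name="train",
        overrides=["experiment=train_ume", "++trainer.devices=auto"],
    )
    # Confirms values from this diff landed, e.g. val_check_interval: 2000
    # and limit_val_batches: 20_000 (interpolations are left unresolved).
    print(OmegaConf.to_yaml(cfg.trainer))
```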