File tree 2 files changed +1
-4
lines changed
2 files changed +1
-4
lines changed Original file line number Diff line number Diff line change @@ -23,8 +23,6 @@ python quant.py --model_path /path/to/DeepSeek/R1/BF16/ --qmodel_path /path/to/D
23
23
```
24
24
25
25
> [!NOTE]
26
- >
27
- > - Skip quantizing `lm-head`.
28
26
> - ` WEIGHT_BACKOFF = 0.5 `
29
27
> - ` SCALE_DTYPE = torch.bfloat16 `
30
28
Original file line number Diff line number Diff line change 24
24
"model.norm" ,
25
25
"layernorm" ,
26
26
"e_score_correction_bias" ,
27
- "lm_head.weight" ,
27
+ # "lm_head.weight",
28
28
"embed_tokens" ,
29
29
"mlp.gate.weight" , # mlp.gate is not linear
30
30
}
@@ -80,7 +80,6 @@ def _maybe_create_dir(qmodel_path):
80
80
81
81
82
82
def quant_model_weight_with_low_cpu_usage (model_path , qmodel_path ):
83
- # FIXME: need to skip some layers like embedding
84
83
_maybe_create_dir (qmodel_path )
85
84
all_weight_filename = get_all_weight_filename (model_path )
86
85
files_cnt = len (all_weight_filename )
You can’t perform that action at this time.
0 commit comments