Commit 558b734

quant lm-head

Change-Id: I086fe5228c55526630a2e480d7532e2727884d36
Signed-off-by: Yi Liu <yiliu4@habana.ai>

1 parent: 4800b2b

2 files changed: +1 / -4 lines


examples/ds/README.md (-2 lines)
@@ -23,8 +23,6 @@ python quant.py --model_path /path/to/DeepSeek/R1/BF16/ --qmodel_path /path/to/D
 ```
 
 > [!NOTE]
->
-> - Skip quantize `lm-head`.
 > - `WEIGHT_BACKOFF = 0.5`
 > - `SCALE_DTYPE = torch.bfloat16`
 
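The two constants left in the note control how weight scales are produced. As a rough illustration only (not the repo's actual code), a maxabs-style FP8 recipe might consume them as below; `FP8_MAX` and `maxabs_scale` are hypothetical names introduced for this sketch:

```python
import torch

# Constants documented in the README note above.
WEIGHT_BACKOFF = 0.5
SCALE_DTYPE = torch.bfloat16

# Hypothetical per-tensor scale in the maxabs style: the backoff factor
# leaves headroom below the FP8 representable maximum.
FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # 448.0 for e4m3fn

def maxabs_scale(weight: torch.Tensor) -> torch.Tensor:
    amax = weight.abs().max()
    scale = amax / (FP8_MAX * WEIGHT_BACKOFF)
    return scale.to(SCALE_DTYPE)
```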
examples/ds/quant.py (+1 / -2 lines)
@@ -24,7 +24,7 @@
     "model.norm",
     "layernorm",
     "e_score_correction_bias",
-    "lm_head.weight",
+    # "lm_head.weight",
     "embed_tokens",
     "mlp.gate.weight",  # mlp.gate is not linear
 }
@@ -80,7 +80,6 @@ def _maybe_create_dir(qmodel_path):
 
 
 def quant_model_weight_with_low_cpu_usage(model_path, qmodel_path):
-    # FIXME: need to skip some layers like embedding
     _maybe_create_dir(qmodel_path)
     all_weight_filename = get_all_weight_filename(model_path)
     files_cnt = len(all_weight_filename)
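Commenting out `"lm_head.weight"` means the LM head no longer matches the skip list, so it is now quantized along with the other linear weights. A minimal sketch of how such a substring skip list is typically applied is shown below; only the skip entries come from the diff, while the helper names and the naive FP8 cast are illustrative assumptions rather than the repo's actual quant.py logic:

```python
import torch

# The skip set after this commit; entries are substrings matched against
# parameter names.
SKIP_PATTERNS = {
    "model.norm",
    "layernorm",
    "e_score_correction_bias",
    # "lm_head.weight",  # removed from the skip set, so lm_head is quantized
    "embed_tokens",
    "mlp.gate.weight",  # mlp.gate is not linear
}

def should_skip(name: str) -> bool:
    # Keep a tensor in its original dtype if any pattern occurs in its name.
    return any(pattern in name for pattern in SKIP_PATTERNS)

def quantize_state_dict(state_dict: dict) -> dict:
    quantized = {}
    for name, tensor in state_dict.items():
        if should_skip(name):
            quantized[name] = tensor
        else:
            # Naive per-tensor FP8 cast, for illustration only.
            scale = tensor.abs().max() / torch.finfo(torch.float8_e4m3fn).max
            quantized[name] = (tensor / scale).to(torch.float8_e4m3fn)
            quantized[name + ".scale"] = scale.to(torch.bfloat16)
    return quantized
```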
