Commit 558b734

quant lm-head

Change-Id: I086fe5228c55526630a2e480d7532e2727884d36
Signed-off-by: Yi Liu <yiliu4@habana.ai>

1 parent: 4800b2b

2 files changed: +1 / -4 lines


examples/ds/README.md (-2 lines)
@@ -23,8 +23,6 @@ python quant.py --model_path /path/to/DeepSeek/R1/BF16/ --qmodel_path /path/to/D
 ```
 
 > [!NOTE]
->
-> - Skip quantize `lm-head`.
 > - `WEIGHT_BACKOFF = 0.5`
 > - `SCALE_DTYPE = torch.bfloat16`
 
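The two constants left in the note control how weight scales are produced. As a rough illustration only (not the repo's actual code), a maxabs-style FP8 recipe might consume them as below; `FP8_MAX` and `maxabs_scale` are hypothetical names introduced for this sketch:

```python
import torch

# Constants documented in the README note above.
WEIGHT_BACKOFF = 0.5
SCALE_DTYPE = torch.bfloat16

# Hypothetical per-tensor scale in the maxabs style: the backoff factor
# leaves headroom below the FP8 representable maximum.
FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # 448.0 for e4m3fn

def maxabs_scale(weight: torch.Tensor) -> torch.Tensor:
    amax = weight.abs().max()
    scale = amax / (FP8_MAX * WEIGHT_BACKOFF)
    return scale.to(SCALE_DTYPE)
```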
examples/ds/quant.py (+1 / -2 lines)
@@ -24,7 +24,7 @@
     "model.norm",
     "layernorm",
     "e_score_correction_bias",
-    "lm_head.weight",
+    # "lm_head.weight",
     "embed_tokens",
     "mlp.gate.weight",  # mlp.gate is not linear
 }
@@ -80,7 +80,6 @@ def _maybe_create_dir(qmodel_path):
 
 
 def quant_model_weight_with_low_cpu_usage(model_path, qmodel_path):
-    # FIXME: need to skip some layers like embedding
     _maybe_create_dir(qmodel_path)
     all_weight_filename = get_all_weight_filename(model_path)
     files_cnt = len(all_weight_filename)
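Commenting out `"lm_head.weight"` means the LM head no longer matches the skip list, so it is now quantized along with the other linear weights. A minimal sketch of how such a substring skip list is typically applied is shown below; only the skip entries come from the diff, while the helper names and the naive FP8 cast are illustrative assumptions rather than the repo's actual quant.py logic:

```python
import torch

# The skip set after this commit; entries are substrings matched against
# parameter names.
SKIP_PATTERNS = {
    "model.norm",
    "layernorm",
    "e_score_correction_bias",
    # "lm_head.weight",  # removed from the skip set, so lm_head is quantized
    "embed_tokens",
    "mlp.gate.weight",  # mlp.gate is not linear
}

def should_skip(name: str) -> bool:
    # Keep a tensor in its original dtype if any pattern occurs in its name.
    return any(pattern in name for pattern in SKIP_PATTERNS)

def quantize_state_dict(state_dict: dict) -> dict:
    quantized = {}
    for name, tensor in state_dict.items():
        if should_skip(name):
            quantized[name] = tensor
        else:
            # Naive per-tensor FP8 cast, for illustration only.
            scale = tensor.abs().max() / torch.finfo(torch.float8_e4m3fn).max
            quantized[name] = (tensor / scale).to(torch.float8_e4m3fn)
            quantized[name + ".scale"] = scale.to(torch.bfloat16)
    return quantized
```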
