Commit 3fa9ab1

align the parameters between 2x and 3x example (#1636)

Signed-off-by: chensuyue <suyue.chen@intel.com>
1 parent: 853dc71

4 files changed: +17, -12 lines

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py (+2, -2)

```diff
@@ -260,8 +260,8 @@ def get_user_model():
             use_sym=weight_sym,
             group_size=args.woq_group_size,
             group_dim=args.woq_group_dim,
-            use_full_range = args.woq_use_full_range,
-            use_mse_search = args.woq_use_mse_search,
+            use_full_range=args.woq_use_full_range,
+            use_mse_search=args.woq_use_mse_search,
             export_compressed_model=args.woq_export_compressed_model,
             use_double_quant=False,
             double_quant_bits=args.double_quant_bits,
```
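Besides the alignment, this hunk normalizes keyword-argument style (no spaces around `=`, per PEP 8). For context, these keywords configure weight-only RTN quantization in the 3.x API. Below is a minimal sketch of the surrounding construction; the enclosing call is outside the hunk, so the `RTNConfig` class, the `prepare`/`convert` flow, the toy model, and all values are assumptions, and only the keyword names come from the diff:

```python
# Hypothetical reconstruction: only the keyword names are from the diff above;
# the RTNConfig class, values, and prepare/convert flow are assumptions.
import torch
from neural_compressor.torch.quantization import RTNConfig, convert, prepare

model = torch.nn.Sequential(torch.nn.Linear(128, 128))  # toy stand-in model

quant_config = RTNConfig(
    bits=4,                    # args.woq_bits in the script
    use_sym=False,             # weight_sym
    group_size=128,            # args.woq_group_size
    group_dim=1,               # args.woq_group_dim
    use_full_range=False,      # args.woq_use_full_range
    use_mse_search=False,      # args.woq_use_mse_search
    use_double_quant=False,
    double_quant_bits=8,       # args.double_quant_bits
)
model = prepare(model, quant_config)  # attach the quantization config
model = convert(model)                # apply weight-only RTN quantization
```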

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh (+9, -9)

```diff
@@ -45,30 +45,30 @@ function run_tuning {
     if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
     elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
     elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
     elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
         model_name_or_path="meta-llama/Llama-2-7b-hf"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
     elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then
         model_name_or_path="meta-llama/Llama-2-7b-hf"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
     elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then
         model_name_or_path="meta-llama/Llama-2-7b-hf"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
     elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
@@ -87,16 +87,16 @@ function run_tuning {
     elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
     elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type BNB_NF4"
     elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
         extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K"
     fi
 
```
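For the OPT topologies this hunk drops both `--woq_use_mse_search` and `--gptq_max_seq_length 2048`; for the Llama-2 and GPT-J topologies it keeps MSE search and drops only the explicit sequence length, presumably because 2048 already matches the script's default. Below is a hedged sketch of how such flags are typically declared in the companion run_clm_no_trainer.py; the flag names come from the shell script, while types, defaults, and help text are assumptions:

```python
import argparse

# Flag names are taken from the shell script above; everything else is illustrative.
parser = argparse.ArgumentParser()
parser.add_argument("--woq_algo", default="RTN")
parser.add_argument("--woq_bits", type=int, default=8)
parser.add_argument("--woq_group_size", type=int, default=-1)
parser.add_argument("--woq_scheme", default="sym")
parser.add_argument("--woq_use_mse_search", action="store_true",
                    help="search scales by minimizing MSE of quantized weights")
parser.add_argument("--gptq_max_seq_length", type=int, default=2048,
                    help="calibration sequence length (assumed default)")
parser.add_argument("--gptq_use_max_length", action="store_true")

# Parse the flags exactly as the trimmed opt_125m_woq_gptq_int4 branch passes them.
args = parser.parse_args(["--woq_algo", "GPTQ", "--woq_bits", "4",
                          "--woq_group_size", "128", "--woq_scheme", "asym",
                          "--gptq_use_max_length"])
print(args.woq_use_mse_search, args.gptq_max_seq_length)  # False 2048
```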

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py (+1, -0)

```diff
@@ -279,6 +279,7 @@ def calib_func(prepared_model):
                 'use_max_length': args.gptq_use_max_length,
                 'pad_max_length': args.gptq_pad_max_length,
                 'static_groups': args.gptq_static_groups,
+                "enable_mse_search": args.woq_enable_mse_search,
             }
             # GPTQ: use assistive functions to modify calib_dataloader and calib_func
             # TEQ: set calib_func=None, use default training func as calib_func
```
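In the 2.x API, GPTQ options like the dict in this hunk typically travel through `PostTrainingQuantConfig`'s `recipes` under the `gptq_args` key. A minimal sketch of that wiring follows; only the dict keys shown in the diff are from the source, and the values and surrounding calls are assumptions:

```python
# Hypothetical sketch of where the dict in the diff lands. Only the dict keys
# shown in the hunk come from the source; values here are toy placeholders.
from neural_compressor import PostTrainingQuantConfig

gptq_args = {
    "use_max_length": True,      # args.gptq_use_max_length
    "pad_max_length": 2048,      # args.gptq_pad_max_length
    "static_groups": False,      # args.gptq_static_groups
    "enable_mse_search": True,   # the newly added args.woq_enable_mse_search
}
conf = PostTrainingQuantConfig(
    approach="weight_only",
    recipes={"gptq_args": gptq_args},
)
# quantization.fit(model, conf, calib_dataloader=...) would then consume this
# config; the model and calibration dataloader are omitted from this sketch.
```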

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh (+5, -1)

```diff
@@ -80,14 +80,18 @@ function run_tuning {
     elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_enable_mse_search --gptq_use_max_length"
     elif [ "${topology}" = "falcon_7b_sq" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         extra_cmd=$extra_cmd" --sq --alpha 0.5"
     elif [ "${topology}" = "falcon_7b_woq_gptq_int4" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         approach="weight_only"
         extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
+    elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then
+        model_name_or_path="meta-llama/Llama-2-7b-hf"
+        approach="weight_only"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_enable_mse_search --gptq_use_max_length"
     fi
 
     python -u run_clm_no_trainer.py \
```
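One residual difference the diffs surface: the 2.x example spells the flag `--woq_enable_mse_search` (recipe key `enable_mse_search`), while the 3.x example spells it `--woq_use_mse_search` (config keyword `use_mse_search`). A small runnable contrast, with only the two flag names taken from the diffs and everything else illustrative:

```python
import argparse

# Only the two flag names below come from the diffs; the parsers are toy examples.
parser_2x = argparse.ArgumentParser(description="2.x-style example flag")
parser_2x.add_argument("--woq_enable_mse_search", action="store_true")

parser_3x = argparse.ArgumentParser(description="3.x-style example flag")
parser_3x.add_argument("--woq_use_mse_search", action="store_true")

args_2x = parser_2x.parse_args(["--woq_enable_mse_search"])
args_3x = parser_3x.parse_args(["--woq_use_mse_search"])
print(args_2x.woq_enable_mse_search, args_3x.woq_use_mse_search)  # True True
```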
