@@ -45,30 +45,30 @@ function run_tuning {
45
45
if [ " ${topology} " = " opt_125m_woq_gptq_int4" ]; then
46
46
model_name_or_path=" facebook/opt-125m"
47
47
approach=" weight_only"
48
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
48
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
49
49
elif [ " ${topology} " = " opt_125m_woq_gptq_int4_dq_bnb" ]; then
50
50
model_name_or_path=" facebook/opt-125m"
51
51
approach=" weight_only"
52
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
52
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
53
53
extra_cmd=$extra_cmd " --double_quant_type BNB_NF4"
54
54
elif [ " ${topology} " = " opt_125m_woq_gptq_int4_dq_ggml" ]; then
55
55
model_name_or_path=" facebook/opt-125m"
56
56
approach=" weight_only"
57
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
57
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
58
58
extra_cmd=$extra_cmd " --double_quant_type GGML_TYPE_Q4_K"
59
59
elif [ " ${topology} " = " llama2_7b_gptq_int4" ]; then
60
60
model_name_or_path=" meta-llama/Llama-2-7b-hf"
61
61
approach=" weight_only"
62
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
62
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
63
63
elif [ " ${topology} " = " llama2_7b_gptq_int4_dq_bnb" ]; then
64
64
model_name_or_path=" meta-llama/Llama-2-7b-hf"
65
65
approach=" weight_only"
66
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
66
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
67
67
extra_cmd=$extra_cmd " --double_quant_type BNB_NF4"
68
68
elif [ " ${topology} " = " llama2_7b_gptq_int4_dq_ggml" ]; then
69
69
model_name_or_path=" meta-llama/Llama-2-7b-hf"
70
70
approach=" weight_only"
71
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
71
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
72
72
extra_cmd=$extra_cmd " --double_quant_type GGML_TYPE_Q4_K"
73
73
elif [ " ${topology} " = " gpt_j_woq_rtn_int4" ]; then
74
74
model_name_or_path=" EleutherAI/gpt-j-6b"
@@ -87,16 +87,16 @@ function run_tuning {
87
87
elif [ " ${topology} " = " gpt_j_woq_gptq_int4" ]; then
88
88
model_name_or_path=" EleutherAI/gpt-j-6b"
89
89
approach=" weight_only"
90
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
90
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
91
91
elif [ " ${topology} " = " gpt_j_woq_gptq_int4_dq_bnb" ]; then
92
92
model_name_or_path=" EleutherAI/gpt-j-6b"
93
93
approach=" weight_only"
94
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
94
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
95
95
extra_cmd=$extra_cmd " --double_quant_type BNB_NF4"
96
96
elif [ " ${topology} " = " gpt_j_woq_gptq_int4_dq_ggml" ]; then
97
97
model_name_or_path=" EleutherAI/gpt-j-6b"
98
98
approach=" weight_only"
99
- extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_max_seq_length 2048 -- gptq_use_max_length"
99
+ extra_cmd=$extra_cmd " --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length"
100
100
extra_cmd=$extra_cmd " --double_quant_type GGML_TYPE_Q4_K"
101
101
fi
102
102
0 commit comments