@@ -226,7 +226,7 @@ def test_vlm(self):
             bits=4,
             group_size=128,
             is_vlm=True,
-            dataset="liuhaotian/llava_conv_58k",
+            dataset="NeelNanda/pile-10k",
             iters=2,
             n_samples=5,
             seq_len=512,
@@ -248,17 +248,17 @@ def test_vlm(self):
         loaded_model = Qwen2VLForConditionalGeneration.from_pretrained("transformers_vlm_tmp")
         assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
 
-        # phi-3-vision-128k-instruct
-        woq_config = AutoRoundConfig(
-            bits=4,
-            group_size=128,
-            is_vlm=True,
-            dataset="NeelNanda/pile-10k",
-            iters=2,
-            n_samples=5,
-            seq_len=64,
-            batch_size=1,
-        )
-        model_name = "microsoft/Phi-3-vision-128k-instruct"
-        woq_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config, trust_remote_code=True, attn_implementation='eager')
-        assert isinstance(woq_model.model.layers[0].self_attn.o_proj, WeightOnlyQuantizedLinear), "quantization failed."
+        # phi-3-vision-128k-instruct, disabled as it consumes too much CI time
+        # woq_config = AutoRoundConfig(
+        #     bits=4,
+        #     group_size=128,
+        #     is_vlm=True,
+        #     dataset="liuhaotian/llava_conv_58k",
+        #     iters=2,
+        #     n_samples=5,
+        #     seq_len=64,
+        #     batch_size=1,
+        # )
+        # model_name = "microsoft/Phi-3-vision-128k-instruct"
+        # woq_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config, trust_remote_code=True, attn_implementation='eager')
+        # assert isinstance(woq_model.model.layers[0].self_attn.o_proj, WeightOnlyQuantizedLinear), "quantization failed."