1 parent d249af4 commit cf9ad2f
test/3x/torch/quantization/weight_only/test_transformers.py
@@ -252,6 +252,4 @@ def test_vlm(self):
         # phi-3-vision-128k-instruct
         model_name = "microsoft/Phi-3-vision-128k-instruct"
         woq_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config, attn_implementation='eager')
-
-        from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear
         assert isinstance(woq_model.model.layers[0].self_attn.o_proj, WeightOnlyQuantizedLinear), "quantization failed."
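For context, a minimal sketch of the check this hunk leaves in place, assuming WeightOnlyQuantizedLinear is now imported once at module level in test_transformers.py (presumably why the in-test import was dropped) and that woq_config is the weight-only quantization config built earlier in the test. check_woq_o_proj is a hypothetical helper added here for illustration; AutoModelForCausalLM refers to whatever quantization-aware wrapper the test already uses.

from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear

def check_woq_o_proj(woq_model):
    # Hypothetical helper: after from_pretrained(..., quantization_config=woq_config),
    # the attention output projection should have been swapped for an IPEX
    # WeightOnlyQuantizedLinear module; a plain nn.Linear here means the
    # weight-only quantization pass did not run.
    layer = woq_model.model.layers[0].self_attn.o_proj
    assert isinstance(layer, WeightOnlyQuantizedLinear), "quantization failed."

Called as check_woq_o_proj(woq_model) right after loading the Phi-3-vision model above, this is the same assertion the diff's final context line performs inline.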