@@ -130,40 +130,6 @@ def test_pipeline(self, model_arch):
130
130
_ = pipe (text )
131
131
self .assertEqual (pipe .device , model .device )
132
132
133
- @parameterized .expand (SUPPORTED_ARCHITECTURES )
134
- @unittest .skipIf (not is_bitsandbytes_available (), reason = "Test requires bitsandbytes" )
135
- def test_bnb (self , model_arch ):
136
- model_id = MODEL_NAMES [model_arch ]
137
- quantization_config = BitsAndBytesConfig (load_in_8bit = True )
138
- ipex_model = self .IPEX_MODEL_CLASS .from_pretrained (
139
- model_id , device_map = DEVICE , quantization_config = quantization_config
140
- )
141
- transformers_model = self .IPEX_MODEL_CLASS .auto_model_class .from_pretrained (
142
- model_id , device_map = DEVICE , quantization_config = quantization_config
143
- )
144
- tokenizer = AutoTokenizer .from_pretrained (model_id )
145
- inputs = "This is a sample input"
146
- tokens = tokenizer (inputs , return_tensors = "pt" ).to (DEVICE )
147
- with torch .no_grad ():
148
- transformers_outputs = transformers_model (** tokens )
149
- outputs = ipex_model (** tokens )
150
-
151
- # Test re-load model
152
- with tempfile .TemporaryDirectory () as tmpdirname :
153
- ipex_model .save_pretrained (tmpdirname )
154
- loaded_model = self .IPEX_MODEL_CLASS .from_pretrained (tmpdirname , device_map = DEVICE )
155
- loaded_model_outputs = loaded_model (** tokens )
156
- # Test init method
157
- init_model = self .IPEX_MODEL_CLASS (transformers_model )
158
- init_model_outputs = init_model (** tokens )
159
-
160
- # Compare tensor outputs
161
- for output_name in {"logits" , "last_hidden_state" }:
162
- if output_name in transformers_outputs :
163
- self .assertTrue (torch .allclose (outputs [output_name ], transformers_outputs [output_name ], atol = 1e-3 ))
164
- self .assertTrue (torch .allclose (outputs [output_name ], loaded_model_outputs [output_name ]))
165
- self .assertTrue (torch .allclose (outputs [output_name ], init_model_outputs [output_name ]))
166
-
167
133
168
134
class IPEXModelForSequenceClassificationTest (IPEXModelTest ):
169
135
IPEX_MODEL_CLASS = IPEXModelForSequenceClassification
@@ -248,46 +214,6 @@ def test_patched_model(self):
248
214
self .assertTrue (torch .allclose (outputs .start_logits , transformers_outputs .start_logits , atol = 1e-4 ))
249
215
self .assertTrue (torch .allclose (outputs .end_logits , transformers_outputs .end_logits , atol = 1e-4 ))
250
216
251
- @parameterized .expand (SUPPORTED_ARCHITECTURES )
252
- @unittest .skipIf (not is_bitsandbytes_available (), reason = "Test requires bitsandbytes" )
253
- def test_bnb (self , model_arch ):
254
- model_id = MODEL_NAMES [model_arch ]
255
- set_seed (SEED )
256
- quantization_config = BitsAndBytesConfig (load_in_8bit = True )
257
- ipex_model = IPEXModelForQuestionAnswering .from_pretrained (
258
- model_id , device_map = DEVICE , quantization_config = quantization_config
259
- )
260
- self .assertIsInstance (ipex_model .config , PretrainedConfig )
261
- transformers_model = AutoModelForQuestionAnswering .from_pretrained (
262
- model_id , device_map = DEVICE , quantization_config = quantization_config
263
- )
264
- tokenizer = AutoTokenizer .from_pretrained (model_id )
265
- inputs = "This is a sample input"
266
- tokens = tokenizer (inputs , return_tensors = "pt" ).to (DEVICE )
267
- with torch .no_grad ():
268
- transformers_outputs = transformers_model (** tokens )
269
- outputs = ipex_model (** tokens )
270
-
271
- # Test re-load model
272
- with tempfile .TemporaryDirectory () as tmpdirname :
273
- ipex_model .save_pretrained (tmpdirname )
274
- loaded_model = self .IPEX_MODEL_CLASS .from_pretrained (tmpdirname , device_map = DEVICE )
275
- loaded_model_outputs = loaded_model (** tokens )
276
-
277
- # Test init method
278
- init_model = self .IPEX_MODEL_CLASS (transformers_model )
279
- init_model_outputs = init_model (** tokens )
280
-
281
- self .assertIn ("start_logits" , outputs )
282
- self .assertIn ("end_logits" , outputs )
283
- # Compare tensor outputs
284
- self .assertTrue (torch .allclose (outputs .start_logits , transformers_outputs .start_logits , atol = 1e-4 ))
285
- self .assertTrue (torch .allclose (outputs .end_logits , transformers_outputs .end_logits , atol = 1e-4 ))
286
- self .assertTrue (torch .equal (outputs .start_logits , loaded_model_outputs .start_logits ))
287
- self .assertTrue (torch .equal (outputs .end_logits , loaded_model_outputs .end_logits ))
288
- self .assertTrue (torch .equal (outputs .start_logits , init_model_outputs .start_logits ))
289
- self .assertTrue (torch .equal (outputs .end_logits , init_model_outputs .end_logits ))
290
-
291
217
292
218
class IPEXModelForCausalLMTest (unittest .TestCase ):
293
219
IPEX_MODEL_CLASS = IPEXModelForCausalLM
@@ -799,52 +725,6 @@ def test_ipex_beam_search(self, test_name, model_arch, use_cache):
799
725
self .assertIsInstance (outputs , torch .Tensor )
800
726
self .assertTrue (torch .equal (outputs , transformers_outputs ))
801
727
802
- @parameterized .expand (SUPPORTED_ARCHITECTURES )
803
- @unittest .skipIf (not is_bitsandbytes_available (), reason = "Test requires bitsandbytes" )
804
- def test_bnb (self , model_arch ):
805
- model_id = MODEL_NAMES [model_arch ]
806
- set_seed (SEED )
807
- dtype = torch .float16 if IS_XPU_AVAILABLE else torch .float32
808
- quantization_config = BitsAndBytesConfig (load_in_8bit = True )
809
- # Test model forward do not need cache.
810
- ipex_model = self .IPEX_MODEL_CLASS .from_pretrained (
811
- model_id , torch_dtype = dtype , quantization_config = quantization_config
812
- )
813
- transformers_model = AutoModelForSeq2SeqLM .from_pretrained (
814
- model_id , torch_dtype = dtype , quantization_config = quantization_config
815
- )
816
- self .assertIsInstance (ipex_model .config , PretrainedConfig )
817
- tokenizer = AutoTokenizer .from_pretrained (model_id )
818
- tokens = tokenizer (
819
- "This is a sample" ,
820
- return_tensors = "pt" ,
821
- return_token_type_ids = False if model_arch in ("llama" , "llama2" ) else None ,
822
- )
823
- decoder_start_token_id = transformers_model .config .decoder_start_token_id if model_arch != "mbart" else 2
824
- decoder_inputs = {"decoder_input_ids" : torch .ones ((1 , 1 ), dtype = torch .long ) * decoder_start_token_id }
825
- outputs = ipex_model (** tokens , ** decoder_inputs )
826
-
827
- self .assertIsInstance (outputs .logits , torch .Tensor )
828
-
829
- with torch .no_grad ():
830
- transformers_outputs = transformers_model (** tokens , ** decoder_inputs )
831
-
832
- # Test re-load model
833
- with tempfile .TemporaryDirectory () as tmpdirname :
834
- ipex_model .save_pretrained (tmpdirname )
835
- loaded_model = self .IPEX_MODEL_CLASS .from_pretrained (tmpdirname , torch_dtype = dtype )
836
- loaded_model_outputs = loaded_model (** tokens , ** decoder_inputs )
837
-
838
- # Test init method
839
- init_model = self .IPEX_MODEL_CLASS (transformers_model )
840
- init_model_outputs = init_model (** tokens , ** decoder_inputs )
841
-
842
- # Compare tensor outputs
843
- self .assertTrue (torch .allclose (outputs .logits , transformers_outputs .logits , atol = 1e-4 ))
844
- # To avoid float pointing error
845
- self .assertTrue (torch .allclose (outputs .logits , loaded_model_outputs .logits , atol = 1e-7 ))
846
- self .assertTrue (torch .allclose (outputs .logits , init_model_outputs .logits , atol = 1e-7 ))
847
-
848
728
849
729
class IPEXSTModel (unittest .TestCase ):
850
730
SUPPORTED_ARCHITECTURES = (
0 commit comments