@@ -192,7 +192,7 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
192
192
@pytest .mark .nightly
193
193
def test_set_chat_template ():
194
194
model_descr = get_chat_models_list ()[0 ]
195
- model_id , path , hf_tokenizer , opt_model , ov_pipe = read_model ((model_descr [0 ], model_descr [1 ] / '_test_chat' ))
195
+ model_id , path , hf_tokenizer , opt_model , ov_pipe = read_model ((model_descr [0 ], model_descr [1 ]))
196
196
197
197
prompt = "how are you?"
198
198
dummy_conversation = [
@@ -223,24 +223,36 @@ def test_set_chat_template():
223
223
]
224
224
@pytest.mark.precommit
@pytest.mark.nightly
@pytest.mark.parametrize("prompt", prompts)
def test_encode_decode_with_special_tokens_option(prompt):
    """Compare the GenAI tokenizer with the HF tokenizer for both special-token flags.

    For the given ``prompt`` the test:

    * encodes with ``add_special_tokens`` set to ``True`` and to ``False`` and
      checks that the GenAI ids equal the HF ids in both cases;
    * checks that the flag changes the number of produced ids;
    * decodes the ids that contain special tokens with ``skip_special_tokens``
      set to ``True`` and to ``False`` and checks that the GenAI text equals
      the HF text in both cases;
    * checks that the flag changes the decoded text.

    NOTE(review): the "made a difference" checks presumably rely on the first
    model returned by ``get_models_list()`` having a tokenizer that actually
    adds special tokens -- confirm against the model list.
    """
    import numpy as np

    model_descr = get_models_list()[0]
    # Only the HF tokenizer and the pipeline are needed from the returned tuple.
    _, _, hf_tokenizer, _, ov_pipe = read_model((model_descr[0], model_descr[1]))
    ov_tokenizer = ov_pipe.get_tokenizer()

    # Calling encode with 'add_special_tokens' will set state flag.
    ov_res_add_spec = ov_tokenizer.encode(prompt, add_special_tokens=True).input_ids.data
    ov_res_no_spec = ov_tokenizer.encode(prompt, add_special_tokens=False).input_ids.data
    hf_res_add_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=True)["input_ids"]
    hf_res_no_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=False)["input_ids"]

    # array_equal compares shape as well as values, so a length mismatch is
    # reported as a plain failure instead of being broadcast or raising.
    assert np.array_equal(ov_res_add_spec, hf_res_add_spec), \
        "GenAI and HF ids differ with add_special_tokens=True"
    assert np.array_equal(ov_res_no_spec, hf_res_no_spec), \
        "GenAI and HF ids differ with add_special_tokens=False"

    # Check that add_special_tokens flag indeed made any difference
    assert ov_res_add_spec.size != ov_res_no_spec.size
    assert hf_res_add_spec.size != hf_res_no_spec.size

    # Decode with 'skip_special_tokens'. The HF ids are fed to both tokenizers;
    # they were asserted equal to the GenAI ids above.
    decoded_genai_skip_spec = ov_tokenizer.decode(hf_res_add_spec, skip_special_tokens=True)[0]
    decoded_genai_no_skip = ov_tokenizer.decode(hf_res_add_spec, skip_special_tokens=False)[0]
    decoded_hf_skip_spec = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=True)
    decoded_hf_no_skip = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=False)
    assert decoded_genai_skip_spec == decoded_hf_skip_spec
    assert decoded_genai_no_skip == decoded_hf_no_skip

    # Check that skip_special_tokens indeed made any difference
    assert decoded_genai_skip_spec != decoded_genai_no_skip
    assert decoded_hf_skip_spec != decoded_hf_no_skip
244
256
245
257
246
258
@pytest .mark .precommit
0 commit comments