
Commit d4bb7c1

Test add_special_tokens properly (#1586)

- In HF, `add_special_tokens` works for `tiny-random-phi3`, while for `Qwen2-0.5B-Instruct` it has no effect even in HF itself.
- Use `"katuni4ka/tiny-random-phi3"` instead of `"Qwen/Qwen2-0.5B-Instruct"` for special-tokens testing.

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent ca6f5cb commit d4bb7c1
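
The behavior described in the commit message can be checked in plain HF Transformers before touching the OpenVINO side. A minimal sketch (an illustrative script, not part of the commit; it assumes `transformers` is installed and both models are reachable on the Hub):

# Check whether add_special_tokens changes the encoding at all.
from transformers import AutoTokenizer

prompt = "how are you?"
for model_id in ("katuni4ka/tiny-random-phi3", "Qwen/Qwen2-0.5B-Instruct"):
    tok = AutoTokenizer.from_pretrained(model_id)
    with_special = tok(prompt, add_special_tokens=True)["input_ids"]
    without_special = tok(prompt, add_special_tokens=False)["input_ids"]
    # Per the commit message this is expected to print True for
    # tiny-random-phi3 (a BOS token is prepended) and False for Qwen2,
    # whose tokenizer has no special tokens to add.
    print(model_id, with_special != without_special)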


3 files changed: +26 -14 lines changed


tests/python_tests/test_continuous_batching.py

+1 -1

@@ -117,7 +117,7 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
 @pytest.mark.parametrize("model_descr", get_chat_models_list())
 @pytest.mark.precommit
 def test_chat_scenario_vs_stateful(model_descr, generation_config_kwargs: Dict):
-    model_id, models_path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+    model_id, models_path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
     cb_pipe = get_continuous_batching(models_path)
 
     ov_pipe.start_chat()

tests/python_tests/test_llm_pipeline.py

+1 -1

@@ -129,7 +129,7 @@ def test_chat_scenario(model_descr, generation_config_kwargs: Dict):
     chat_history_hf = []
     chat_history_ov = []
 
-    model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+    model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
 
     ov_generation_config = GenerationConfig(**generation_config_kwargs)
     hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config)

tests/python_tests/test_tokenizer.py

+24 -12

@@ -192,7 +192,7 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
 @pytest.mark.nightly
 def test_set_chat_template():
     model_descr = get_chat_models_list()[0]
-    model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+    model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
 
     prompt = "how are you?"
     dummy_conversation = [
@@ -223,24 +223,36 @@ def test_set_chat_template():
 ]
 @pytest.mark.precommit
 @pytest.mark.nightly
-@pytest.mark.parametrize("add_special_tokens", [True, False])
-@pytest.mark.parametrize("skip_special_tokens", [True, False])
 @pytest.mark.parametrize("prompt", prompts)
-def test_encode_decode_with_special_tokens_option(add_special_tokens, skip_special_tokens, prompt):
+def test_encode_decode_with_special_tokens_option(prompt):
     import numpy as np
-    model_descr = get_chat_models_list()[0]
-    model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+    model_descr = get_models_list()[0]
+    model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1]))
     ov_tokenzier = ov_pipe.get_tokenizer()
 
     # Calling encode with 'add_special_tokens' will set the state flag.
-    ov_res = ov_tokenzier.encode(prompt, add_special_tokens=add_special_tokens).input_ids.data
-    hf_res = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"]
-    assert np.all(ov_res == hf_res)
+    ov_res_add_spec = ov_tokenzier.encode(prompt, add_special_tokens=True).input_ids.data
+    ov_res_no_spec = ov_tokenzier.encode(prompt, add_special_tokens=False).input_ids.data
+    hf_res_add_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=True)["input_ids"]
+    hf_res_no_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=False)["input_ids"]
+    assert np.all(ov_res_add_spec == hf_res_add_spec)
+    assert np.all(ov_res_no_spec == hf_res_no_spec)
+
+    # Check that the add_special_tokens flag indeed made a difference
+    assert ov_res_add_spec.size != ov_res_no_spec.size
+    assert hf_res_add_spec.size != hf_res_no_spec.size
 
     # Decode with 'skip_special_tokens'
-    decoded_genai = ov_tokenzier.decode(ov_res, skip_special_tokens=skip_special_tokens)[0]
-    decoded_hf = hf_tokenizer.decode(hf_res[0], skip_special_tokens=skip_special_tokens)
-    assert decoded_genai == decoded_hf
+    decoded_genai_skip_spec = ov_tokenzier.decode(hf_res_add_spec, skip_special_tokens=True)[0]
+    decoded_genai_no_skip = ov_tokenzier.decode(hf_res_add_spec, skip_special_tokens=False)[0]
+    decoded_hf_skip_spec = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=True)
+    decoded_hf_no_skip = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=False)
+    assert decoded_genai_skip_spec == decoded_hf_skip_spec
+    assert decoded_genai_no_skip == decoded_hf_no_skip
+
+    # Check that skip_special_tokens indeed made a difference
+    assert decoded_genai_skip_spec != decoded_genai_no_skip
+    assert decoded_hf_skip_spec != decoded_hf_no_skip
 
 
 @pytest.mark.precommit
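
The round trip exercised by the updated test can also be driven outside pytest. A minimal sketch, assuming the model has already been exported to an OpenVINO directory (`models_path` below is a hypothetical local path) and that the standalone `openvino_genai.Tokenizer` honors the same `add_special_tokens`/`skip_special_tokens` flags the test reaches through `ov_pipe.get_tokenizer()`:

import numpy as np
import openvino_genai
from transformers import AutoTokenizer

models_path = "tiny-random-phi3-ov"  # hypothetical path to the exported model
ov_tokenizer = openvino_genai.Tokenizer(models_path)
hf_tokenizer = AutoTokenizer.from_pretrained("katuni4ka/tiny-random-phi3")

prompt = "how are you?"
# Encoding with special tokens should match HF exactly.
ov_ids = ov_tokenizer.encode(prompt, add_special_tokens=True).input_ids.data
hf_ids = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=True)["input_ids"]
assert np.all(ov_ids == hf_ids)

# Round trip: with skip_special_tokens=False the BOS marker survives decoding.
print(ov_tokenizer.decode(ov_ids, skip_special_tokens=True)[0])
print(ov_tokenizer.decode(ov_ids, skip_special_tokens=False)[0])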
