[ Test ][ PR2 ] Splitting Common.py (#1702)

iefode · ilya-lavrenov · web-flow · commit 81cd23eeaa8e · 2025-02-12T12:39:08.000Z
Details: * Separate the comparison helpers from `common.py` into `utils/comparation.py` * Move `utils_longbench` to `utils/longbench.py` * Move `tokenizer_configs.py` to `data/` Tickets: * [159925](https://jira.devtools.intel.com/browse/CVS-159925) MERGE AFTER: * #1691 --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py
@@ -16,6 +16,7 @@
 from utils.generation_config import get_greedy, get_beam_search
 from utils.constants import get_default_llm_properties
 from utils.hugging_face import convert_models, get_hugging_face_models, run_hugging_face
+from utils.comparation import compare_generation_results
 
 TESTS_ROOT = Path(__file__).parent
 
@@ -155,34 +156,6 @@ def run_llm_pipeline(
     return generation_results
 
 
-def compare_generation_result(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig):
-    if generation_config.is_beam_search():
-        assert len(hf_result.m_scores) == len(ov_result.m_scores)
-        for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores):
-            # Note, that for fp32 / fp16 models scores are different less than 0.001
-            assert abs(hf_score - ov_score) < 0.02
-
-    if not generation_config.include_stop_str_in_output and len(generation_config.stop_strings) > 0:
-        assert len(hf_result.m_generation_ids) >= len(ov_result.m_generation_ids)
-        for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids):
-            assert ov_text in hf_text
-    else:
-        assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids)
-        for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids):
-            assert hf_text == ov_text
-
-
-def compare_generation_results(prompts: List[str], hf_results: List[GenerationResult], ov_results: List[GenerationResult], generation_configs: List[GenerationConfig] | GenerationConfig):
-    if type(generation_configs) is not list:
-        generation_configs = [generation_configs]
-
-    assert len(prompts) == len(hf_results)
-    assert len(prompts) == len(ov_results)
-
-    for prompt, ref_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs):
-        print(f"Prompt = {prompt}\nReference result = {ref_result}\nOpenVINO result = {ov_result.m_generation_ids}")
-        compare_generation_result(ref_result, ov_result, generation_config)
-
 def run_llm_pipeline_with_ref(model_id: str, 
                               prompts: List[str], 
                               generation_config: GenerationConfig | dict, 
@@ -238,20 +211,6 @@ def generate_and_compare_with_reference_text(models_path: Path, prompts: List[st
         for ref_text, ov_text in zip(ref_texts_for_this_prompt, ov_result.m_generation_ids):
             assert ref_text == ov_text
 
-
-def get_image_by_link(link):
-    from PIL import Image
-    import requests
-    from openvino import Tensor
-    import numpy as np
-
-    image = Image.open(requests.get(link, stream=True).raw)
-    if image.mode != 'RGB':
-        image = image.convert('RGB')
-    image_data = np.array((np.array(image.getdata()) - 128).astype(np.byte)).reshape(1, image.size[1], image.size[0], 3)
-    return Tensor(image_data)
-
-
 """rt_info has the highest priority. Delete it to respect configs."""
 def delete_rt_info(configs: List[Tuple], temp_path):
     core = openvino.Core()
diff --git a/tests/python_tests/data/__init__.py b/tests/python_tests/data/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2018-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/tests/python_tests/data/tokenizer_configs.py b/tests/python_tests/data/tokenizer_configs.py
@@ -1,3 +1,5 @@
+# Copyright (C) 2018-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 def get_tokenizer_configs():
     return {
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
@@ -10,6 +10,8 @@
 from pathlib import Path
 from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig,  Tokenizer, draft_model
 
+from common import generate_and_compare_with_reference_text, \
+    get_scheduler_config, run_cb_pipeline_with_ref
 from common import generate_and_compare_with_reference_text, \
     get_scheduler_config, run_cb_pipeline_with_ref
 from test_sampling import RandomSamplingTestStruct, get_current_platform_ref_texts
diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py
@@ -18,7 +18,7 @@
 from transformers import AutoTokenizer
 
 from common import TESTS_ROOT, run_cb_pipeline_with_ref
-from utils_longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred
+from utils.longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred
 
 from utils.constants import get_default_llm_properties
 
diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py
@@ -94,7 +94,7 @@ def get_chat_templates():
         "BramVanroy/Llama-2-13b-chat-dutch"
     }
 
-    from tokenizer_configs import get_tokenizer_configs
+    from data.tokenizer_configs import get_tokenizer_configs
     return [(k, v) for k, v in get_tokenizer_configs().items() if k not in skipped_models]
 
 
diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py
@@ -7,7 +7,6 @@
 import transformers
 from optimum.intel.openvino import OVModelForVisualCausalLM
 from openvino_genai import VLMPipeline, GenerationConfig
-from common import get_image_by_link
 
 from utils.generation_config import get_beam_search, get_multinomial_all_parameters
 from utils.constants import get_default_llm_properties
@@ -54,6 +53,20 @@ def get_ov_model(model_id, cache):
     "katuni4ka/tiny-random-qwen2vl",
 ]
 
+
+def get_image_by_link(link):
+    from PIL import Image
+    import requests
+    from openvino import Tensor
+    import numpy as np
+
+    image = Image.open(requests.get(link, stream=True).raw)
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    image_data = np.array((np.array(image.getdata()) - 128).astype(np.byte)).reshape(1, image.size[1], image.size[0], 3)
+    return Tensor(image_data)
+
+
 @pytest.mark.precommit
 @pytest.mark.nightly
 @pytest.mark.parametrize("model_id", model_ids)
diff --git a/tests/python_tests/utils/comparation.py b/tests/python_tests/utils/comparation.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2018-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from openvino_genai import GenerationResult, GenerationConfig
+from typing import List
+
+def compare_generation_result(hf_result: GenerationResult,
+                              ov_result: GenerationResult,
+                              generation_config: GenerationConfig):
+    if generation_config.is_beam_search():
+        assert len(hf_result.m_scores) == len(ov_result.m_scores)
+        for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores):
+            # Note, that for fp32 / fp16 models scores are different less than 0.001
+            assert abs(hf_score - ov_score) < 0.02
+
+    if not generation_config.include_stop_str_in_output and len(generation_config.stop_strings) > 0:
+        assert len(hf_result.m_generation_ids) >= len(ov_result.m_generation_ids)
+        for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids):
+            assert ov_text in hf_text
+    else:
+        assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids)
+        for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids):
+            assert hf_text == ov_text
+            
+
+def compare_generation_results(prompts: List[str],
+                               hf_results: List[GenerationResult],
+                               ov_results: List[GenerationResult],
+                               generation_configs: List[GenerationConfig] | GenerationConfig):
+    if type(generation_configs) is not list:
+        generation_configs = [generation_configs]
+
+    assert len(prompts) == len(hf_results)
+    assert len(prompts) == len(ov_results)
+
+    for prompt, ref_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs):
+        print(f"Prompt = {prompt}\nReference result = {ref_result}\nOpenVINO result = {ov_result.m_generation_ids}")
+        compare_generation_result(ref_result, ov_result, generation_config)
diff --git a/tests/python_tests/utils/longbench.py b/tests/python_tests/utils/longbench.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Copyright (C) 2018-2025 Intel Corporation`
	`2`	`+# SPDX-License-Identifier: Apache-2.0`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# Copyright (C) 2018-2025 Intel Corporation`
	`2`	`+# SPDX-License-Identifier: Apache-2.0`
`1`	`3`
`2`	`4`	`def get_tokenizer_configs():`
`3`	`5`	`return {`
Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ def get_chat_templates():`
`94`	`94`	`"BramVanroy/Llama-2-13b-chat-dutch"`
`95`	`95`	`}`
`96`	`96`
`97`		`- from tokenizer_configs import get_tokenizer_configs`
	`97`	`+ from data.tokenizer_configs import get_tokenizer_configs`
`98`	`98`	`return [(k, v) for k, v in get_tokenizer_configs().items() if k not in skipped_models]`
`99`	`99`
`100`	`100`