From e7610aaf32e90894f6099bf04f297f271387d06a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 15:20:59 +0200 Subject: [PATCH 1/9] Schedule nightly slow tests --- .github/workflows/test_openvino_basic.yml | 4 ++-- tests/openvino/test_modeling.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_openvino_basic.yml b/.github/workflows/test_openvino_basic.yml index effb99a84d..c2626e91e5 100644 --- a/.github/workflows/test_openvino_basic.yml +++ b/.github/workflows/test_openvino_basic.yml @@ -25,7 +25,7 @@ jobs: # Testing lower and upper bound of supported Python versions # This also ensures that the test fails if dependencies break for Python 3.7 python-version: ["3.8", "3.11"] - transformers: ['transformers', 'git+https://github.com/huggingface/transformers.git'] + transformers: ['transformers'] optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git'] runs-on: ubuntu-20.04 @@ -51,4 +51,4 @@ jobs: - name: Test with Pytest run: | pytest tests/openvino/test_modeling_basic.py - + RUN_SLOW=1 pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0 \ No newline at end of file diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 32fc255a1f..70d345495c 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -20,6 +20,7 @@ from typing import Dict import numpy as np +import pytest import requests import timm import torch @@ -53,6 +54,7 @@ set_seed, ) from transformers.onnx.utils import get_preprocessor +from transformers.testing_utils import slow from utils_tests import MODEL_NAMES from optimum.intel import ( @@ -364,6 +366,8 @@ def test_compare_to_transformers(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True) @@ -379,6 +383,8 @@ def test_pipeline(self, model_arch): del model gc.collect() + @pytest.mark.run_slow + @slow def test_metric(self): model_id = "distilbert-base-cased-distilled-squad" set_seed(SEED) @@ -431,6 +437,8 @@ def test_compare_to_transformers(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] model = OVModelForTokenClassification.from_pretrained(model_id, export=True) @@ -481,6 +489,8 @@ def test_compare_to_transformers(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True) @@ -851,6 +861,8 @@ def test_compare_to_transformers(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] model = OVModelForImageClassification.from_pretrained(model_id, export=True) @@ -981,6 +993,8 @@ def test_pipeline(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_generate_utils(self, model_arch): model_id = MODEL_NAMES[model_arch] model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True) @@ -1438,6 +1452,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self): self.assertIn("only supports the tasks", str(context.exception)) @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_generate_utils(self, model_arch: str): model_id = MODEL_NAMES[model_arch] model = OVModelForVision2Seq.from_pretrained(model_id, export=True) From aed62ba017ecfbb5ecde1a0cd835a27140691472 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 16:11:58 +0200 Subject: [PATCH 2/9] add needed dependency --- .github/workflows/test_openvino_basic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_openvino_basic.yml b/.github/workflows/test_openvino_basic.yml index c2626e91e5..3135e6c004 100644 --- a/.github/workflows/test_openvino_basic.yml +++ b/.github/workflows/test_openvino_basic.yml @@ -42,7 +42,7 @@ jobs: # Install openvino manually to prevent dependency conflicts when .[openvino] pins # optimum or transformers to a specific version # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages - pip install torch --extra-index-url https://download.pytorch.org/whl/cpu + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install .[tests] openvino onnx onnxruntime ${{ matrix.optimum}} ${{ matrix.transformers }} - name: Pip freeze From d83cce32bfdc873b28add0ebce20acc991c42c1d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 16:25:06 +0200 Subject: [PATCH 3/9] set test to slow --- tests/openvino/test_modeling.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 70d345495c..28b6642f61 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -594,6 +594,8 @@ def test_compare_to_transformers(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_pipeline(self, model_arch): model_kwargs = {} model_id = MODEL_NAMES[model_arch] From c95dee5d10424186283b4aaa944a6ccefe7c81f6 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 17:35:53 +0200 Subject: [PATCH 4/9] add duration --- .github/workflows/test_openvino.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index ba5b09ff81..bff5cb525f 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -35,7 +35,7 @@ jobs: pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime - name: Test with Pytest run: | - pytest tests/openvino/ --ignore test_modeling_basic + pytest tests/openvino/ --ignore test_modeling_basic --durations=0 - name: Test openvino-nightly run: | pip uninstall -y openvino From af8b85b74ab569c3d782f26298a0f785f00cad1f Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 17:48:29 +0200 Subject: [PATCH 5/9] convert to slow --- tests/openvino/test_modeling.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 28b6642f61..e715f4b6fb 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -626,6 +626,8 @@ def test_pipeline(self, model_arch): gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) + @pytest.mark.run_slow + @slow def test_multiple_inputs(self, model_arch): model_id = MODEL_NAMES[model_arch] set_seed(SEED) From 8a4ff70354295c07ae608c59a4bd5511fea9e468 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 17:48:39 +0200 Subject: [PATCH 6/9] add dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e80d0ea448..a8c43f51d4 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ "auto-gptq", "transformers_stream_generator", "einops", + "tiktoken", ] QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"] From 6d091a00b255cbc5464edafa991507c5b173b328 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 18:21:37 +0200 Subject: [PATCH 7/9] merge tests --- tests/openvino/test_modeling.py | 92 +++++++++++++++------------------ tests/openvino/utils_tests.py | 1 + 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index e715f4b6fb..2229bc0ecd 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -536,9 +536,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "phi", "internlm2", "orion", + "falcon", ) GENERATION_LENGTH = 100 - IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3") REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion") @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -563,36 +563,60 @@ def test_compare_to_transformers(self, model_arch): ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs) self.assertIsInstance(ov_model.config, PretrainedConfig) self.assertTrue(ov_model.use_cache) - self.assertEqual( - ov_model.stateful, self.IS_SUPPORT_STATEFUL and ov_model.config.model_type not in not_stateful - ) - set_seed(SEED) - transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs) + self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful) tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) - if model_arch == "qwen": - transformers_model.to(torch.float32) - tokens = tokenizer( - "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None - ) - ov_outputs = ov_model(**tokens) + tokens = tokenizer("This is a sample output", return_tensors="pt") + ov_outputs = ov_model(**tokens) self.assertTrue("logits" in ov_outputs) self.assertIsInstance(ov_outputs.logits, torch.Tensor) self.assertTrue("past_key_values" in ov_outputs) self.assertIsInstance(ov_outputs.past_key_values, tuple) - is_stateful = ov_model.config.model_type not in not_stateful and self.IS_SUPPORT_STATEFUL + is_stateful = ov_model.config.model_type not in not_stateful self.assertEqual(ov_model.stateful, is_stateful) if is_stateful: self.assertTrue(len(ov_outputs.past_key_values) == 1 and len(ov_outputs.past_key_values[0]) == 0) + + set_seed(SEED) + transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs) + if model_arch == "qwen": + transformers_model.to(torch.float32) + with torch.no_grad(): transformers_outputs = transformers_model(**tokens) # Compare tensor outputs self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=1e-4)) + + # Qwen tokenizer does not support padding + if model_arch == "qwen": + return + + # Compare batched generation. + tokenizer.pad_token_id = tokenizer.eos_token_id + tokenizer.padding_side = "left" + tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) + ov_model.generation_config.eos_token_id = None + transformers_model.generation_config.eos_token_id = None + ov_model.config.eos_token_id = None + transformers_model.config.eos_token_id = None + gen_config = GenerationConfig( + max_new_tokens=30, + min_new_tokens=30, + num_beams=3, + do_sample=False, + eos_token_id=None, + ) + + ov_outputs = ov_model.generate(**tokens, generation_config=gen_config) + transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) + self.assertTrue(torch.allclose(ov_outputs, transformers_outputs)) + del transformers_model del ov_model gc.collect() + @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.run_slow @slow @@ -625,37 +649,6 @@ def test_pipeline(self, model_arch): del model gc.collect() - @parameterized.expand(SUPPORTED_ARCHITECTURES) - @pytest.mark.run_slow - @slow - def test_multiple_inputs(self, model_arch): - model_id = MODEL_NAMES[model_arch] - set_seed(SEED) - if model_arch == "qwen": - self.skipTest("Qwen tokenizer does not support padding") - model_kwargs = {} - if model_arch in self.REMOTE_CODE_MODELS: - model_kwargs = { - "config": AutoConfig.from_pretrained(model_id, trust_remote_code=True), - "trust_remote_code": True, - } - model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, **model_kwargs) - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) - tokenizer.pad_token = tokenizer.eos_token - texts = ["this is a simple input", "this is a second simple input", "this is a third simple input"] - tokens = tokenizer(texts, padding=True, return_tensors="pt") - generation_config = GenerationConfig(encoder_no_repeat_ngram_size=0, max_new_tokens=20, num_beams=2) - outputs = model.generate(**tokens, generation_config=generation_config) - self.assertIsInstance(outputs, torch.Tensor) - self.assertEqual(outputs.shape[0], 3) - # test that generation result is reproducible - outputs2 = model.generate(**tokens, generation_config=generation_config) - self.assertIsInstance(outputs2, torch.Tensor) - self.assertEqual(outputs2.shape[0], 3) - self.assertTrue(torch.allclose(outputs2, outputs)) - del model - gc.collect() - def test_model_and_decoder_same_device(self): model_id = MODEL_NAMES["gpt2"] model = OVModelForCausalLM.from_pretrained(model_id, export=True) @@ -681,12 +674,11 @@ def test_compare_with_and_without_past_key_values(self): self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH) self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH) - if self.IS_SUPPORT_STATEFUL: - model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True) - outputs_model_stateful = model_stateful.generate( - **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1 - ) - self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful)) + model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True) + outputs_model_stateful = model_stateful.generate( + **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1 + ) + self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful)) del model_with_pkv del model_without_pkv diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index e7f62f1f61..73224c81b2 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -42,6 +42,7 @@ "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "electra": "hf-internal-testing/tiny-random-electra", "gemma": "fxmarty/tiny-random-GemmaForCausalLM", + "falcon": "fxmarty/really-tiny-falcon-testing", "flaubert": "hf-internal-testing/tiny-random-flaubert", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", "gpt2": "hf-internal-testing/tiny-random-gpt2", From 9fea0b34ffbc43e5b70f5cbeb48c4130a8750fcc Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 8 Apr 2024 18:42:56 +0200 Subject: [PATCH 8/9] fix format --- tests/openvino/test_modeling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 2229bc0ecd..4b268b5b62 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -616,7 +616,6 @@ def test_compare_to_transformers(self, model_arch): del ov_model gc.collect() - @parameterized.expand(SUPPORTED_ARCHITECTURES) @pytest.mark.run_slow @slow From 1eeba6b47a677a2141359408d6e53af24addce8b Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 9 Apr 2024 15:40:08 +0200 Subject: [PATCH 9/9] fix test for chatglm --- tests/openvino/test_modeling.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 4b268b5b62..907c767310 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -592,8 +592,9 @@ def test_compare_to_transformers(self, model_arch): if model_arch == "qwen": return - # Compare batched generation. - tokenizer.pad_token_id = tokenizer.eos_token_id + if model_arch != "chatglm": + tokenizer.pad_token_id = tokenizer.eos_token_id + # Compare batched generation tokenizer.padding_side = "left" tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) ov_model.generation_config.eos_token_id = None