
Commit e7108de

Schedule nightly tests (#653)
* Schedule nightly slow tests
* set test to slow
* merge tests
* fix format
* fix test for chatglm
1 parent e79da77 commit e7108de

File tree: 5 files changed (+66, -52 lines)


.github/workflows/test_openvino.yml

+1-1
@@ -35,7 +35,7 @@ jobs:
           pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
       - name: Test with Pytest
         run: |
-          pytest tests/openvino/ --ignore test_modeling_basic
+          pytest tests/openvino/ --ignore test_modeling_basic --durations=0
       - name: Test openvino-nightly
         run: |
           pip uninstall -y openvino

.github/workflows/test_openvino_basic.yml

+3-3
@@ -25,7 +25,7 @@ jobs:
         # Testing lower and upper bound of supported Python versions
         # This also ensures that the test fails if dependencies break for Python 3.7
         python-version: ["3.8", "3.11"]
-        transformers: ['transformers', 'git+https://github.com/huggingface/transformers.git']
+        transformers: ['transformers']
         optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git']

     runs-on: ubuntu-20.04
@@ -42,7 +42,7 @@ jobs:
           # Install openvino manually to prevent dependency conflicts when .[openvino] pins
           # optimum or transformers to a specific version
           # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
-          pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
           pip install .[tests] openvino onnx onnxruntime ${{ matrix.optimum}} ${{ matrix.transformers }}

       - name: Pip freeze
@@ -51,4 +51,4 @@ jobs:
       - name: Test with Pytest
         run: |
           pytest tests/openvino/test_modeling_basic.py
-
+          RUN_SLOW=1 pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0

setup.py

+1
@@ -52,6 +52,7 @@
     "auto-gptq",
     "transformers_stream_generator",
     "einops",
+    "tiktoken",
 ]

 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]

tests/openvino/test_modeling.py

+60-48
@@ -20,6 +20,7 @@
 from typing import Dict

 import numpy as np
+import pytest
 import requests
 import timm
 import torch
@@ -53,6 +54,7 @@
     set_seed,
 )
 from transformers.onnx.utils import get_preprocessor
+from transformers.testing_utils import slow
 from utils_tests import MODEL_NAMES

 from optimum.intel import (
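
The newly imported `slow` decorator gates a test on the `RUN_SLOW` environment variable, which is exactly what the nightly workflow exports. Roughly, and only as a behavioural sketch rather than the actual transformers implementation, it acts like:

    import os
    import unittest

    def slow(test_case):
        # Skip the decorated test unless RUN_SLOW is set to a truthy value.
        run_slow = os.environ.get("RUN_SLOW", "0").lower() in ("1", "true", "yes")
        return unittest.skipUnless(run_slow, "test is slow")(test_case)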
@@ -364,6 +366,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
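
The same two-decorator pattern recurs throughout this file: `@pytest.mark.run_slow` makes the test selectable with `-m "run_slow"`, while `@slow` keeps it skipped unless `RUN_SLOW=1` is exported, so the regular PR workflow never pays for it. A minimal, hypothetical test showing the combination:

    import pytest
    from transformers.testing_utils import slow

    @pytest.mark.run_slow   # selected by: pytest -m "run_slow"
    @slow                   # skipped unless the environment sets RUN_SLOW=1
    def test_expensive_generation():
        # Placeholder body; the real tests export and exercise full models.
        assert True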
@@ -379,6 +383,8 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()

+    @pytest.mark.run_slow
+    @slow
     def test_metric(self):
         model_id = "distilbert-base-cased-distilled-squad"
         set_seed(SEED)
@@ -431,6 +437,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForTokenClassification.from_pretrained(model_id, export=True)
@@ -481,6 +489,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True)
@@ -526,9 +536,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "phi",
         "internlm2",
         "orion",
+        "falcon",
     )
     GENERATION_LENGTH = 100
-    IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")
     REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion")

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -553,37 +563,63 @@ def test_compare_to_transformers(self, model_arch):
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
-        self.assertEqual(
-            ov_model.stateful, self.IS_SUPPORT_STATEFUL and ov_model.config.model_type not in not_stateful
-        )
-        set_seed(SEED)
-        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
+        self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful)
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
-        if model_arch == "qwen":
-            transformers_model.to(torch.float32)
-        tokens = tokenizer(
-            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None
-        )
-        ov_outputs = ov_model(**tokens)
+        tokens = tokenizer("This is a sample output", return_tensors="pt")

+        ov_outputs = ov_model(**tokens)
         self.assertTrue("logits" in ov_outputs)
         self.assertIsInstance(ov_outputs.logits, torch.Tensor)
         self.assertTrue("past_key_values" in ov_outputs)
         self.assertIsInstance(ov_outputs.past_key_values, tuple)
-        is_stateful = ov_model.config.model_type not in not_stateful and self.IS_SUPPORT_STATEFUL
+        is_stateful = ov_model.config.model_type not in not_stateful
         self.assertEqual(ov_model.stateful, is_stateful)
         if is_stateful:
             self.assertTrue(len(ov_outputs.past_key_values) == 1 and len(ov_outputs.past_key_values[0]) == 0)
+
+        set_seed(SEED)
+        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
+        if model_arch == "qwen":
+            transformers_model.to(torch.float32)
+
         with torch.no_grad():
             transformers_outputs = transformers_model(**tokens)

         # Compare tensor outputs
         self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=1e-4))
+
+        # Qwen tokenizer does not support padding
+        if model_arch == "qwen":
+            return
+
+        if model_arch != "chatglm":
+            tokenizer.pad_token_id = tokenizer.eos_token_id
+        # Compare batched generation
+        tokenizer.padding_side = "left"
+        tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
+        ov_model.generation_config.eos_token_id = None
+        transformers_model.generation_config.eos_token_id = None
+        ov_model.config.eos_token_id = None
+        transformers_model.config.eos_token_id = None
+        gen_config = GenerationConfig(
+            max_new_tokens=30,
+            min_new_tokens=30,
+            num_beams=3,
+            do_sample=False,
+            eos_token_id=None,
+        )
+
+        ov_outputs = ov_model.generate(**tokens, generation_config=gen_config)
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_outputs, transformers_outputs))
+
         del transformers_model
         del ov_model
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_kwargs = {}
         model_id = MODEL_NAMES[model_arch]
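
The batched-generation check merged into test_compare_to_transformers above can be reproduced in isolation. A condensed sketch using the tiny GPT-2 checkpoint listed in utils_tests.py (the model choice and the standalone form are illustrative, not part of the commit):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
    from optimum.intel import OVModelForCausalLM

    model_id = "hf-internal-testing/tiny-random-gpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.padding_side = "left"

    ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
    pt_model = AutoModelForCausalLM.from_pretrained(model_id)

    # Fixed-length beam search with EOS disabled keeps both runs deterministic and
    # shape-identical, so the generated token ids can be compared directly.
    gen_config = GenerationConfig(
        max_new_tokens=30, min_new_tokens=30, num_beams=3, do_sample=False, eos_token_id=None
    )
    tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
    ov_ids = ov_model.generate(**tokens, generation_config=gen_config)
    pt_ids = pt_model.generate(**tokens, generation_config=gen_config)
    assert torch.allclose(ov_ids, pt_ids)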
@@ -613,35 +649,6 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()

-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    def test_multiple_inputs(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        if model_arch == "qwen":
-            self.skipTest("Qwen tokenizer does not support padding")
-        model_kwargs = {}
-        if model_arch in self.REMOTE_CODE_MODELS:
-            model_kwargs = {
-                "config": AutoConfig.from_pretrained(model_id, trust_remote_code=True),
-                "trust_remote_code": True,
-            }
-        model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, **model_kwargs)
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
-        tokenizer.pad_token = tokenizer.eos_token
-        texts = ["this is a simple input", "this is a second simple input", "this is a third simple input"]
-        tokens = tokenizer(texts, padding=True, return_tensors="pt")
-        generation_config = GenerationConfig(encoder_no_repeat_ngram_size=0, max_new_tokens=20, num_beams=2)
-        outputs = model.generate(**tokens, generation_config=generation_config)
-        self.assertIsInstance(outputs, torch.Tensor)
-        self.assertEqual(outputs.shape[0], 3)
-        # test that generation result is reproducible
-        outputs2 = model.generate(**tokens, generation_config=generation_config)
-        self.assertIsInstance(outputs2, torch.Tensor)
-        self.assertEqual(outputs2.shape[0], 3)
-        self.assertTrue(torch.allclose(outputs2, outputs))
-        del model
-        gc.collect()
-
     def test_model_and_decoder_same_device(self):
         model_id = MODEL_NAMES["gpt2"]
         model = OVModelForCausalLM.from_pretrained(model_id, export=True)
@@ -667,12 +674,11 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
         self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
         self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
-        if self.IS_SUPPORT_STATEFUL:
-            model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
-            outputs_model_stateful = model_stateful.generate(
-                **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
-            )
-            self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))
+        model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
+        outputs_model_stateful = model_stateful.generate(
+            **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
+        )
+        self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))

         del model_with_pkv
         del model_without_pkv
@@ -851,6 +857,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForImageClassification.from_pretrained(model_id, export=True)
@@ -981,6 +989,8 @@ def test_pipeline(self, model_arch):
         gc.collect()

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_generate_utils(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
@@ -1438,6 +1448,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         self.assertIn("only supports the tasks", str(context.exception))

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_generate_utils(self, model_arch: str):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForVision2Seq.from_pretrained(model_id, export=True)

tests/openvino/utils_tests.py

+1
@@ -42,6 +42,7 @@
     "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder",
     "electra": "hf-internal-testing/tiny-random-electra",
     "gemma": "fxmarty/tiny-random-GemmaForCausalLM",
+    "falcon": "fxmarty/really-tiny-falcon-testing",
     "flaubert": "hf-internal-testing/tiny-random-flaubert",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
     "gpt2": "hf-internal-testing/tiny-random-gpt2",
