Schedule nightly slow tests #653

Merged · 9 commits · Apr 10, 2024
2 changes: 1 addition & 1 deletion .github/workflows/test_openvino.yml
@@ -35,7 +35,7 @@ jobs:
pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
- name: Test with Pytest
run: |
pytest tests/openvino/ --ignore test_modeling_basic
pytest tests/openvino/ --ignore test_modeling_basic --durations=0
- name: Test openvino-nightly
run: |
pip uninstall -y openvino
6 changes: 3 additions & 3 deletions .github/workflows/test_openvino_basic.yml
@@ -25,7 +25,7 @@ jobs:
# Testing lower and upper bound of supported Python versions
# This also ensures that the test fails if dependencies break for Python 3.8
python-version: ["3.8", "3.11"]
transformers: ['transformers', 'git+https://github.com/huggingface/transformers.git']
transformers: ['transformers']
optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git']

runs-on: ubuntu-20.04
@@ -42,7 +42,7 @@ jobs:
# Install openvino manually to prevent dependency conflicts when .[openvino] pins
# optimum or transformers to a specific version
# Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install .[tests] openvino onnx onnxruntime ${{ matrix.optimum}} ${{ matrix.transformers }}

- name: Pip freeze
@@ -51,4 +51,4 @@
- name: Test with Pytest
run: |
pytest tests/openvino/test_modeling_basic.py

RUN_SLOW=1 pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0
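A note on the new nightly command: `-m "run_slow"` selects only tests carrying the `run_slow` marker, `RUN_SLOW=1` is the environment flag the `slow` decorator from `transformers.testing_utils` checks before letting those tests run, and `--durations=0` makes pytest report the runtime of every test. Below is a minimal sketch of the marker registration this selection relies on; the file name and wording are assumptions, not part of this PR:

```python
# conftest.py (sketch): registering the custom marker keeps pytest from warning
# about an unknown "run_slow" mark when the nightly job filters with -m "run_slow".
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "run_slow: heavy tests that only run in the scheduled nightly job"
    )
```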
1 change: 1 addition & 0 deletions setup.py
@@ -52,6 +52,7 @@
"auto-gptq",
"transformers_stream_generator",
"einops",
"tiktoken",
]

QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
108 changes: 60 additions & 48 deletions tests/openvino/test_modeling.py
@@ -20,6 +20,7 @@
from typing import Dict

import numpy as np
import pytest
import requests
import timm
import torch
@@ -53,6 +54,7 @@
set_seed,
)
from transformers.onnx.utils import get_preprocessor
from transformers.testing_utils import slow
from utils_tests import MODEL_NAMES

from optimum.intel import (
@@ -364,6 +366,8 @@ def test_compare_to_transformers(self, model_arch):
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_pipeline(self, model_arch):
model_id = MODEL_NAMES[model_arch]
model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
@@ -379,6 +383,8 @@ def test_pipeline(self, model_arch):
del model
gc.collect()

@pytest.mark.run_slow
@slow
def test_metric(self):
model_id = "distilbert-base-cased-distilled-squad"
set_seed(SEED)
@@ -431,6 +437,8 @@ def test_compare_to_transformers(self, model_arch):
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_pipeline(self, model_arch):
model_id = MODEL_NAMES[model_arch]
model = OVModelForTokenClassification.from_pretrained(model_id, export=True)
@@ -481,6 +489,8 @@ def test_compare_to_transformers(self, model_arch):
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_pipeline(self, model_arch):
model_id = MODEL_NAMES[model_arch]
model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True)
@@ -526,9 +536,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
"phi",
"internlm2",
"orion",
"falcon",
)
GENERATION_LENGTH = 100
IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")
REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion")

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -553,37 +563,63 @@ def test_compare_to_transformers(self, model_arch):
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
self.assertIsInstance(ov_model.config, PretrainedConfig)
self.assertTrue(ov_model.use_cache)
self.assertEqual(
ov_model.stateful, self.IS_SUPPORT_STATEFUL and ov_model.config.model_type not in not_stateful
)
set_seed(SEED)
transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
if model_arch == "qwen":
transformers_model.to(torch.float32)
tokens = tokenizer(
"This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None
)
ov_outputs = ov_model(**tokens)
tokens = tokenizer("This is a sample output", return_tensors="pt")

ov_outputs = ov_model(**tokens)
self.assertTrue("logits" in ov_outputs)
self.assertIsInstance(ov_outputs.logits, torch.Tensor)
self.assertTrue("past_key_values" in ov_outputs)
self.assertIsInstance(ov_outputs.past_key_values, tuple)
is_stateful = ov_model.config.model_type not in not_stateful and self.IS_SUPPORT_STATEFUL
is_stateful = ov_model.config.model_type not in not_stateful
self.assertEqual(ov_model.stateful, is_stateful)
if is_stateful:
self.assertTrue(len(ov_outputs.past_key_values) == 1 and len(ov_outputs.past_key_values[0]) == 0)

set_seed(SEED)
transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
if model_arch == "qwen":
transformers_model.to(torch.float32)

with torch.no_grad():
transformers_outputs = transformers_model(**tokens)

# Compare tensor outputs
self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=1e-4))

# Qwen tokenizer does not support padding
if model_arch == "qwen":
return

if model_arch != "chatglm":
tokenizer.pad_token_id = tokenizer.eos_token_id
# Compare batched generation
tokenizer.padding_side = "left"
tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
ov_model.generation_config.eos_token_id = None
transformers_model.generation_config.eos_token_id = None
ov_model.config.eos_token_id = None
transformers_model.config.eos_token_id = None
gen_config = GenerationConfig(
max_new_tokens=30,
min_new_tokens=30,
num_beams=3,
do_sample=False,
eos_token_id=None,
)

ov_outputs = ov_model.generate(**tokens, generation_config=gen_config)
transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
self.assertTrue(torch.allclose(ov_outputs, transformers_outputs))

del transformers_model
del ov_model
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_pipeline(self, model_arch):
model_kwargs = {}
model_id = MODEL_NAMES[model_arch]
@@ -613,35 +649,6 @@ def test_pipeline(self, model_arch):
del model
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_multiple_inputs(self, model_arch):
model_id = MODEL_NAMES[model_arch]
set_seed(SEED)
if model_arch == "qwen":
self.skipTest("Qwen tokenizer does not support padding")
model_kwargs = {}
if model_arch in self.REMOTE_CODE_MODELS:
model_kwargs = {
"config": AutoConfig.from_pretrained(model_id, trust_remote_code=True),
"trust_remote_code": True,
}
model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
tokenizer.pad_token = tokenizer.eos_token
texts = ["this is a simple input", "this is a second simple input", "this is a third simple input"]
tokens = tokenizer(texts, padding=True, return_tensors="pt")
generation_config = GenerationConfig(encoder_no_repeat_ngram_size=0, max_new_tokens=20, num_beams=2)
outputs = model.generate(**tokens, generation_config=generation_config)
self.assertIsInstance(outputs, torch.Tensor)
self.assertEqual(outputs.shape[0], 3)
# test that generation result is reproducible
outputs2 = model.generate(**tokens, generation_config=generation_config)
self.assertIsInstance(outputs2, torch.Tensor)
self.assertEqual(outputs2.shape[0], 3)
self.assertTrue(torch.allclose(outputs2, outputs))
del model
gc.collect()

def test_model_and_decoder_same_device(self):
model_id = MODEL_NAMES["gpt2"]
model = OVModelForCausalLM.from_pretrained(model_id, export=True)
@@ -667,12 +674,11 @@ def test_compare_with_and_without_past_key_values(self):
self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
if self.IS_SUPPORT_STATEFUL:
model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
outputs_model_stateful = model_stateful.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)
self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))
model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
outputs_model_stateful = model_stateful.generate(
**tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
)
self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))

del model_with_pkv
del model_without_pkv
@@ -851,6 +857,8 @@ def test_compare_to_transformers(self, model_arch):
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_pipeline(self, model_arch):
model_id = MODEL_NAMES[model_arch]
model = OVModelForImageClassification.from_pretrained(model_id, export=True)
@@ -981,6 +989,8 @@ def test_pipeline(self, model_arch):
gc.collect()

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_generate_utils(self, model_arch):
model_id = MODEL_NAMES[model_arch]
model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
Expand Down Expand Up @@ -1438,6 +1448,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
self.assertIn("only supports the tasks", str(context.exception))

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@pytest.mark.run_slow
@slow
def test_generate_utils(self, model_arch: str):
model_id = MODEL_NAMES[model_arch]
model = OVModelForVision2Seq.from_pretrained(model_id, export=True)
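The recurring change in this file pairs `@pytest.mark.run_slow`, which lets the nightly workflow select tests with `-m "run_slow"`, with `@slow` from `transformers.testing_utils`, which skips the test unless `RUN_SLOW=1` is set. A rough, simplified approximation of that gating (not the actual transformers implementation):

```python
import os
import unittest

import pytest


def slow(test_case):
    # Approximation of transformers.testing_utils.slow: skip unless RUN_SLOW=1.
    return unittest.skipUnless(os.environ.get("RUN_SLOW", "0") == "1", "test is slow")(test_case)


class ExampleIntegrationTest(unittest.TestCase):
    @pytest.mark.run_slow  # selectable via: pytest -m "run_slow"
    @slow                  # skipped in the default CI jobs, runs when RUN_SLOW=1
    def test_pipeline(self):
        self.assertTrue(True)
```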
1 change: 1 addition & 0 deletions tests/openvino/utils_tests.py
@@ -42,6 +42,7 @@
"donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder",
"electra": "hf-internal-testing/tiny-random-electra",
"gemma": "fxmarty/tiny-random-GemmaForCausalLM",
"falcon": "fxmarty/really-tiny-falcon-testing",
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
"gpt2": "hf-internal-testing/tiny-random-gpt2",
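The new `falcon` entry pairs with the `"falcon"` architecture added to `SUPPORTED_ARCHITECTURES` in `test_modeling.py`: the parameterized causal-LM tests resolve the tiny checkpoint through `MODEL_NAMES` before exporting it. A minimal sketch of that lookup, assuming `tests/openvino` is on the import path as it is in CI:

```python
from optimum.intel import OVModelForCausalLM
from utils_tests import MODEL_NAMES

# "falcon" now resolves to the tiny test checkpoint registered above.
model_id = MODEL_NAMES["falcon"]  # "fxmarty/really-tiny-falcon-testing"
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
```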