@@ -20,6 +20,7 @@
 from typing import Dict
 
 import numpy as np
+import pytest
 import requests
 import timm
 import torch
@@ -53,6 +54,7 @@
     set_seed,
 )
 from transformers.onnx.utils import get_preprocessor
+from transformers.testing_utils import slow
 from utils_tests import MODEL_NAMES
 
 from optimum.intel import (
@@ -364,6 +366,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
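Each newly gated test pairs the same two decorators: @pytest.mark.run_slow makes the test selectable with "pytest -m run_slow" (the marker name is assumed to be registered in the project's pytest configuration), while transformers' @slow skips it unless slow tests are enabled via the environment. A minimal sketch of the pattern, with slow() simplified from transformers.testing_utils:

import os
import unittest

import pytest


def slow(test_case):
    # Simplified stand-in for transformers.testing_utils.slow:
    # skip the wrapped test unless RUN_SLOW is set in the environment.
    return unittest.skipUnless(os.environ.get("RUN_SLOW"), "test is slow")(test_case)


class ExampleTest(unittest.TestCase):
    @pytest.mark.run_slow  # select with: pytest -m run_slow
    @slow  # skipped unless RUN_SLOW=1 is exported
    def test_example(self):
        self.assertTrue(True)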
@@ -379,6 +383,8 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    @pytest.mark.run_slow
+    @slow
     def test_metric(self):
         model_id = "distilbert-base-cased-distilled-squad"
         set_seed(SEED)
@@ -431,6 +437,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForTokenClassification.from_pretrained(model_id, export=True)
@@ -481,6 +489,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True)
@@ -526,9 +536,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "phi",
         "internlm2",
         "orion",
+        "falcon",
     )
     GENERATION_LENGTH = 100
-    IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")
     REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion")
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -553,37 +563,63 @@ def test_compare_to_transformers(self, model_arch):
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         self.assertTrue(ov_model.use_cache)
-        self.assertEqual(
-            ov_model.stateful, self.IS_SUPPORT_STATEFUL and ov_model.config.model_type not in not_stateful
-        )
-        set_seed(SEED)
-        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
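+        # Stateful execution is now expected whenever the architecture supports it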
+        self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful)
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
-        if model_arch == "qwen":
-            transformers_model.to(torch.float32)
-        tokens = tokenizer(
-            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None
-        )
-        ov_outputs = ov_model(**tokens)
+        tokens = tokenizer("This is a sample output", return_tensors="pt")
 
+        ov_outputs = ov_model(**tokens)
         self.assertTrue("logits" in ov_outputs)
         self.assertIsInstance(ov_outputs.logits, torch.Tensor)
         self.assertTrue("past_key_values" in ov_outputs)
         self.assertIsInstance(ov_outputs.past_key_values, tuple)
-        is_stateful = ov_model.config.model_type not in not_stateful and self.IS_SUPPORT_STATEFUL
+        is_stateful = ov_model.config.model_type not in not_stateful
         self.assertEqual(ov_model.stateful, is_stateful)
         if is_stateful:
             self.assertTrue(len(ov_outputs.past_key_values) == 1 and len(ov_outputs.past_key_values[0]) == 0)
+
+        set_seed(SEED)
+        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
+        if model_arch == "qwen":
+            transformers_model.to(torch.float32)
+
         with torch.no_grad():
             transformers_outputs = transformers_model(**tokens)
 
         # Compare tensor outputs
         self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=1e-4))
+
+        # Qwen tokenizer does not support padding
+        if model_arch == "qwen":
+            return
+
+        if model_arch != "chatglm":
+            tokenizer.pad_token_id = tokenizer.eos_token_id
+        # Compare batched generation
+        tokenizer.padding_side = "left"
+        tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
+        ov_model.generation_config.eos_token_id = None
+        transformers_model.generation_config.eos_token_id = None
+        ov_model.config.eos_token_id = None
+        transformers_model.config.eos_token_id = None
+        gen_config = GenerationConfig(
+            max_new_tokens=30,
+            min_new_tokens=30,
+            num_beams=3,
+            do_sample=False,
+            eos_token_id=None,
+        )
+
+        ov_outputs = ov_model.generate(**tokens, generation_config=gen_config)
+        transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
+        self.assertTrue(torch.allclose(ov_outputs, transformers_outputs))
+
         del transformers_model
         del ov_model
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_kwargs = {}
         model_id = MODEL_NAMES[model_arch]
@@ -613,35 +649,6 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    def test_multiple_inputs(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        if model_arch == "qwen":
-            self.skipTest("Qwen tokenizer does not support padding")
-        model_kwargs = {}
-        if model_arch in self.REMOTE_CODE_MODELS:
-            model_kwargs = {
-                "config": AutoConfig.from_pretrained(model_id, trust_remote_code=True),
-                "trust_remote_code": True,
-            }
-        model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, **model_kwargs)
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
-        tokenizer.pad_token = tokenizer.eos_token
-        texts = ["this is a simple input", "this is a second simple input", "this is a third simple input"]
-        tokens = tokenizer(texts, padding=True, return_tensors="pt")
-        generation_config = GenerationConfig(encoder_no_repeat_ngram_size=0, max_new_tokens=20, num_beams=2)
-        outputs = model.generate(**tokens, generation_config=generation_config)
-        self.assertIsInstance(outputs, torch.Tensor)
-        self.assertEqual(outputs.shape[0], 3)
-        # test that generation result is reproducible
-        outputs2 = model.generate(**tokens, generation_config=generation_config)
-        self.assertIsInstance(outputs2, torch.Tensor)
-        self.assertEqual(outputs2.shape[0], 3)
-        self.assertTrue(torch.allclose(outputs2, outputs))
-        del model
-        gc.collect()
-
     def test_model_and_decoder_same_device(self):
         model_id = MODEL_NAMES["gpt2"]
         model = OVModelForCausalLM.from_pretrained(model_id, export=True)
@@ -667,12 +674,11 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
         self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
         self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
-        if self.IS_SUPPORT_STATEFUL:
-            model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
-            outputs_model_stateful = model_stateful.generate(
-                **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
-            )
-            self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))
+        model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
+        outputs_model_stateful = model_stateful.generate(
+            **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
+        )
+        self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))
 
         del model_with_pkv
         del model_without_pkv
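With the version gate removed, the stateful path in this test now runs unconditionally. A minimal sketch of the assumption it encodes, reusing the gpt2 checkpoint from the surrounding tests:

from optimum.intel import OVModelForCausalLM

# Sketch only: stateful export is assumed to always be available, so no
# is_openvino_version(">=", "2023.3") check guards this path anymore.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True, use_cache=True, stateful=True)
assert model.stateful  # the KV-cache is kept inside the compiled model as internal state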
@@ -851,6 +857,8 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForImageClassification.from_pretrained(model_id, export=True)
@@ -981,6 +989,8 @@ def test_pipeline(self, model_arch):
         gc.collect()
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_generate_utils(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
@@ -1438,6 +1448,8 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         self.assertIn("only supports the tasks", str(context.exception))
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_generate_utils(self, model_arch: str):
         model_id = MODEL_NAMES[model_arch]
         model = OVModelForVision2Seq.from_pretrained(model_id, export=True)
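For reference, the tests gated in this diff stay skipped by default. One way to opt in locally (the test-file path and marker name are assumed from the diff above):

import os

import pytest

os.environ["RUN_SLOW"] = "1"  # read by transformers' @slow decorator
raise SystemExit(pytest.main(["-m", "run_slow", "tests/openvino/test_modeling.py"]))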