@@ -2322,8 +2322,8 @@ def test_merge_from_onnx_and_save(self, model_arch):
         self.assertNotIn(ONNX_DECODER_WITH_PAST_NAME, folder_contents)
         self.assertNotIn(ONNX_WEIGHTS_NAME, folder_contents)

-    @parameterized.expand(grid_parameters(FULL_GRID))
-    def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool):
+    @parameterized.expand(grid_parameters({**FULL_GRID, "num_beams": [1, 3]}))
+    def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int):
         use_io_binding = None
         if use_cache is False:
             use_io_binding = False
@@ -2384,17 +2384,19 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool):
         if model_arch == "falcon":
             # TODO: remove once https://github.com/huggingface/transformers/pull/26873 is released, falcon is broken in transformers
             new_tokens = 5
+
         onnx_outputs = onnx_model.generate(
             **tokens,
-            num_beams=1,
+            num_beams=num_beams,
             do_sample=False,
             min_new_tokens=new_tokens,
             max_new_tokens=new_tokens,
             eos_token_id=None,
         )
+
         transformers_outputs = transformers_model.generate(
             **tokens,
-            num_beams=1,
+            num_beams=num_beams,
             do_sample=False,
             min_new_tokens=new_tokens,
             max_new_tokens=new_tokens,
@@ -4123,11 +4125,23 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
         gc.collect()

     @parameterized.expand(
-        grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]})
+        grid_parameters(
+            {
+                "model_arch": SUPPORTED_ARCHITECTURES,
+                "use_cache": [True],
+                "use_merged": [False, True],
+                "num_beams": [1, 3],
+            }
+        )
     )
     @require_torch_gpu
     def test_compare_generation_to_io_binding(
-        self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool
+        self,
+        test_name: str,
+        model_arch: str,
+        use_cache: bool,
+        use_merged: bool,
+        num_beams: int,
     ):
         if use_cache is False and use_merged is True:
             self.skipTest("use_cache=False, use_merged=True are uncompatible")
@@ -4159,8 +4173,8 @@ def test_compare_generation_to_io_binding(

         tokenizer = get_preprocessor(model_id)
         tokens = tokenizer("This is a sample output", return_tensors="pt").to("cuda")
-        onnx_outputs = onnx_model.generate(**tokens, num_beams=5)
-        io_outputs = io_model.generate(**tokens, num_beams=5)
+        onnx_outputs = onnx_model.generate(**tokens, num_beams=num_beams)
+        io_outputs = io_model.generate(**tokens, num_beams=num_beams)

         # compare tensor outputs
         self.assertTrue(torch.equal(onnx_outputs, io_outputs))
@@ -4555,12 +4569,24 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
         gc.collect()

     @parameterized.expand(
-        grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]})
+        grid_parameters(
+            {
+                "model_arch": SUPPORTED_ARCHITECTURES,
+                "use_cache": [True],
+                "use_merged": [False, True],
+                "num_beams": [1, 5],
+            }
+        )
     )
     @require_torch_gpu
     @pytest.mark.cuda_ep_test
     def test_compare_generation_to_io_binding(
-        self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool
+        self,
+        test_name: str,
+        model_arch: str,
+        use_cache: bool,
+        use_merged: bool,
+        num_beams: int,
     ):
         if use_cache is False and use_merged is True:
             self.skipTest("use_cache=False, use_merged=True are uncompatible")
@@ -4586,8 +4612,8 @@ def test_compare_generation_to_io_binding(
         data = self._generate_random_audio_data()
         features = processor.feature_extractor(data, return_tensors="pt").to("cuda")

-        onnx_outputs = onnx_model.generate(**features, num_beams=5)
-        io_outputs = io_model.generate(**features, num_beams=5)
+        onnx_outputs = onnx_model.generate(**features, num_beams=num_beams)
+        io_outputs = io_model.generate(**features, num_beams=num_beams)

         # compare tensor outputs
         self.assertTrue(torch.equal(onnx_outputs, io_outputs))
@@ -4920,12 +4946,19 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
         gc.collect()

     @parameterized.expand(
-        grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]})
+        grid_parameters(
+            {
+                "model_arch": SUPPORTED_ARCHITECTURES,
+                "use_cache": [True],
+                "use_merged": [False, True],
+                "num_beams": [1, 3],
+            }
+        )
     )
     @require_torch_gpu
     @pytest.mark.cuda_ep_test
     def test_compare_generation_to_io_binding(
-        self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool
+        self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool, num_beams: int
     ):
         if use_cache is False and use_merged is True:
             self.skipTest("use_cache=False, use_merged=True are uncompatible")
@@ -4951,8 +4984,8 @@ def test_compare_generation_to_io_binding(
         data = self._get_sample_image()
         features = feature_extractor(data, return_tensors="pt").to("cuda")

-        onnx_outputs = onnx_model.generate(**features, num_beams=5)
-        io_outputs = io_model.generate(**features, num_beams=5)
+        onnx_outputs = onnx_model.generate(**features, num_beams=num_beams)
+        io_outputs = io_model.generate(**features, num_beams=num_beams)

         # compare tensor outputs
         self.assertTrue(torch.equal(onnx_outputs, io_outputs))
@@ -5336,10 +5369,22 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
         gc.collect()

     @parameterized.expand(
-        grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]})
+        grid_parameters(
+            {
+                "model_arch": SUPPORTED_ARCHITECTURES,
+                "use_cache": [True],
+                "use_merged": [False, True],
+                "num_beams": [1, 3],
+            }
+        )
     )
     def test_compare_generation_to_io_binding(
-        self, test_name: str, model_arch: str, use_cache: bool, use_merged: bool
+        self,
+        test_name: str,
+        model_arch: str,
+        use_cache: bool,
+        use_merged: bool,
+        num_beams: int,
     ):
         if use_cache is False and use_merged is True:
             self.skipTest("use_cache=False, use_merged=True are uncompatible")
@@ -5362,8 +5407,8 @@ def test_compare_generation_to_io_binding(
         inputs = preprocessor(images=[self.IMAGE, self.IMAGE], text=question, padding=True, return_tensors="pt")
         del inputs["decoder_attention_mask"]
         del inputs["decoder_input_ids"]
-        onnx_outputs = onnx_model.generate(**inputs, num_beams=5)
-        io_outputs = io_model.generate(**inputs, num_beams=5)
+        onnx_outputs = onnx_model.generate(**inputs, num_beams=num_beams)
+        io_outputs = io_model.generate(**inputs, num_beams=num_beams)

         # compare tensor outputs
         self.assertTrue(torch.equal(onnx_outputs, io_outputs))
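
Note on the pattern used throughout these hunks: parameterized.expand consumes the rows produced by grid_parameters, so adding "num_beams": [1, 3] (or [1, 5]) to the grid runs every ONNX-vs-IO-binding comparison under both greedy decoding (num_beams=1) and beam search. Below is a minimal sketch of that expansion, not optimum's actual grid_parameters implementation; it assumes the helper yields a generated test name followed by the parameter values, matching the test signatures in the diff, and the name grid_parameters_sketch is hypothetical.

# Minimal sketch (assumption: NOT optimum's actual implementation) of how a
# grid_parameters-style helper expands a dict of option lists into the rows
# consumed by parameterized.expand in the tests above.
from itertools import product
from typing import Any, Dict, Iterable, Iterator, List


def grid_parameters_sketch(parameters: Dict[str, Iterable[Any]]) -> Iterator[List[Any]]:
    """Yield [test_name, *values] for the Cartesian product of all option lists."""
    keys = list(parameters)
    for values in product(*(list(parameters[k]) for k in keys)):
        # The generated name becomes the test_name argument, e.g. "gpt2_True_False_3".
        test_name = "_".join(str(v) for v in values)
        yield [test_name, *values]


# Each row maps positionally onto a signature like:
#   def test_compare_generation_to_io_binding(self, test_name, model_arch,
#                                             use_cache, use_merged, num_beams): ...
for case in grid_parameters_sketch(
    {"model_arch": ["gpt2"], "use_cache": [True], "use_merged": [False, True], "num_beams": [1, 3]}
):
    print(case)  # e.g. ['gpt2_True_False_1', 'gpt2', True, False, 1]

Threading num_beams through the grid, rather than hardcoding num_beams=5 at each generate() call site, gives every beam setting its own named test case, so a beam-search-only IO-binding regression fails in isolation instead of being masked by the greedy path.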