@@ -290,6 +290,7 @@ def test_load_seq2seq_model_from_empty_cache(self):
         _ = ORTModelForSeq2SeqLM.from_pretrained(self.TINY_ONNX_SEQ2SEQ_MODEL_ID, local_files_only=True)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_load_model_cuda_provider(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"])
@@ -321,6 +322,7 @@ def test_load_seq2seq_model_without_past_from_hub(self):
         self.assertIsInstance(model.config, PretrainedConfig)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_load_seq2seq_model_cuda_provider(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"])
@@ -419,6 +421,7 @@ def test_missing_execution_provider(self):
         """
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_model_on_gpu(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID)
         gpu = torch.device("cuda")
@@ -428,6 +431,7 @@ def test_model_on_gpu(self):
 
     # test string device input for to()
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_model_on_gpu_str(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID)
         gpu = torch.device("cuda")
@@ -449,6 +453,7 @@ def test_passing_session_options_seq2seq(self):
         self.assertEqual(model.decoder.session.get_session_options().intra_op_num_threads, 3)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_passing_provider_options(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertEqual(model.model.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "1")
@@ -490,6 +495,7 @@ def test_model_on_gpu_id(self):
         self.assertEqual(model.model.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1")
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_passing_provider_options_seq2seq(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertEqual(
@@ -590,6 +596,7 @@ def test_seq2seq_model_on_cpu_str(self):
         self.assertListEqual(model.providers, ["CPUExecutionProvider"])
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_seq2seq_model_on_gpu(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, use_cache=True)
         gpu = torch.device("cuda")
@@ -631,6 +638,7 @@ def test_seq2seq_model_on_gpu_id(self):
 
     # test string device input for to()
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_seq2seq_model_on_gpu_str(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, use_cache=True)
         model.to("cuda")
@@ -988,6 +996,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1012,6 +1021,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1139,6 +1149,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1160,6 +1171,7 @@ def test_pipeline_on_gpu(self, model_arch):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1298,6 +1310,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1340,6 +1353,7 @@ def test_pipeline_zero_shot_classification(self):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1463,6 +1477,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1487,6 +1502,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1583,6 +1599,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1605,6 +1622,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1699,6 +1717,7 @@ def test_compare_to_transformers(self, model_arch):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1849,6 +1868,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1871,6 +1891,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
     # TRT EP compile time can be long, so we don't test all archs
     @parameterized.expand(grid_parameters({"model_arch": ["gpt2"], "use_cache": [True, False]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1953,6 +1974,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1976,6 +1998,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2094,6 +2117,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -2120,6 +2144,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -2227,6 +2252,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -2253,6 +2279,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -2277,7 +2304,7 @@ def test_compare_to_io_binding(self, model_arch):
 
         # compare tensor outputs
         self.assertTrue(
-            torch.allclose(onnx_outputs.logits, io_outputs.logits),
+            torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-5),
             f"Maxdiff: {torch.abs(onnx_outputs.logits - io_outputs.logits).max()}",
         )
 
@@ -2426,6 +2453,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2450,6 +2478,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
     # TRT EP compile time can be long, so we don't test all archs
     @parameterized.expand(grid_parameters({"model_arch": ["t5"], "use_cache": [True, False]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2537,6 +2566,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2567,6 +2597,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2696,6 +2727,7 @@ def test_pipeline_speech_recognition(self, test_name: str, model_arch: str, use_cache: bool):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2761,6 +2793,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2794,6 +2827,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2950,6 +2984,7 @@ def test_pipeline_image_to_text(self, test_name: str, model_arch: str, use_cache: bool):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {
             "test_name": test_name,
@@ -3049,6 +3084,7 @@ def test_pipeline_ort_model(self, *args, **kwargs):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, *args, **kwargs):
         model_arch, model_id = args
         onnx_model = ORTModelForCustomTasks.from_pretrained(model_id)
@@ -3071,6 +3107,7 @@ def test_default_pipeline_and_model_device(self, *args, **kwargs):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, *args, **kwargs):
         model_arch, model_id = args
         set_seed(SEED)
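Note: `gpu_test` is a custom pytest marker, so it needs to be registered for `pytest --strict-markers` runs to pass and for `-m gpu_test` selection to work without unknown-mark warnings. This diff does not show where that registration lives; a minimal sketch, assuming a `conftest.py` at the test root (hypothetical placement, not part of this diff), would be:

def pytest_configure(config):
    # Register the custom "gpu_test" marker so pytest does not flag it as
    # an unknown mark and GPU-only tests can be selected or deselected.
    config.addinivalue_line("markers", "gpu_test: test requires a CUDA-capable GPU")

With the marker registered, the GPU suite can be run in isolation with `pytest -m gpu_test tests/onnxruntime/test_modeling.py`, or excluded on CPU-only runners with `pytest -m "not gpu_test"`.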