
Commit 8671303

Fix GPU tests (#758)
fix tests
1 parent 4d3ec82 commit 8671303

File tree

optimum/onnxruntime/io_binding/io_binding_helper.py
tests/onnxruntime/Dockerfile_onnxruntime_gpu
tests/onnxruntime/test_modeling.py

3 files changed (+42 -5 lines changed)

optimum/onnxruntime/io_binding/io_binding_helper.py (+3 -3)

@@ -158,9 +158,9 @@ def prepare_io_binding(ort_model: "ORTModel", **inputs) -> ort.IOBinding:
         Returns an IOBinding object for an inference session. This method is for general purpose, if the inputs and outputs
         are determined, you can prepare data buffers directly to avoid tensor transfers across frameworks.
         """
-        if not all(input_name in inputs.keys() for input_name in ort_model.input_names):
+        if not all(input_name in inputs.keys() for input_name in ort_model.inputs_names):
             raise ValueError(
-                f"The ONNX model takes {ort_model.input_names.keys()} as inputs, but only {inputs.keys()} are given."
+                f"The ONNX model takes {ort_model.inputs_names.keys()} as inputs, but only {inputs.keys()} are given."
             )
 
         name_to_np_type = TypeHelper.get_io_numpy_type_map(ort_model.model)
@@ -169,7 +169,7 @@ def prepare_io_binding(ort_model: "ORTModel", **inputs) -> ort.IOBinding:
         io_binding = ort_model.model.io_binding()
 
         # Bind inputs
-        for input_name in ort_model.input_names:
+        for input_name in ort_model.inputs_names:
             onnx_input = inputs.pop(input_name)
             onnx_input = onnx_input.contiguous()

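The helper above builds on ONNX Runtime's IO binding API, which lets the session read inputs from, and write outputs to, buffers that already live on the GPU instead of round-tripping through NumPy on the host. A minimal sketch of that underlying API, assuming a CUDA-capable machine; the model path and the "input_ids"/"logits" names are illustrative only, not taken from this commit:

import numpy as np
import onnxruntime
import torch

# Hypothetical session; "model.onnx" and the tensor names are placeholders.
session = onnxruntime.InferenceSession("model.onnx", providers=["CUDAExecutionProvider"])

input_ids = torch.ones(1, 8, dtype=torch.int64, device="cuda").contiguous()

io_binding = session.io_binding()
# Bind the input directly to the CUDA buffer owned by the torch tensor.
io_binding.bind_input(
    name="input_ids",
    device_type="cuda",
    device_id=0,
    element_type=np.int64,
    shape=tuple(input_ids.shape),
    buffer_ptr=input_ids.data_ptr(),
)
# Let ONNX Runtime allocate the output on the same device.
io_binding.bind_output("logits", device_type="cuda")

session.run_with_iobinding(io_binding)
logits = io_binding.copy_outputs_to_cpu()[0]

The changed lines above perform the same kind of binding for every name in ort_model.inputs_names, which is why the renamed attribute appears in both the validation check and the binding loop.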
tests/onnxruntime/Dockerfile_onnxruntime_gpu (+1 -1)

@@ -23,4 +23,4 @@ RUN pip install datasets evaluate scipy
 COPY . /workspace/optimum
 RUN pip install /workspace/optimum[onnxruntime-gpu,tests]

-CMD python3 -m unittest discover -s onnxruntime -p 'test_*.py'
+CMD pytest onnxruntime/test_*.py --durations=0 -s -m gpu_test

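The image now runs the suite with pytest, selecting only tests carrying the gpu_test marker (-m gpu_test) and reporting per-test durations (--durations=0). For the marker filter to run without "unknown marker" warnings, the marker has to be registered with pytest; the repository's pytest configuration may already do this, but a minimal conftest.py sketch (hypothetical, not part of this commit) looks like:

# conftest.py (hypothetical sketch)
def pytest_configure(config):
    # Declare the custom marker so `pytest -m gpu_test` filters on it
    # without raising PytestUnknownMarkWarning.
    config.addinivalue_line("markers", "gpu_test: test that requires a CUDA-capable GPU")

With the marker registered, a CPU-only run can likewise deselect these tests with -m "not gpu_test".
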
tests/onnxruntime/test_modeling.py (+38 -1)

@@ -290,6 +290,7 @@ def test_load_seq2seq_model_from_empty_cache(self):
         _ = ORTModelForSeq2SeqLM.from_pretrained(self.TINY_ONNX_SEQ2SEQ_MODEL_ID, local_files_only=True)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_load_model_cuda_provider(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"])
@@ -321,6 +322,7 @@ def test_load_seq2seq_model_without_past_from_hub(self):
         self.assertIsInstance(model.config, PretrainedConfig)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_load_seq2seq_model_cuda_provider(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"])
@@ -419,6 +421,7 @@ def test_missing_execution_provider(self):
         """
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_model_on_gpu(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID)
         gpu = torch.device("cuda")
@@ -428,6 +431,7 @@ def test_model_on_gpu(self):
 
     # test string device input for to()
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_model_on_gpu_str(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID)
         gpu = torch.device("cuda")
@@ -449,6 +453,7 @@ def test_passing_session_options_seq2seq(self):
         self.assertEqual(model.decoder.session.get_session_options().intra_op_num_threads, 3)
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_passing_provider_options(self):
         model = ORTModel.from_pretrained(self.ONNX_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertEqual(model.model.get_provider_options()["CUDAExecutionProvider"]["do_copy_in_default_stream"], "1")
@@ -490,6 +495,7 @@ def test_model_on_gpu_id(self):
         self.assertEqual(model.model.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1")
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_passing_provider_options_seq2seq(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, provider="CUDAExecutionProvider")
         self.assertEqual(
@@ -590,6 +596,7 @@ def test_seq2seq_model_on_cpu_str(self):
         self.assertListEqual(model.providers, ["CPUExecutionProvider"])
 
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_seq2seq_model_on_gpu(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, use_cache=True)
         gpu = torch.device("cuda")
@@ -631,6 +638,7 @@ def test_seq2seq_model_on_gpu_id(self):
 
     # test string device input for to()
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_seq2seq_model_on_gpu_str(self):
         model = ORTModelForSeq2SeqLM.from_pretrained(self.ONNX_SEQ2SEQ_MODEL_ID, use_cache=True)
         model.to("cuda")
@@ -988,6 +996,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1012,6 +1021,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1139,6 +1149,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1160,6 +1171,7 @@ def test_pipeline_on_gpu(self, model_arch):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1298,6 +1310,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1340,6 +1353,7 @@ def test_pipeline_zero_shot_classification(self):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1463,6 +1477,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1487,6 +1502,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1583,6 +1599,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -1605,6 +1622,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1699,6 +1717,7 @@ def test_compare_to_transformers(self, model_arch):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -1849,6 +1868,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1871,6 +1891,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool)
     # TRT EP compile time can be long, so we don't test all archs
     @parameterized.expand(grid_parameters({"model_arch": ["gpt2"], "use_cache": [True, False]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1953,6 +1974,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -1976,6 +1998,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2094,6 +2117,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -2120,6 +2144,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -2227,6 +2252,7 @@ def test_pipeline_model_is_none(self):
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
         if provider == "TensorrtExecutionProvider" and model_arch != self.__class__.SUPPORTED_ARCHITECTURES[0]:
             self.skipTest("testing a single arch for TensorrtExecutionProvider")
@@ -2253,6 +2279,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, model_arch):
         model_args = {"test_name": model_arch, "model_arch": model_arch}
         self._setup(model_args)
@@ -2277,7 +2304,7 @@ def test_compare_to_io_binding(self, model_arch):
 
         # compare tensor outputs
         self.assertTrue(
-            torch.allclose(onnx_outputs.logits, io_outputs.logits),
+            torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-5),
             f" Maxdiff: {torch.abs(onnx_outputs.logits - io_outputs.logits).max()}",
         )

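The only functional change in the hunk above is the added absolute tolerance. As a brief aside (plain PyTorch, not taken from the test file): torch.allclose treats a pair of elements as equal when |a - b| <= atol + rtol * |b|, so with the default atol of 1e-8 an element whose reference value is near zero fails even for tiny GPU rounding differences, which the explicit atol=1e-5 accommodates:

import torch

a = torch.tensor([1.0, 0.0])
b = torch.tensor([1.000005, 0.0000049])  # differences of ~5e-6, typical of kernel rounding

print(torch.allclose(a, b))              # False: the near-zero element's threshold is ~1e-8
print(torch.allclose(a, b, atol=1e-5))   # True: absolute differences up to 1e-5 are accepted
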
@@ -2426,6 +2453,7 @@ def test_pipeline_model_is_none(self):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2450,6 +2478,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool)
     # TRT EP compile time can be long, so we don't test all archs
     @parameterized.expand(grid_parameters({"model_arch": ["t5"], "use_cache": [True, False]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_trt_execution_provider(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2537,6 +2566,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2567,6 +2597,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2696,6 +2727,7 @@ def test_pipeline_speech_recognition(self, test_name: str, model_arch: str, use_
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2761,6 +2793,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2794,6 +2827,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache:
 
     @parameterized.expand(grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True]}))
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {"test_name": test_name, "model_arch": model_arch, "use_cache": use_cache}
         self._setup(model_args)
@@ -2950,6 +2984,7 @@ def test_pipeline_image_to_text(self, test_name: str, model_arch: str, use_cache
         )
     )
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, test_name: str, model_arch: str, use_cache: bool):
         model_args = {
             "test_name": test_name,
@@ -3049,6 +3084,7 @@ def test_pipeline_ort_model(self, *args, **kwargs):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_pipeline_on_gpu(self, *args, **kwargs):
         model_arch, model_id = args
         onnx_model = ORTModelForCustomTasks.from_pretrained(model_id)
@@ -3071,6 +3107,7 @@ def test_default_pipeline_and_model_device(self, *args, **kwargs):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_MODEL_ID.items())
     @require_torch_gpu
+    @pytest.mark.gpu_test
     def test_compare_to_io_binding(self, *args, **kwargs):
         model_arch, model_id = args
         set_seed(SEED)

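Taken together, the pattern is the same in every hunk of this file: each GPU-dependent test keeps @require_torch_gpu, which skips the test at runtime when no CUDA device is available, and additionally gains @pytest.mark.gpu_test, so the Dockerfile's pytest -m gpu_test invocation selects exactly these tests in the GPU CI image. A representative decorated test inside its test class, abridged from the first hunk above:

    @require_torch_gpu
    @pytest.mark.gpu_test
    def test_load_model_cuda_provider(self):
        model = ORTModel.from_pretrained(self.ONNX_MODEL_ID, provider="CUDAExecutionProvider")
        self.assertListEqual(model.providers, ["CUDAExecutionProvider", "CPUExecutionProvider"])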