@@ -7,11 +7,12 @@
 import torch
 from torch._inductor import config
 from torch._inductor.test_case import TestCase as InductorTestCase
+from torch._inductor.utils import is_gpu
 from torch.testing._internal.common_device_type import (
     get_desired_device_type_test_bases,
 )
 from torch.testing._internal.common_utils import slowTest, TEST_WITH_ASAN
-from torch.testing._internal.inductor_utils import HAS_CUDA
+from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU


 try:
@@ -38,29 +39,40 @@
     raise


-_desired_test_bases = get_desired_device_type_test_bases()
-RUN_CUDA = (
-    HAS_CUDA
-    and any(getattr(x, "device_type", "") == "cuda" for x in _desired_test_bases)
+_desired_test_bases = get_desired_device_type_test_bases(allow_xpu=True)
+RUN_GPU = (
+    HAS_GPU
+    and any(is_gpu(getattr(x, "device_type", "")) for x in _desired_test_bases)
     and not TEST_WITH_ASAN
 )


-class CudaWrapperTemplate:
+class GpuWrapperTemplate:
     pass


-class TestCudaWrapper(InductorTestCase):
-    device = "cuda"
+class TestGpuWrapper(InductorTestCase):
+    device = GPU_TYPE


-class DynamicShapesCudaWrapperCudaTests(InductorTestCase):
-    device = "cuda"
+class DynamicShapesGpuWrapperGpuTests(InductorTestCase):
+    device = GPU_TYPE


-test_failures_cuda_wrapper = {
+test_failures_gpu_wrapper = {
     "test_mm_plus_mm2_cuda_dynamic_shapes": test_torchinductor.TestFailure(
-        ("cuda_wrapper",), is_skip=True
+        ("gpu_wrapper",), is_skip=True
+    ),
+    "test_randint_xpu": test_torchinductor.TestFailure(("gpu_wrapper",), is_skip=False),
+    "test_randint_xpu_dynamic_shapes": test_torchinductor.TestFailure(
+        ("gpu_wrapper",), is_skip=False
+    ),
+    # ATen ops: scaled_dot_product_efficient_attention not implemented on XPU.
+    "test_scaled_dot_product_efficient_attention_xpu": test_torchinductor.TestFailure(
+        ("gpu_wrapper",), is_skip=False
+    ),
+    "test_scaled_dot_product_efficient_attention_xpu_dynamic_shapes": test_torchinductor.TestFailure(
+        ("gpu_wrapper",), is_skip=False
     ),
 }

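For context on the gating this hunk rewrites: `RUN_GPU` replaces `RUN_CUDA` so the suite runs whenever any desired device-type test base targets a GPU backend, not CUDA specifically. A minimal sketch of the assumed predicate, treating `is_gpu` as simple membership in the backends Inductor treats as GPUs (its real definition lives in `torch/_inductor/utils.py`):

# Sketch only: assumes is_gpu reduces to a membership test over the
# GPU backends Inductor knows about at the time of this PR.
GPU_BACKENDS = ("cuda", "xpu")

def is_gpu(device_type: str) -> bool:
    return device_type in GPU_BACKENDS

# With that, the gate accepts e.g. a test base whose device_type is "xpu":
class FakeTestBase:
    device_type = "xpu"

assert any(is_gpu(getattr(x, "device_type", "")) for x in [FakeTestBase])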
@@ -114,20 +126,34 @@ def fn(self):
     fn.__dict__ = copy.deepcopy(func.__dict__)
     if condition:
         setattr(
-            CudaWrapperTemplate,
+            GpuWrapperTemplate,
             test_name,
             fn,
         )


-if RUN_CUDA:
+if RUN_GPU:

     class BaseTest(NamedTuple):
         name: str
-        device: str = "cuda"
+        device: str = GPU_TYPE
         tests: InductorTestCase = test_torchinductor.GPUTests()
         check_code: bool = True

+    # XPU Not implemented yet
+    XPU_BASE_TEST_SKIP = [
+        "test_foreach_cpp_wrapper",
+        "test_enable_dynamic_shapes_cpp_wrapper",
+        "test_dynamic_shapes_persistent_reduction_mixed_x_dim",
+        "test_cat_slice_cat",
+        "test_mm_plus_mm2",
+        "test_mm_plus_mm3",
+        "test_addmm",
+        "test_linear_relu",
+        "test_fft_real_input",
+        "test_fft_real_input_real_output",
+    ]
+
     # Maintain two separate test lists for cuda and cpp for now
     for item in [
         BaseTest("test_add_complex"),
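The `setattr` edited at the top of this hunk is the heart of the template pattern: `make_test_case` wraps each existing Inductor test in a `cpp_wrapper`-patched function and hangs it on the (now renamed) `GpuWrapperTemplate`. A condensed, hedged sketch of that pattern, with the wrapper body simplified to a plain forwarding call (the real one also patches config and drives setUp/tearDown):

import copy

class GpuWrapperTemplate:  # stand-in for the template class in the diff
    pass

def make_test_case(name, device, tests, condition=True):
    # e.g. name="test_add_complex", device="xpu" -> "test_add_complex_xpu"
    test_name = f"{name}_{device}" if device else name
    func = getattr(tests, test_name)

    def fn(self):
        func()  # simplified: the real wrapper patches config and runs setUp/tearDown

    fn.__name__ = test_name
    # Preserve metadata (e.g. expected-failure markers) attached to the original.
    fn.__dict__ = copy.deepcopy(func.__dict__)
    if condition:
        setattr(GpuWrapperTemplate, test_name, fn)

# Hypothetical usage: the generated method lands on the template class.
class _FakeTests:
    def test_add_complex_xpu(self):
        pass

make_test_case("test_add_complex", "xpu", _FakeTests())
assert hasattr(GpuWrapperTemplate, "test_add_complex_xpu")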
@@ -236,40 +262,41 @@ class BaseTest(NamedTuple):
             tests=test_select_algorithm.TestSelectAlgorithm(),
         ),
     ]:
+        if item.device == "xpu" and item.name in XPU_BASE_TEST_SKIP:
+            continue
         make_test_case(item.name, item.device, item.tests, check_code=item.check_code)

     from torch._inductor.utils import is_big_gpu

-    if is_big_gpu(0):
+    if GPU_TYPE == "cuda" and is_big_gpu(0):
         skip_list = ["test_addmm", "test_linear_relu"]
         # need to skip instead of omit, otherwise fbcode ci can be flaky
         for test_name in skip_list:
-            test_failures_cuda_wrapper[
+            test_failures_gpu_wrapper[
                 f"{test_name}_cuda"
-            ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)
-            test_failures_cuda_wrapper[
-                f"{test_name}_cuda_dynamic_shapes"
-            ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)
+            ] = test_torchinductor.TestFailure(("gpu_wrapper",), is_skip=True)
+            test_failures_gpu_wrapper[
+                f"{test_name}_gpu_dynamic_shapes"
+            ] = test_torchinductor.TestFailure(("gpu_wrapper",), is_skip=True)

     test_torchinductor.copy_tests(
-        CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper", test_failures_cuda_wrapper
+        GpuWrapperTemplate, TestGpuWrapper, "gpu_wrapper", test_failures_gpu_wrapper
     )

-    DynamicShapesCudaWrapperTemplate = (
-        test_torchinductor_dynamic_shapes.make_dynamic_cls(CudaWrapperTemplate)
+    DynamicShapesGpuWrapperTemplate = (
+        test_torchinductor_dynamic_shapes.make_dynamic_cls(GpuWrapperTemplate)
     )

     test_torchinductor.copy_tests(
-        DynamicShapesCudaWrapperTemplate,
-        DynamicShapesCudaWrapperCudaTests,
-        "cuda_wrapper",
-        test_failures_cuda_wrapper,
+        DynamicShapesGpuWrapperTemplate,
+        DynamicShapesGpuWrapperGpuTests,
+        "gpu_wrapper",
+        test_failures_gpu_wrapper,
         xfail_prop="_expected_failure_dynamic_wrapper",
     )

 if __name__ == "__main__":
     from torch._inductor.test_case import run_tests

-    print(f"FS: run_cuda {RUN_CUDA}")
-    if RUN_CUDA:
+    if RUN_GPU:
         run_tests(needs="filelock")
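Finally, the renamed failure dict only takes effect through `copy_tests`, which clones every test from the template onto the concrete classes and consults the dict by test name. Roughly, and only as an illustration of the contract (the real implementation and `TestFailure` live in `test_torchinductor.py`; the names below are stand-ins):

import unittest
from collections import namedtuple

# Illustrative stand-in for test_torchinductor.TestFailure.
TestFailure = namedtuple("TestFailure", ["suffixes", "is_skip"])

def copy_tests(template, target_cls, suffix, test_failures=None):
    # Clone each test_* attribute; entries in test_failures whose suffixes
    # include this suite become skips (is_skip=True) or expected failures.
    for name in dir(template):
        if not name.startswith("test_"):
            continue
        fn = getattr(template, name)
        failure = (test_failures or {}).get(name)
        if failure is not None and suffix in failure.suffixes:
            if failure.is_skip:
                fn = unittest.skip("known failure")(fn)
            else:
                fn = unittest.expectedFailure(fn)
        setattr(target_cls, name, fn)

This is why the XPU entries above use is_skip=False: they are expected failures that still execute, so CI notices when the underlying ops start working.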