Skip to content

Commit b275be6

Browse files
authored
install codegen header to torch/include (#1405)
# Motivation

This PR addresses a code generation issue related to XPU. Currently, there are two separate codegen paths for XPU:

1. **Stock PyTorch** – generates code for oneDNN ops.
2. **torch-xpu-ops** – generates code for SYCL kernel ops.

The corresponding build directories are:

1. `build/aten/src/ATen` (for stock PyTorch)
2. `build/xpu/ATen` (for torch-xpu-ops)

However, in the torch-xpu-ops codegen, we mistakenly omitted installing the XPU op headers from `build/xpu/ATen/ops` to `build/aten/src/ATen/ops`. This PR fixes the issue and also removes some unnecessary code for better maintainability.

# Solution

We copy the headers generated by the torch-xpu-ops codegen (under `build/xpu/ATen`) into the stock PyTorch generated directory (`build/aten/src/ATen`) using `tools/codegen/install_xpu_headers.py`, so that sources can include them as `<ATen/ops/*.h>` instead of `<xpu/ATen/ops/*.h>`.

# Additional Context

Fixes pytorch/pytorch#145902
1 parent b8c05de commit b275be6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+300
-155
lines changed

cmake/Codegen.cmake

+37-9
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ endif()
44
set(Codegen_XPU_cmake_included true)
55

66
set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen")
7+
set(BUILD_TORCH_ATEN_GENERATED "${CMAKE_BINARY_DIR}/aten/src/ATen")
78
file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED})
89

910
set(RegisterXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
@@ -49,6 +50,38 @@ function(GEN_XPU file_yaml)
4950
--xpu
5051
)
5152

53+
set(XPU_INSTALL_HEADER_COMMAND
54+
"${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
55+
--src-header-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
56+
--dst-header-dir ${BUILD_TORCH_ATEN_GENERATED}
57+
)
58+
59+
execute_process(
60+
COMMAND
61+
${XPU_CODEGEN_COMMAND}
62+
--generate headers
63+
--dry-run
64+
--output-dependencies ${BUILD_TORCH_XPU_ATEN_GENERATED}/generated_headers.cmake
65+
RESULT_VARIABLE RETURN_VALUE
66+
WORKING_DIRECTORY ${TORCH_ROOT}
67+
)
68+
69+
if(NOT RETURN_VALUE EQUAL 0)
70+
message(FATAL_ERROR "Failed to get generated_headers list")
71+
endif()
72+
73+
execute_process(
74+
COMMAND
75+
${XPU_INSTALL_HEADER_COMMAND}
76+
--dry-run
77+
RESULT_VARIABLE RETURN_VALUE
78+
WORKING_DIRECTORY ${TORCH_ROOT}
79+
)
80+
81+
if(NOT RETURN_VALUE EQUAL 0)
82+
message(FATAL_ERROR "Failed to get XPU header list to install")
83+
endif()
84+
5285
add_custom_command(
5386
COMMENT "Generating XPU ATen Codegen..."
5487
OUTPUT ${generated_files}
@@ -66,14 +99,13 @@ function(GEN_XPU file_yaml)
6699
COMMAND
67100
${REGISTER_FALLBACK_CMD}
68101
# Codegen post-process
69-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_GENERATED}
70-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_GENERATED}
71-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_GENERATED}
72-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_GENERATED}
102+
COMMAND
103+
${XPU_INSTALL_HEADER_COMMAND}
73104
WORKING_DIRECTORY ${TORCH_ROOT}
74105
DEPENDS
75106
${CODEGEN_XPU_YAML_DIR}/native/${file_yaml}
76107
${XPUFallback_TEMPLATE}
108+
${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
77109
)
78110

79111
# Post codegen delete the copied templates folder only on Windows.
@@ -99,11 +131,7 @@ GEN_XPU(
99131
${XPU_AOTI_SHIM_SOURCE}
100132
)
101133

102-
# The c_shim_xpu.cpp needs include files in ${CMAKE_BINARY_DIR}/xpu/ATen/ops/*.h)
103-
# The include path is auto generated as "#include <ATen/ops/*.h">
104-
# To follow the design of aoti codegen, here ${CMAKE_BINARY_DIR}/xpu is added to
105-
# $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include <ATen/ops/*.h>" works.
106-
list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu)
134+
include(${BUILD_TORCH_XPU_ATEN_GENERATED}/xpu_ops_generated_headers.cmake)
107135

108136
list(APPEND xpu_generated_src
109137
${RegisterXPU_GENERATED}

src/ATen/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,7 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
1919
foreach(HEADER ${xpu_h})
2020
install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/xpu")
2121
endforeach()
22+
23+
foreach(HEADER ${xpu_ops_generated_headers})
24+
install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)
25+
endforeach()

src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <ATen/native/sparse/SparseStubs.h>
22
#include <ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h>
3-
#include <xpu/ATen/ops/_convert_indices_from_coo_to_csr_native.h>
4-
#include <xpu/ATen/ops/_convert_indices_from_csr_to_coo_native.h>
3+
#include <ATen/ops/_convert_indices_from_coo_to_csr_native.h>
4+
#include <ATen/ops/_convert_indices_from_csr_to_coo_native.h>
55

66
namespace at::native {
77

src/ATen/native/xpu/Activation.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
#include <ATen/native/TensorIterator.h>
88

99
#include <ATen/ops/empty_like.h>
10-
#include <xpu/ATen/ops/empty.h>
11-
#include <xpu/ATen/ops/gelu_backward_native.h>
12-
#include <xpu/ATen/ops/gelu_native.h>
10+
#include <ATen/ops/empty.h>
11+
#include <ATen/ops/gelu_backward_native.h>
12+
#include <ATen/ops/gelu_native.h>
1313

1414
#include <ATen/native/xpu/sycl/ActivationEluKernels.h>
1515
#include <ATen/native/xpu/sycl/ActivationGeluKernel.h>

src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
#include <ATen/ops/mean.h>
99
#include <ATen/ops/zeros_like.h>
10-
#include <xpu/ATen/ops/_adaptive_avg_pool2d_backward_native.h>
11-
#include <xpu/ATen/ops/_adaptive_avg_pool2d_native.h>
10+
#include <ATen/ops/_adaptive_avg_pool2d_backward_native.h>
11+
#include <ATen/ops/_adaptive_avg_pool2d_native.h>
1212

1313
#include <ATen/native/xpu/sycl/AdaptiveAveragePooling2dKernels.h>
1414

src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
#include <ATen/ops/empty.h>
66
#include <ATen/ops/empty_like.h>
7-
#include <xpu/ATen/ops/adaptive_avg_pool3d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_avg_pool3d_native.h>
7+
#include <ATen/ops/adaptive_avg_pool3d_backward_native.h>
8+
#include <ATen/ops/adaptive_avg_pool3d_native.h>
99

1010
namespace at::native {
1111

src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <ATen/native/xpu/sycl/AdaptiveMaxPooling2dKernels.h>
55
#include <comm/RegisterUtils.h>
66

7-
#include <xpu/ATen/ops/adaptive_max_pool2d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_max_pool2d_native.h>
7+
#include <ATen/ops/adaptive_max_pool2d_backward_native.h>
8+
#include <ATen/ops/adaptive_max_pool2d_native.h>
99

1010
namespace at {
1111
namespace native {

src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <ATen/native/xpu/sycl/AdaptiveMaxPooling3dKernels.h>
55

66
#include <ATen/ops/empty.h>
7-
#include <xpu/ATen/ops/adaptive_max_pool3d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_max_pool3d_native.h>
7+
#include <ATen/ops/adaptive_max_pool3d_backward_native.h>
8+
#include <ATen/ops/adaptive_max_pool3d_native.h>
99

1010
namespace at {
1111
namespace native {

src/ATen/native/xpu/AveragePool2d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#include <ATen/native/xpu/sycl/AveragePool2dKernels.h>
66
#include <comm/RegisterUtils.h>
77

8-
#include <xpu/ATen/ops/avg_pool2d_backward_native.h>
9-
#include <xpu/ATen/ops/avg_pool2d_native.h>
8+
#include <ATen/ops/avg_pool2d_backward_native.h>
9+
#include <ATen/ops/avg_pool2d_native.h>
1010

1111
namespace at {
1212
namespace native {

src/ATen/native/xpu/AveragePool3d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include <ATen/core/Tensor.h>
22
#include <ATen/native/xpu/sycl/AveragePool3dKernels.h>
33

4-
#include <xpu/ATen/ops/avg_pool3d_backward_native.h>
5-
#include <xpu/ATen/ops/avg_pool3d_native.h>
4+
#include <ATen/ops/avg_pool3d_backward_native.h>
5+
#include <ATen/ops/avg_pool3d_native.h>
66

77
namespace at {
88
namespace native {

src/ATen/native/xpu/BinaryOps.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <ATen/native/DispatchStub.h>
55
#include <ATen/native/TensorIterator.h>
66

7-
#include <xpu/ATen/ops/add_native.h>
7+
#include <ATen/ops/add_native.h>
88

99
#include <ATen/native/xpu/sycl/BinaryBitwiseOpsKernels.h>
1010
#include <ATen/native/xpu/sycl/BinaryGeometricKernels.h>

src/ATen/native/xpu/Col2Im.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <ATen/native/xpu/sycl/Col2ImKernel.h>
88

99
#include <comm/xpu_aten.h>
10-
#include <xpu/ATen/ops/col2im_native.h>
10+
#include <ATen/ops/col2im_native.h>
1111

1212
namespace at::native {
1313

src/ATen/native/xpu/DilatedMaxPool2d.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
#include <ATen/native/xpu/sycl/DilatedMaxPool2d.h>
55
#include <comm/RegisterUtils.h>
66

7-
#include <xpu/ATen/ops/max.h>
8-
#include <xpu/ATen/ops/max_pool2d_with_indices_backward_native.h>
9-
#include <xpu/ATen/ops/max_pool2d_with_indices_native.h>
7+
#include <ATen/ops/max.h>
8+
#include <ATen/ops/max_pool2d_with_indices_backward_native.h>
9+
#include <ATen/ops/max_pool2d_with_indices_native.h>
1010

1111
namespace at {
1212
namespace native {

src/ATen/native/xpu/DilatedMaxPool3d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#include <ATen/native/xpu/sycl/DilatedMaxPool3d.h>
33

44
#include <ATen/ops/empty.h>
5-
#include <xpu/ATen/ops/max_pool3d_with_indices_backward_native.h>
6-
#include <xpu/ATen/ops/max_pool3d_with_indices_native.h>
5+
#include <ATen/ops/max_pool3d_with_indices_backward_native.h>
6+
#include <ATen/ops/max_pool3d_with_indices_native.h>
77
namespace at {
88
namespace native {
99

src/ATen/native/xpu/Dropout.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#include <ATen/native/TensorIterator.h>
44
#include <ATen/native/xpu/sycl/DropoutKernels.h>
55

6-
#include <xpu/ATen/ops/native_dropout_backward_native.h>
7-
#include <xpu/ATen/ops/native_dropout_native.h>
6+
#include <ATen/ops/native_dropout_backward_native.h>
7+
#include <ATen/ops/native_dropout_native.h>
88

99
#include <comm/xpu_aten.h>
1010

src/ATen/native/xpu/Embedding.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <ATen/core/op_registration/adaption.h>
22

3-
#include <xpu/ATen/ops/embedding_dense_backward_native.h>
3+
#include <ATen/ops/embedding_dense_backward_native.h>
44

55
#include <ATen/native/xpu/sycl/EmbeddingKernels.h>
66
#include <comm/xpu_aten.h>

src/ATen/native/xpu/EmbeddingBag.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#include <xpu/ATen/ops/_embedding_bag_forward_only_native.h>
2-
#include <xpu/ATen/ops/_embedding_bag_native.h>
1+
#include <ATen/ops/_embedding_bag_forward_only_native.h>
2+
#include <ATen/ops/_embedding_bag_native.h>
33

44
#include <ATen/native/xpu/sycl/EmbeddingBagKernels.h>
55
#include <comm/xpu_aten.h>

src/ATen/native/xpu/Equal.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <ATen/NamedTensorUtils.h>
22

3-
#include <xpu/ATen/ops/equal_native.h>
3+
#include <ATen/ops/equal_native.h>
44

55
namespace at {
66
namespace xpu {

src/ATen/native/xpu/ForeachOpScalarList.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
#include <ATen/native/xpu/sycl/ForeachPointwiseOpScalarListKernels.h>
1717
#include <ATen/native/xpu/sycl/ForeachTernaryOpScalarListKernels.h>
1818

19-
#include <xpu/ATen/ops/_foreach_add_native.h>
20-
#include <xpu/ATen/ops/_foreach_mul_native.h>
19+
#include <ATen/ops/_foreach_add_native.h>
20+
#include <ATen/ops/_foreach_mul_native.h>
2121

2222
namespace at {
2323
namespace native {

src/ATen/native/xpu/ForeachReduceOp.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include <ATen/native/ForeachUtils.h>
22

33
#include <ATen/native/xpu/sycl/ForeachReduceKernels.h>
4-
#include <xpu/ATen/ops/_foreach_max_native.h>
5-
#include <xpu/ATen/ops/_foreach_norm_native.h>
4+
#include <ATen/ops/_foreach_max_native.h>
5+
#include <ATen/ops/_foreach_norm_native.h>
66

77
namespace at {
88
namespace native {

src/ATen/native/xpu/FractionalMaxPool2d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#include <ATen/native/cpu/mixed_data_type.h>
44
#include <ATen/native/xpu/sycl/FractionalMaxPool2dKernels.h>
55

6-
#include <xpu/ATen/ops/fractional_max_pool2d_backward_native.h>
7-
#include <xpu/ATen/ops/fractional_max_pool2d_native.h>
6+
#include <ATen/ops/fractional_max_pool2d_backward_native.h>
7+
#include <ATen/ops/fractional_max_pool2d_native.h>
88

99
namespace at::native {
1010

src/ATen/native/xpu/FractionalMaxPool3d.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <ATen/native/xpu/sycl/FractionalMaxPool3dKernels.h>
55
#include <ATen/ops/empty.h>
66

7-
#include <xpu/ATen/ops/fractional_max_pool3d_backward_native.h>
8-
#include <xpu/ATen/ops/fractional_max_pool3d_native.h>
7+
#include <ATen/ops/fractional_max_pool3d_backward_native.h>
8+
#include <ATen/ops/fractional_max_pool3d_native.h>
99

1010
namespace at::native {
1111

src/ATen/native/xpu/Im2Col.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <ATen/native/TensorIterator.h>
55
#include <torch/library.h>
66

7-
#include <xpu/ATen/ops/im2col_native.h>
7+
#include <ATen/ops/im2col_native.h>
88

99
#include <ATen/native/xpu/sycl/Im2ColKernel.h>
1010
#include <comm/xpu_aten.h>

src/ATen/native/xpu/Indexing.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <comm/xpu_aten.h>
1111

1212
#include <ATen/ops/index.h>
13-
#include <xpu/ATen/ops/index_native.h>
13+
#include <ATen/ops/index_native.h>
1414

1515
namespace at {
1616
namespace native {

src/ATen/native/xpu/LossMultiMargin.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#include <ATen/native/xpu/sycl/MultiMarginLossKernels.h>
33

44
#include <ATen/ops/empty.h>
5-
#include <xpu/ATen/ops/multi_margin_loss_backward_native.h>
6-
#include <xpu/ATen/ops/multi_margin_loss_native.h>
5+
#include <ATen/ops/multi_margin_loss_backward_native.h>
6+
#include <ATen/ops/multi_margin_loss_native.h>
77

88
namespace at::native {
99

src/ATen/native/xpu/LossNLL.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#include <comm/RegisterUtils.h>
66
#include <comm/xpu_aten.h>
77

8-
#include <xpu/ATen/ops/nll_loss_backward_native.h>
9-
#include <xpu/ATen/ops/nll_loss_forward_native.h>
8+
#include <ATen/ops/nll_loss_backward_native.h>
9+
#include <ATen/ops/nll_loss_forward_native.h>
1010

1111
namespace at {
1212
namespace native {

src/ATen/native/xpu/PinnedMemoryAllocator.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <ATen/xpu/PinnedMemoryAllocator.h>
44
#include <comm/xpu_aten.h>
55

6-
#include <xpu/ATen/ops/is_pinned_native.h>
6+
#include <ATen/ops/is_pinned_native.h>
77

88
namespace at {
99
namespace native {

src/ATen/native/xpu/RangeFactories.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
#include <comm/xpu_aten.h>
1111
#include <torch/library.h>
1212

13-
#include <xpu/ATen/ops/arange_native.h>
14-
#include <xpu/ATen/ops/linspace_native.h>
15-
#include <xpu/ATen/ops/logspace_native.h>
16-
#include <xpu/ATen/ops/range_native.h>
13+
#include <ATen/ops/arange_native.h>
14+
#include <ATen/ops/linspace_native.h>
15+
#include <ATen/ops/logspace_native.h>
16+
#include <ATen/ops/range_native.h>
1717

1818
namespace at {
1919

src/ATen/native/xpu/ReflectionPad.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77
#include <ATen/ops/empty.h>
88
#include <ATen/ops/zeros_like.h>
9-
#include <xpu/ATen/ops/reflection_pad1d_backward_native.h>
10-
#include <xpu/ATen/ops/reflection_pad1d_native.h>
11-
#include <xpu/ATen/ops/reflection_pad2d_backward_native.h>
12-
#include <xpu/ATen/ops/reflection_pad2d_native.h>
13-
#include <xpu/ATen/ops/reflection_pad3d_backward_native.h>
14-
#include <xpu/ATen/ops/reflection_pad3d_native.h>
9+
#include <ATen/ops/reflection_pad1d_backward_native.h>
10+
#include <ATen/ops/reflection_pad1d_native.h>
11+
#include <ATen/ops/reflection_pad2d_backward_native.h>
12+
#include <ATen/ops/reflection_pad2d_native.h>
13+
#include <ATen/ops/reflection_pad3d_backward_native.h>
14+
#include <ATen/ops/reflection_pad3d_native.h>
1515
#include "ATen/TensorMeta.h"
1616

1717
namespace at {

src/ATen/native/xpu/ReplicationPadding.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77
#include <comm/RegisterUtils.h>
88

9-
#include <xpu/ATen/ops/replication_pad1d_backward_native.h>
10-
#include <xpu/ATen/ops/replication_pad1d_native.h>
11-
#include <xpu/ATen/ops/replication_pad2d_backward_native.h>
12-
#include <xpu/ATen/ops/replication_pad2d_native.h>
13-
#include <xpu/ATen/ops/replication_pad3d_backward_native.h>
14-
#include <xpu/ATen/ops/replication_pad3d_native.h>
9+
#include <ATen/ops/replication_pad1d_backward_native.h>
10+
#include <ATen/ops/replication_pad1d_native.h>
11+
#include <ATen/ops/replication_pad2d_backward_native.h>
12+
#include <ATen/ops/replication_pad2d_native.h>
13+
#include <ATen/ops/replication_pad3d_backward_native.h>
14+
#include <ATen/ops/replication_pad3d_native.h>
1515

1616
namespace at {
1717
namespace native {

0 commit comments

Comments
 (0)