-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathBuildOnWindows.cmake
346 lines (313 loc) · 14.3 KB
/
BuildOnWindows.cmake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# Build rules for torch-xpu-ops on Windows.

# Start with no recorded libraries; each configuration below appends the
# targets it creates so they can be configured together afterwards.
unset(TORCH_XPU_OPS_LIBRARIES)

# NOTE(review): presumably read by sycl_add_library() to choose the keyword it
# passes to target_link_libraries() - confirm against the SYCL CMake module.
set(SYCL_LINK_LIBRARIES_KEYWORD PRIVATE)
# setup_common_libraries()
#
# Declares the two host-side targets used by the split-library configurations:
#   torch_xpu_ops      - static library built from ATen_XPU_CPP_SRCS.
#   torch_xpu_ops_aten - shared library built from the MKL, native C++ and
#                        generated sources; installed to TORCH_INSTALL_LIB_DIR.
#
# Takes no arguments. Because this is a macro, PATH_TO_TORCH_XPU_OPS_ATEN_LIB
# stays set in the caller's scope after the call.
macro(setup_common_libraries)
  # --- torch_xpu_ops (static) ---
  add_library(torch_xpu_ops STATIC ${ATen_XPU_CPP_SRCS})
  # The DLL file name is passed to the compiler as a quoted string literal.
  # NOTE(review): presumably used to locate torch_xpu_ops_aten.dll at runtime -
  # confirm in the C++ sources.
  set(PATH_TO_TORCH_XPU_OPS_ATEN_LIB \"torch_xpu_ops_aten.dll\")
  target_compile_options(
    torch_xpu_ops
    PRIVATE -DPATH_TO_TORCH_XPU_OPS_ATEN_LIB=${PATH_TO_TORCH_XPU_OPS_ATEN_LIB})

  # --- torch_xpu_ops_aten (shared) ---
  add_library(
    torch_xpu_ops_aten SHARED
    ${ATen_XPU_MKL_SRCS}
    ${ATen_XPU_NATIVE_CPP_SRCS}
    ${ATen_XPU_GEN_SRCS})
  target_compile_definitions(torch_xpu_ops_aten PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
  target_link_libraries(torch_xpu_ops_aten PUBLIC torch_xpu torch_cpu c10)
  install(TARGETS torch_xpu_ops_aten DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endmacro()
if(BUILD_SEPARATE_OPS)
  # Build one shared library per SYCL source file and link each of them into
  # torch_xpu_ops_aten.
  setup_common_libraries()
  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
    # The library name is the source file name without its last extension.
    # NAME_WLE is the only mode keyword get_filename_component() honours here;
    # a second mode keyword after it would simply be ignored.
    get_filename_component(name "${sycl_src}" NAME_WLE)
    set(sycl_lib torch-xpu-ops-sycl-${name})
    sycl_add_library(
      ${sycl_lib}
      SHARED
      SYCL_SOURCES ${sycl_src})
    target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_lib})
    list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib})
    # Install from here so the rule does not depend on PyTorch's CMake
    # definitions.
    install(TARGETS ${sycl_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  endforeach()
  # The host-side targets are recorded last, after all kernel libraries.
  list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops torch_xpu_ops_aten)
# When the compiler does not support device code compression, the kernels have
# to be split across multiple libraries to stay within the binary size limit.
elseif(BUILD_SPLIT_KERNEL_LIB OR __INTEL_LLVM_COMPILER LESS 20250004 OR ICX_DATE LESS 20241205)
  setup_common_libraries()

  # One source bucket per kernel library. Every SYCL source lands in exactly
  # one bucket.
  set(ATen_XPU_SYCL_BINARY_SRCS)
  set(ATen_XPU_SYCL_UNARY_SRCS)
  set(ATen_XPU_SYCL_REDUCE_SRCS)
  set(ATen_XPU_SYCL_ACTIVATION_SRCS)
  set(ATen_XPU_SYCL_FOREACH_SRCS)
  set(ATen_XPU_SYCL_TENSOR_SRCS)
  set(ATen_XPU_SYCL_NORM_LOSS_SRCS)
  set(ATen_XPU_SYCL_POLY_SRCS)
  set(ATen_XPU_SYCL_DISTRIBUTION_SRCS)
  set(ATen_XPU_SYCL_OTHERS_SRCS)

  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
    # Classify by file name only, so that the directory the project happens to
    # be checked out in can never influence which library a kernel ends up in.
    get_filename_component(sycl_src_name "${sycl_src}" NAME)
    # The first matching category wins; the order below is significant.
    if(sycl_src_name MATCHES "Foreach")
      list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Binary")
      list(APPEND ATen_XPU_SYCL_BINARY_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Unary|Copy|Pow")
      # The elementwise Pow and Copy kernels are grouped with the unary
      # kernels so that kernel definitions and their invocations live in the
      # same library; this resolves a cyclic dependency between kernel DLLs.
      list(APPEND ATen_XPU_SYCL_UNARY_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Reduce")
      list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Activation")
      list(APPEND ATen_XPU_SYCL_ACTIVATION_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Tensor")
      list(APPEND ATen_XPU_SYCL_TENSOR_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Distribution")
      list(APPEND ATen_XPU_SYCL_DISTRIBUTION_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Norm|Loss|Resize")
      # The Resize kernels are placed in the Norm/Loss library to resolve
      # symbols.
      list(APPEND ATen_XPU_SYCL_NORM_LOSS_SRCS ${sycl_src})
    elseif(sycl_src_name MATCHES "Polynomial")
      list(APPEND ATen_XPU_SYCL_POLY_SRCS ${sycl_src})
    else()
      list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src})
    endif()
  endforeach()

  # torch_xpu_ops_add_sycl_kernel_library(<kernel_lib> <kernel_srcs_var>)
  #
  # Creates the shared SYCL kernel library <kernel_lib> from the sources held
  # in the variable named <kernel_srcs_var>, links it into torch_xpu_ops_aten,
  # links torch_xpu into it, records it in TORCH_XPU_OPS_LIBRARIES and
  # installs it to TORCH_INSTALL_LIB_DIR.
  #
  # This is a macro (not a function) so that sycl_add_library() and the
  # list(APPEND) run in the caller's scope. The sources are passed by variable
  # name so that their values are not re-parsed by macro substitution.
  macro(torch_xpu_ops_add_sycl_kernel_library kernel_lib kernel_srcs_var)
    sycl_add_library(
      ${kernel_lib}
      SHARED
      SYCL_SOURCES ${${kernel_srcs_var}})
    target_compile_definitions(${kernel_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
    target_link_libraries(torch_xpu_ops_aten PUBLIC ${kernel_lib})
    target_link_libraries(${kernel_lib} PUBLIC torch_xpu)
    list(APPEND TORCH_XPU_OPS_LIBRARIES ${kernel_lib})
    # Install from here so the rule does not depend on PyTorch's CMake
    # definitions.
    install(TARGETS ${kernel_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  endmacro()

  # Binary kernel lib
  set(sycl_binary_lib torch_xpu_ops_sycl_binary_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_binary_lib} ATen_XPU_SYCL_BINARY_SRCS)

  # Unary kernel lib (also holds the Pow and Copy kernels)
  set(sycl_unary_lib torch_xpu_ops_sycl_unary_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_unary_lib} ATen_XPU_SYCL_UNARY_SRCS)

  # Reduce kernel lib
  set(sycl_reduce_lib torch_xpu_ops_sycl_reduce_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_reduce_lib} ATen_XPU_SYCL_REDUCE_SRCS)

  # Activation kernel lib
  set(sycl_activation_lib torch_xpu_ops_sycl_activation_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_activation_lib} ATen_XPU_SYCL_ACTIVATION_SRCS)

  # Foreach kernel lib
  set(sycl_foreach_lib torch_xpu_ops_sycl_foreach_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_foreach_lib} ATen_XPU_SYCL_FOREACH_SRCS)

  # Tensor kernel lib
  set(sycl_tensor_lib torch_xpu_ops_sycl_tensor_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_tensor_lib} ATen_XPU_SYCL_TENSOR_SRCS)

  # Norm and Loss kernel lib (also holds the Resize kernels)
  set(sycl_norm_loss_lib torch_xpu_ops_sycl_norm_loss_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_norm_loss_lib} ATen_XPU_SYCL_NORM_LOSS_SRCS)

  # Polynomial kernel lib
  set(sycl_poly_lib torch_xpu_ops_sycl_poly_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_poly_lib} ATen_XPU_SYCL_POLY_SRCS)

  # Distribution kernel lib
  set(sycl_dist_lib torch_xpu_ops_sycl_dist_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_dist_lib} ATen_XPU_SYCL_DISTRIBUTION_SRCS)

  # Other kernel lib (everything that matched no category above)
  set(sycl_lib torch_xpu_ops_sycl_kernels)
  torch_xpu_ops_add_sycl_kernel_library(${sycl_lib} ATen_XPU_SYCL_OTHERS_SRCS)

  # The host-side targets are recorded last, after all kernel libraries.
  list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops torch_xpu_ops_aten)
else()
  # On Windows, it is not possible to combine all obj files into one library
  # because the obj files of kernels compiled on Windows are much larger than
  # those on Linux. Combined into one, the library would exceed 4GB, which
  # conflicts with the size limit of a single library on Windows. The
  # libraries will be merged into one once the compiler is fixed.
  add_library(
    torch_xpu_ops
    STATIC
    ${ATen_XPU_CPP_SRCS}
    ${ATen_XPU_MKL_SRCS}
    ${ATen_XPU_NATIVE_CPP_SRCS}
    ${ATen_XPU_GEN_SRCS})
  target_compile_definitions(torch_xpu_ops PRIVATE TORCH_XPU_BUILD_MAIN_LIB)

  # Split the SYCL kernels into two static libraries:
  #   1) Common: Foreach, Reduce, Unary, Binary, Copy, Pow, Activation, Norm,
  #      Loss, Resize and Distribution kernels.
  #   2) Others: everything else.
  set(ATen_XPU_SYCL_COMMON_SRCS)
  set(ATen_XPU_SYCL_OTHERS_SRCS)
  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
    # Classify by file name only, so that the directory the project happens to
    # be checked out in can never influence which library a kernel ends up in.
    get_filename_component(sycl_src_name "${sycl_src}" NAME)
    if(sycl_src_name MATCHES
       "Foreach|Reduce|Unary|Binary|Copy|Pow|Activation|Norm|Loss|Resize|Distribution")
      list(APPEND ATen_XPU_SYCL_COMMON_SRCS ${sycl_src})
    else()
      list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src})
    endif()
  endforeach()

  # Common kernel lib
  set(sycl_common_lib torch_xpu_ops_sycl_common_kernels)
  sycl_add_library(
    ${sycl_common_lib}
    STATIC
    SYCL_SOURCES ${ATen_XPU_SYCL_COMMON_SRCS})
  target_compile_definitions(${sycl_common_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_common_lib})

  # Other kernel lib
  set(sycl_lib torch_xpu_ops_sycl_kernels)
  sycl_add_library(
    ${sycl_lib}
    STATIC
    SYCL_SOURCES ${ATen_XPU_SYCL_OTHERS_SRCS})
  target_compile_definitions(${sycl_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
  list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib})

  # Link both kernel archives into torch_xpu_ops. -WHOLEARCHIVE makes the
  # linker keep every object of each archive rather than only the objects
  # that resolve an otherwise undefined symbol.
  target_link_libraries(torch_xpu_ops
    PUBLIC
      ${sycl_common_lib}
      ${sycl_lib}
  )
  target_link_options(torch_xpu_ops PUBLIC
    "-WHOLEARCHIVE:$<TARGET_FILE:${sycl_common_lib}>"
    "-WHOLEARCHIVE:$<TARGET_FILE:${sycl_lib}>"
  )
  list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops)
endif()
# The link keyword was only needed while the libraries were being declared.
unset(SYCL_LINK_LIBRARIES_KEYWORD)

# Apply the shared compile and link configuration to every recorded library.
foreach(xpu_ops_lib IN LISTS TORCH_XPU_OPS_LIBRARIES)
  # Align with PyTorch compile options PYTORCH_SRC_DIR/cmake/public/utils.cmake
  torch_compile_options(${xpu_ops_lib})
  target_compile_options_if_supported(${xpu_ops_lib} "-Wno-deprecated-copy")
  target_compile_options(${xpu_ops_lib} PRIVATE ${TORCH_XPU_OPS_FLAGS})

  target_include_directories(
    ${xpu_ops_lib}
    PUBLIC
      ${TORCH_XPU_OPS_INCLUDE_DIRS}
      ${ATen_XPU_INCLUDE_DIRS}
      ${SYCL_INCLUDE_DIR})

  target_link_libraries(
    ${xpu_ops_lib}
    PUBLIC
      ${SYCL_LIBRARY}
      c10_xpu
      torch_cpu)
endforeach()
if(USE_ONEMKL)
  # The oneMKL define, include directory and libraries must reach every target
  # that compiles ATen_XPU_MKL_SRCS. In the monolithic configuration that is
  # torch_xpu_ops. When setup_common_libraries() was used, those sources are
  # built into torch_xpu_ops_aten instead, so it is configured too when it
  # exists.
  set(onemkl_targets torch_xpu_ops)
  if(TARGET torch_xpu_ops_aten)
    list(APPEND onemkl_targets torch_xpu_ops_aten)
  endif()
  foreach(onemkl_target IN LISTS onemkl_targets)
    target_compile_options(${onemkl_target} PRIVATE "-DUSE_ONEMKL")
    target_include_directories(${onemkl_target} PUBLIC ${TORCH_XPU_OPS_ONEMKL_INCLUDE_DIR})
    target_link_libraries(${onemkl_target} PUBLIC ${TORCH_XPU_OPS_ONEMKL_LIBRARIES})
  endforeach()
  unset(onemkl_targets)
endif()