Skip to content

Commit 8384acf

Browse files
authored
[SYCL-TLA] Enable SYCL-TLA build (#2030)
This is a draft PR to enable the SYCL-TLA build in torch-xpu-ops so that we can test the accuracy/performance of SYCL-TLA kernels in PyTorch once the SDPA/GEMM kernels are ready. After discussion with Eikan, we decided to put the build logic in torch-xpu-ops while keeping the kernel source code in-tree in PyTorch. Please put your SYCL-TLA kernel source code in PyTorch and add its path to `ATen_XPU_SYCLTLA_SRCS` in `torch-xpu-ops/src/ATen/CMakeLists.txt`. Since SYCL-TLA needs different compilation options from the normal SYCL kernels in torch-xpu-ops, I turned the logic in `cmake/BuildFlags.cmake` into a macro so that the common compilation options can be reused. Since there is no settled plan yet for how to import the sycl-tla repo, I git clone the main branch in CMake for debugging convenience. We can pin a commit after sycl-tla has its first release tag. This depends on g++ being upgraded to GCC 13; otherwise the SYCL-TLA kernels won't build.
1 parent c8b02b6 commit 8384acf

File tree

8 files changed

+237
-136
lines changed

8 files changed

+237
-136
lines changed

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ include(${TORCH_XPU_OPS_ROOT}/cmake/SYCL.cmake)
3939
include(${TORCH_XPU_OPS_ROOT}/cmake/ONEMKL.cmake)
4040
include(${TORCH_XPU_OPS_ROOT}/cmake/BuildFlags.cmake)
4141

42+
set_build_flags()
43+
4244
# -- [ Re-generate the macros file for https://github.com/pytorch/pytorch/pull/147161
4345
macro(update_caffe2_macros_file)
4446
configure_file(
@@ -56,6 +58,16 @@ if(USE_XCCL)
5658
endif()
5759
endif()
5860

61+
set(USE_SYCLTLA ON)
62+
if(WIN32 OR NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 13.0)
63+
set(USE_SYCLTLA OFF)
64+
message(WARNING "SYCL-TLA is not build as it only supports GCC >= 13.0 as CXX Compiler on Linux Platform!")
65+
endif()
66+
67+
if(USE_SYCLTLA)
68+
include(${TORCH_XPU_OPS_ROOT}/cmake/SYCLTLA.cmake)
69+
endif()
70+
5971
if(BUILD_TEST)
6072
add_subdirectory(${TORCH_XPU_OPS_ROOT}/test/sycl ${CMAKE_BINARY_DIR}/test_sycl)
6173
endif()

cmake/BuildFlags.cmake

Lines changed: 148 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -22,142 +22,162 @@ function(CHECK_SYCL_FLAG FLAG VARIABLE_NAME)
2222
file(REMOVE_RECURSE ${TEMP_DIR})
2323
endfunction()
2424

25-
# Support GCC on Linux and MSVC on Windows at the moment.
26-
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
27-
# # -- Host flags (SYCL_CXX_FLAGS)
28-
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
29-
list(APPEND SYCL_HOST_FLAGS /std:c++17)
30-
list(APPEND SYCL_HOST_FLAGS /MD)
31-
list(APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
32-
# SYCL headers warnings
33-
list(APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
34-
list(APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
35-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
36-
list(APPEND SYCL_HOST_FLAGS -fPIC)
37-
list(APPEND SYCL_HOST_FLAGS -std=c++17)
38-
list(APPEND SYCL_HOST_FLAGS -Wunused-variable)
39-
list(APPEND SYCL_HOST_FLAGS -Wno-interference-size)
40-
# Some versions of DPC++ compiler pass paths to SYCL headers as user include paths (`-I`) rather
41-
# than system paths (`-isystem`). This makes host compiler to report warnings encountered in the
42-
# SYCL headers, such as deprecated warnings, even if warned API is not actually used in the program.
43-
# We expect that this issue will be addressed in the later version of DPC++ compiler. To workaround
44-
# the issue we wrap paths to SYCL headers in `-isystem`.
45-
foreach(FLAGS IN LISTS SYCL_INCLUDE_DIR)
46-
list(APPEND SYCL_HOST_FLAGS "-isystem ${FLAGS}")
47-
endforeach()
48-
# Excluding warnings which flood the compilation output
49-
# TODO: fix warnings in the source code and then reenable them in compilation
50-
list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
51-
endif()
25+
macro(set_build_flags)
26+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
27+
set(SYCL_HOST_FLAGS)
28+
set(SYCL_KERNEL_OPTIONS)
29+
set(SYCL_COMPILE_FLAGS ${SYCL_FLAGS})
30+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_LINK_FLAGS})
31+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS)
32+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS)
33+
set(SYCL_OFFLINE_COMPILER_FLAGS)
5234

53-
if(CMAKE_BUILD_TYPE MATCHES Debug)
54-
list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
55-
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
56-
list(APPEND SYCL_HOST_FLAGS -g -O2)
57-
endif()
58-
if(USE_PER_OPERATOR_HEADERS)
59-
list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
60-
endif()
61-
list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
62-
# -- Kernel flags (SYCL_KERNEL_OPTIONS)
63-
# The fast-math will be enabled by default in SYCL compiler.
64-
# Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
65-
# 1. We enable below flags here to be warn about NaN and Infinity,
66-
# which will be hidden by fast-math by default.
67-
# 2. The associative-math in fast-math allows floating point
68-
# operations to be reassociated, which will lead to non-deterministic
69-
# results compared with CUDA backend.
70-
# 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
71-
# to be replaced with an approximately equivalent set of instructions or
72-
# alternative math function calls, which have great errors.
73-
#
74-
# PSEUDO of separate compilation with DPCPP compiler.
75-
# 1. Kernel source compilation:
76-
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_FLAGS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
77-
# 2. Device code linkage:
78-
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
79-
# 3. Host only source compilation:
80-
# gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
81-
# 4. Linkage:
82-
# gcc -shared host.o kernel.o device-code.o -o libxxx.so
83-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
84-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
85-
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
86-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
87-
# Suppress warnings about dllexport.
88-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-ignored-attributes)
89-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
90-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
91-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
92-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
93-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
94-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value)
95-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
96-
endif()
35+
if(REPLACE_FLAGS_FOR_SYCLTLA)
36+
set(CPP_STD c++20)
37+
else()
38+
set(CPP_STD c++17)
39+
endif()
40+
# # -- Host flags (SYCL_CXX_FLAGS)
41+
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
42+
list(APPEND SYCL_HOST_FLAGS /std:${CPP_STD})
43+
list(APPEND SYCL_HOST_FLAGS /MD)
44+
list(APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
45+
# SYCL headers warnings
46+
list(APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
47+
list(APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
48+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
49+
list(APPEND SYCL_HOST_FLAGS -fPIC)
50+
list(APPEND SYCL_HOST_FLAGS -std=${CPP_STD})
51+
list(APPEND SYCL_HOST_FLAGS -Wunused-variable)
52+
list(APPEND SYCL_HOST_FLAGS -Wno-interference-size)
53+
# Some versions of DPC++ compiler pass paths to SYCL headers as user include paths (`-I`) rather
54+
# than system paths (`-isystem`). This makes host compiler to report warnings encountered in the
55+
# SYCL headers, such as deprecated warnings, even if warned API is not actually used in the program.
56+
# We expect that this issue will be addressed in the later version of DPC++ compiler. To workaround
57+
# the issue we wrap paths to SYCL headers in `-isystem`.
58+
foreach(FLAGS IN LISTS SYCL_INCLUDE_DIR)
59+
list(APPEND SYCL_HOST_FLAGS "-isystem ${FLAGS}")
60+
endforeach()
61+
# Excluding warnings which flood the compilation output
62+
# TODO: fix warnings in the source code and then reenable them in compilation
63+
list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
64+
endif()
9765

98-
if(CMAKE_BUILD_TYPE MATCHES Debug)
99-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
100-
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
101-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
102-
endif()
66+
if(CMAKE_BUILD_TYPE MATCHES Debug)
67+
list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
68+
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
69+
list(APPEND SYCL_HOST_FLAGS -g -O2)
70+
endif()
71+
if(USE_PER_OPERATOR_HEADERS)
72+
list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
73+
endif()
74+
# -- Kernel flags (SYCL_KERNEL_OPTIONS)
75+
# The fast-math will be enabled by default in SYCL compiler.
76+
# Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
77+
# 1. We enable below flags here to be warn about NaN and Infinity,
78+
# which will be hidden by fast-math by default.
79+
# 2. The associative-math in fast-math allows floating point
80+
# operations to be reassociated, which will lead to non-deterministic
81+
# results compared with CUDA backend.
82+
# 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
83+
# to be replaced with an approximately equivalent set of instructions or
84+
# alternative math function calls, which have great errors.
85+
#
86+
# PSEUDO of separate compilation with DPCPP compiler.
87+
# 1. Kernel source compilation:
88+
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_KERNEL_OPTIONS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
89+
# 2. Device code linkage:
90+
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
91+
# 3. Host only source compilation:
92+
# gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
93+
# 4. Linkage:
94+
# gcc -shared host.o kernel.o device-code.o -o libxxx.so
95+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
96+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
97+
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
98+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
99+
# Suppress warnings about dllexport.
100+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-ignored-attributes)
101+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
102+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
103+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
104+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
105+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
106+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value)
107+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
108+
endif()
103109

104-
CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
105-
if(NOT SUPPORTS_FP64_CONV_EMU)
106-
message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
107-
will disable it. On some platforms that don't support FP64, \
108-
running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
109-
or a Native API failed error.")
110-
endif()
110+
if(CMAKE_BUILD_TYPE MATCHES Debug)
111+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
112+
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
113+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
114+
endif()
115+
116+
CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
117+
if(NOT SUPPORTS_FP64_CONV_EMU)
118+
message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
119+
will disable it. On some platforms that don't support FP64, \
120+
running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
121+
or a Native API failed error.")
122+
endif()
111123

112-
set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})
124+
set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})
113125

114-
# -- SYCL device object linkage flags
115-
include(ProcessorCount)
116-
ProcessorCount(proc_cnt)
117-
if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
118-
set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
119-
else()
120-
set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
121-
endif()
122-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
123-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
126+
# -- SYCL device object linkage flags
127+
include(ProcessorCount)
128+
ProcessorCount(proc_cnt)
129+
if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
130+
set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
131+
else()
132+
set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
133+
endif()
134+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
135+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
124136

125-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels")
126-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode")
127-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt")
128-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required")
137+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels")
138+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode")
139+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt")
140+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required")
129141

130-
if(WIN32)
131-
set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m,ptl")
132-
else()
133-
set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m,ptl-h,ptl-u")
134-
endif()
135-
if(TORCH_XPU_ARCH_LIST)
136-
set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
137-
endif()
138-
if(AOT_TARGETS STREQUAL "none")
139-
set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
140-
else()
141-
# Enable FP64 conversion emulation for DG2 / ATS-M targets
142-
if(SUPPORTS_FP64_CONV_EMU)
143-
string(FIND "${AOT_TARGETS}" "dg2" _dg2_index)
144-
string(FIND "${AOT_TARGETS}" "ats-m" _atsm_index)
145-
if(_dg2_index GREATER_EQUAL 0 OR _atsm_index GREATER_EQUAL 0)
146-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
142+
if(REPLACE_FLAGS_FOR_SYCLTLA)
143+
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen)
144+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
145+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
146+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate")
147+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device pvc,bmg")
148+
else()
149+
if(WIN32)
150+
set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m,ptl")
151+
else()
152+
set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m,ptl-h,ptl-u")
153+
endif()
154+
if(TORCH_XPU_ARCH_LIST)
155+
set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
156+
endif()
157+
if(AOT_TARGETS STREQUAL "none")
158+
set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
159+
else()
160+
if(SUPPORTS_FP64_CONV_EMU)
161+
string(FIND "${AOT_TARGETS}" "dg2" _dg2_index)
162+
string(FIND "${AOT_TARGETS}" "ats-m" _atsm_index)
163+
if(_dg2_index GREATER_EQUAL 0 OR _atsm_index GREATER_EQUAL 0)
164+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
165+
endif()
166+
endif()
167+
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
168+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
169+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
170+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
171+
set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
147172
endif()
173+
message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
148174
endif()
149-
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
150-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
151-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
152-
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
153-
set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
154-
endif()
155-
message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
156175

157-
set(SYCL_FLAGS ${SYCL_FLAGS} ${SYCL_KERNEL_OPTIONS})
176+
set(SYCL_COMPILE_FLAGS ${SYCL_COMPILE_FLAGS} ${SYCL_KERNEL_OPTIONS})
158177

159-
set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
160-
else()
161-
message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
162-
return()
163-
endif()
178+
set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
179+
else()
180+
message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
181+
return()
182+
endif()
183+
endmacro()

cmake/Modules/FindSYCL.cmake

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# SYCL_COMPILER
1111
# -- SYCL compiler's executable.
1212
#
13-
# SYCL_FLAGS
13+
# SYCL_COMPILE_FLAGS
1414
# -- SYCL compiler's compilation command line arguments.
1515
#
1616
# SYCL_HOST_FLAGS
@@ -217,7 +217,6 @@ endfunction()
217217

218218
macro(SYCL_WRAP_SRCS sycl_target generated_files)
219219
# Optional arguments
220-
set(SYCL_flags "")
221220
set(generated_extension ${CMAKE_${SYCL_C_OR_CXX}_OUTPUT_EXTENSION})
222221

223222
set(SYCL_include_dirs "${SYCL_INCLUDE_DIR}")
@@ -388,7 +387,6 @@ macro(SYCL_LINK_DEVICE_OBJECTS output_file sycl_target)
388387
set(SYCL_device_link_flags
389388
${link_type_flag}
390389
${important_host_flags}
391-
${SYCL_FLAGS}
392390
${SYCL_DEVICE_LINK_FLAGS})
393391

394392
file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}")

cmake/Modules/FindSYCL/run_sycl.cmake

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ set(SYCL_host_compiler "@SYCL_HOST_COMPILER@") # path
2727
set(generated_file_path "@generated_file_path@") # path
2828
set(generated_file_internal "@generated_file@") # path
2929
set(SYCL_executable "@SYCL_EXECUTABLE@") # path
30-
set(SYCL_flags @SYCL_FLAGS@) # list
30+
set(SYCL_compile_flags @SYCL_COMPILE_FLAGS@) # list
3131
set(SYCL_include_dirs [==[@SYCL_include_dirs@]==]) # list
3232
set(SYCL_compile_definitions [==[@SYCL_compile_definitions@]==]) # list
3333

@@ -47,10 +47,10 @@ foreach(dir ${SYCL_include_dirs})
4747
endif()
4848
endforeach()
4949

50-
# Clean up list of compile definitions, add -D flags, and append to SYCL_flags
50+
# Clean up list of compile definitions, add -D flags, and append to SYCL_compile_flags
5151
list(REMOVE_DUPLICATES SYCL_compile_definitions)
5252
foreach(def ${SYCL_compile_definitions})
53-
list(APPEND SYCL_flags "-D${def}")
53+
list(APPEND SYCL_compile_flags "-D${def}")
5454
endforeach()
5555

5656
# Choose host flags in FindSYCL.cmake
@@ -134,7 +134,7 @@ SYCL_execute_process(
134134
${SYCL_include_args}
135135
${SYCL_host_compiler}
136136
${SYCL_host_compiler_flags}
137-
${SYCL_flags}
137+
${SYCL_compile_flags}
138138
)
139139

140140
if(SYCL_result)

0 commit comments

Comments
 (0)