@@ -22,142 +22,162 @@ function(CHECK_SYCL_FLAG FLAG VARIABLE_NAME)
2222 file (REMOVE_RECURSE ${TEMP_DIR} )
2323endfunction ()
2424
25- # Support GCC on Linux and MSVC on Windows at the moment.
26- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
27- # # -- Host flags (SYCL_CXX_FLAGS)
28- if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
29- list (APPEND SYCL_HOST_FLAGS /std:c++17)
30- list (APPEND SYCL_HOST_FLAGS /MD )
31- list (APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
32- # SYCL headers warnings
33- list (APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
34- list (APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
35- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
36- list (APPEND SYCL_HOST_FLAGS -fPIC)
37- list (APPEND SYCL_HOST_FLAGS -std=c++17)
38- list (APPEND SYCL_HOST_FLAGS -Wunused-variable )
39- list (APPEND SYCL_HOST_FLAGS -Wno-interference-size)
40- # Some versions of DPC++ compiler pass paths to SYCL headers as user include paths (`-I`) rather
41- # than system paths (`-isystem`). This makes host compiler to report warnings encountered in the
42- # SYCL headers, such as deprecated warnings, even if warned API is not actually used in the program.
43- # We expect that this issue will be addressed in the later version of DPC++ compiler. To workaround
44- # the issue we wrap paths to SYCL headers in `-isystem`.
45- foreach (FLAGS IN LISTS SYCL_INCLUDE_DIR)
46- list (APPEND SYCL_HOST_FLAGS "-isystem ${FLAGS} " )
47- endforeach ()
48- # Excluding warnings which flood the compilation output
49- # TODO: fix warnings in the source code and then reenable them in compilation
50- list (APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
51- endif ()
25+ macro (set_build_flags)
26+ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
27+ set (SYCL_HOST_FLAGS)
28+ set (SYCL_KERNEL_OPTIONS)
29+ set (SYCL_COMPILE_FLAGS ${SYCL_FLAGS} )
30+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_LINK_FLAGS} )
31+ set (SYCL_OFFLINE_COMPILER_AOT_OPTIONS)
32+ set (SYCL_OFFLINE_COMPILER_CG_OPTIONS)
33+ set (SYCL_OFFLINE_COMPILER_FLAGS)
5234
53- if (CMAKE_BUILD_TYPE MATCHES Debug)
54- list (APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
55- elseif (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
56- list (APPEND SYCL_HOST_FLAGS -g -O2)
57- endif ()
58- if (USE_PER_OPERATOR_HEADERS)
59- list (APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
60- endif ()
61- list (APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER} )
62- # -- Kernel flags (SYCL_KERNEL_OPTIONS)
63- # The fast-math will be enabled by default in SYCL compiler.
64- # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
65- # 1. We enable below flags here to be warn about NaN and Infinity,
66- # which will be hidden by fast-math by default.
67- # 2. The associative-math in fast-math allows floating point
68- # operations to be reassociated, which will lead to non-deterministic
69- # results compared with CUDA backend.
70- # 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
71- # to be replaced with an approximately equivalent set of instructions or
72- # alternative math function calls, which have great errors.
73- #
74- # PSEUDO of separate compilation with DPCPP compiler.
75- # 1. Kernel source compilation:
76- # icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_FLAGS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
77- # 2. Device code linkage:
78- # icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
79- # 3. Host only source compilation:
80- # gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
81- # 4. Linkage:
82- # gcc -shared host.o kernel.o device-code.o -o libxxx.so
83- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
84- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
85- if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
86- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
87- # Suppress warnings about dllexport.
88- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-ignored-attributes)
89- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
90- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
91- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
92- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
93- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
94- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute -value )
95- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no -ftz)
96- endif ()
35+ if (REPLACE_FLAGS_FOR_SYCLTLA)
36+ set (CPP_STD c++20)
37+ else ()
38+ set (CPP_STD c++17)
39+ endif ()
40+ # -- Host flags (SYCL_HOST_FLAGS)
41+ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
42+ list (APPEND SYCL_HOST_FLAGS /std:${CPP_STD} )
43+ list (APPEND SYCL_HOST_FLAGS /MD )
44+ list (APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
45+ # SYCL headers warnings
46+ list (APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
47+ list (APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
48+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
49+ list (APPEND SYCL_HOST_FLAGS -fPIC)
50+ list (APPEND SYCL_HOST_FLAGS -std=${CPP_STD} )
51+ list (APPEND SYCL_HOST_FLAGS -Wunused-variable )
52+ list (APPEND SYCL_HOST_FLAGS -Wno-interference-size)
53+ # Some versions of the DPC++ compiler pass paths to SYCL headers as user include paths (`-I`) rather
54+ # than system paths (`-isystem`). This makes the host compiler report warnings encountered in the
55+ # SYCL headers, such as deprecation warnings, even if the warned API is not actually used in the program.
56+ # We expect that this issue will be addressed in a later version of the DPC++ compiler. To work around
57+ # the issue, we wrap paths to SYCL headers in `-isystem`.
58+ foreach (FLAGS IN LISTS SYCL_INCLUDE_DIR)
59+ list (APPEND SYCL_HOST_FLAGS "-isystem ${FLAGS} " )
60+ endforeach ()
61+ # Excluding warnings which flood the compilation output
62+ # TODO: fix warnings in the source code and then reenable them in compilation
63+ list (APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
64+ endif ()
9765
98- if (CMAKE_BUILD_TYPE MATCHES Debug)
99- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
100- elseif (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
101- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
102- endif ()
66+ if (CMAKE_BUILD_TYPE MATCHES Debug)
67+ list (APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
68+ elseif (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
69+ list (APPEND SYCL_HOST_FLAGS -g -O2)
70+ endif ()
71+ if (USE_PER_OPERATOR_HEADERS)
72+ list (APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
73+ endif ()
74+ # -- Kernel flags (SYCL_KERNEL_OPTIONS)
75+ # Fast-math is enabled by default in the SYCL compiler.
76+ # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
77+ # 1. We enable the flags below to be warned about NaN and Infinity,
78+ # which would otherwise be hidden by fast-math by default.
79+ # 2. The associative-math option in fast-math allows floating-point
80+ # operations to be reassociated, which leads to non-deterministic
81+ # results compared with the CUDA backend.
82+ # 3. The approx-func option allows certain math function calls (such as log, sqrt, pow, etc.)
83+ # to be replaced with an approximately equivalent set of instructions or
84+ # alternative math function calls, which introduce large errors.
85+ #
86+ # Pseudo-commands for separate compilation with the DPC++ compiler.
87+ # 1. Kernel source compilation:
88+ # icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_KERNEL_OPTIONS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
89+ # 2. Device code linkage:
90+ # icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
91+ # 3. Host only source compilation:
92+ # gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
93+ # 4. Linkage:
94+ # gcc -shared host.o kernel.o device-code.o -o libxxx.so
95+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
96+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
97+ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" )
98+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
99+ # Suppress warnings about dllexport.
100+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-ignored-attributes)
101+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
102+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
103+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
104+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
105+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
106+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute -value )
107+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no -ftz)
108+ endif ()
103109
104- CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
105- if (NOT SUPPORTS_FP64_CONV_EMU)
106- message (WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
107- will disable it. On some platforms that don't support FP64, \
108- running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
109- or a Native API failed error." )
110- endif ()
110+ if (CMAKE_BUILD_TYPE MATCHES Debug)
111+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
112+ elseif (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
113+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
114+ endif ()
115+
116+ CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
117+ if (NOT SUPPORTS_FP64_CONV_EMU)
118+ message (WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
119+ will disable it. On some platforms that don't support FP64, \
120+ running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
121+ or a Native API failed error." )
122+ endif ()
111123
112- set (TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS} )
124+ set (TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS} )
113125
114- # -- SYCL device object linkage flags
115- include (ProcessorCount)
116- ProcessorCount(proc_cnt)
117- if ((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS} " LESS_EQUAL ${proc_cnt} ))
118- set (SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS} )
119- else ()
120- set (SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt} )
121- endif ()
122- set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS} )
123- set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
126+ # -- SYCL device object linkage flags
127+ include (ProcessorCount)
128+ ProcessorCount(proc_cnt)
129+ if ((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS} " LESS_EQUAL ${proc_cnt} ))
130+ set (SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS} )
131+ else ()
132+ set (SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt} )
133+ endif ()
134+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS} )
135+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
124136
125- set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels" )
126- set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode" )
127- set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt" )
128- set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required" )
137+ set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels" )
138+ set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode" )
139+ set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt" )
140+ set (SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required" )
129141
130- if (WIN32 )
131- set (AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m,ptl" )
132- else ()
133- set (AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m,ptl-h,ptl-u" )
134- endif ()
135- if (TORCH_XPU_ARCH_LIST)
136- set (AOT_TARGETS "${TORCH_XPU_ARCH_LIST} " )
137- endif ()
138- if (AOT_TARGETS STREQUAL "none" )
139- set (TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
140- else ()
141- # Enable FP64 conversion emulation for DG2 / ATS-M targets
142- if (SUPPORTS_FP64_CONV_EMU)
143- string (FIND "${AOT_TARGETS} " "dg2" _dg2_index)
144- string (FIND "${AOT_TARGETS} " "ats-m" _atsm_index)
145- if (_dg2_index GREATER_EQUAL 0 OR _atsm_index GREATER_EQUAL 0)
146- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
142+ if (REPLACE_FLAGS_FOR_SYCLTLA)
143+ set (SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen)
144+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION} )
145+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION} )
146+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate" )
147+ set (SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device pvc,bmg" )
148+ else ()
149+ if (WIN32 )
150+ set (AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m,ptl" )
151+ else ()
152+ set (AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m,ptl-h,ptl-u" )
153+ endif ()
154+ if (TORCH_XPU_ARCH_LIST)
155+ set (AOT_TARGETS "${TORCH_XPU_ARCH_LIST} " )
156+ endif ()
157+ if (AOT_TARGETS STREQUAL "none" )
158+ set (TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
159+ else ()
160+ if (SUPPORTS_FP64_CONV_EMU)
161+ string (FIND "${AOT_TARGETS} " "dg2" _dg2_index)
162+ string (FIND "${AOT_TARGETS} " "ats-m" _atsm_index)
163+ if (_dg2_index GREATER_EQUAL 0 OR _atsm_index GREATER_EQUAL 0)
164+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
165+ endif ()
166+ endif ()
167+ set (SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
168+ set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION} )
169+ set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION} )
170+ set (SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS} " )
171+ set (TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
147172 endif ()
173+ message (STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS} " )
148174 endif ()
149- set (SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
150- set (SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION} )
151- set (SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION} )
152- set (SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS} " )
153- set (TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
154- endif ()
155- message (STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS} " )
156175
157- set (SYCL_FLAGS ${SYCL_FLAGS } ${SYCL_KERNEL_OPTIONS} )
176+ set (SYCL_COMPILE_FLAGS ${SYCL_COMPILE_FLAGS } ${SYCL_KERNEL_OPTIONS} )
158177
159- set (SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS} " )
160- else ()
161- message ("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler." )
162- return ()
163- endif ()
178+ set (SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS} " )
179+ else ()
180+ message ("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler." )
181+ return ()
182+ endif ()
183+ endmacro ()
0 commit comments