diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake index 22a3f741f9..993decc324 100644 --- a/cmake/Codegen.cmake +++ b/cmake/Codegen.cmake @@ -1,89 +1,60 @@ -if(Codegen_GPU_cmake_included) +if(Codegen_XPU_cmake_included) return() endif() -set(Codegen_GPU_cmake_included true) +set(Codegen_XPU_cmake_included true) -set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen/") +set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen") file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED}) -set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp) -set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp) -set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp) -set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp) -set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template) +set(RegisterXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp) +set(RegisterSparseXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp) +set(RegisterSparseCsrXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp) +set(RegisterNestedTensorXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp) +set(XPUFallback_TEMPLATE ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template) +set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend) +set(XPU_AOTI_SHIM_HEADER ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h) +set(XPU_AOTI_SHIM_SOURCE ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp) if(WIN32) set(FILE_DISPLAY_CMD type) - # replace forward slash with back slash for compatibility with 'type' command on Windows - string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}") - string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}") - set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH}) else() set(FILE_DISPLAY_CMD cat) - set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH}) endif() +file(TO_NATIVE_PATH "${RegisterXPU_GENERATED}" RegisterXPU_GENERATED_NATIVE) +file(TO_NATIVE_PATH "${XPUFallback_TEMPLATE}" XPUFallback_TEMPLATE_NATIVE) +set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE_NATIVE} ">>" ${RegisterXPU_GENERATED_NATIVE}) -function(GEN_BACKEND file_yaml) - set(generated_files "") - foreach(f ${ARGN}) - list(APPEND generated_files "${BUILD_TORCH_XPU_ATEN_GENERATED}/${f}") - endforeach() - file(GLOB_RECURSE depended_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}) - add_custom_command( - OUTPUT ${generated_files} - COMMAND - "${PYTHON_EXECUTABLE}" -m torchgen.gen_backend_stubs - --output_dir ${BUILD_TORCH_XPU_ATEN_GENERATED} - --source_yaml ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml} - COMMAND - ${REGISTER_FALLBACK_CMD} - ${SIMPLE_TRACE} - WORKING_DIRECTORY ${TORCH_ROOT} - DEPENDS - ${depended_files} - ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml} - ${XPUFallback_PATH} - ) -endfunction(GEN_BACKEND) - - -set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp) -set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp) -set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp) -set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp) -set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template) -set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend) function(GEN_XPU file_yaml) set(generated_files "") foreach(f ${ARGN}) list(APPEND generated_files "${f}") endforeach() - file(GLOB_RECURSE depend_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}) - set(CODEGEN_TEMPLATE ${TORCH_XPU_OPS_ROOT}/yaml/) + set(CODEGEN_XPU_YAML_DIR ${TORCH_XPU_OPS_ROOT}/yaml) # Codegen prepare process if(WIN32) - string(REPLACE "/" "\\" DestPATH "${CODEGEN_TEMPLATE}templates") - string(REPLACE "/" "\\" SrcPATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates") + file(TO_NATIVE_PATH "${CODEGEN_XPU_YAML_DIR}/templates" DestPATH) + file(TO_NATIVE_PATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates" SrcPATH) execute_process(COMMAND cmd /c xcopy ${SrcPATH} ${DestPATH} /E /H /C /I /Y > nul) - string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}") - string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}") - set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH}) else() - execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_TEMPLATE}) # soft link to pytorch templates - set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH}) + execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_XPU_YAML_DIR}) # soft link to pytorch templates endif() - add_custom_command( - OUTPUT ${generated_files} - COMMAND + + set(XPU_CODEGEN_COMMAND "${PYTHON_EXECUTABLE}" -m torchgen.gen - --source-path ${TORCH_XPU_OPS_ROOT}/yaml/ + --source-path ${CODEGEN_XPU_YAML_DIR} --install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED} --per-operator-headers - --static-dispatch-backend --backend-whitelist XPU SparseXPU SparseCsrXPU NestedTensorXPU - # --xpu: generate in-tree RegisterXPU_0.cpp for in-tree OPs --xpu + ) + + add_custom_command( + COMMENT "Generating XPU ATen Codegen..." + OUTPUT ${generated_files} + COMMAND + ${XPU_CODEGEN_COMMAND} + --static-dispatch-backend # --update-aoti-c-shim: generate extend/c_shim_xpu.h --update-aoti-c-shim # --exten-aoti-c-shim: specifiy the extend/c_shim_xpu @@ -95,16 +66,14 @@ function(GEN_XPU file_yaml) COMMAND ${REGISTER_FALLBACK_CMD} # Codegen post-process - COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_PATH} - COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_PATH} - COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_PATH} - COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_PATH} - ${SIMPLE_TRACE} + COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_GENERATED} + COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_GENERATED} + COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_GENERATED} + COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_GENERATED} WORKING_DIRECTORY ${TORCH_ROOT} DEPENDS - ${depended_files} - ${TORCH_XPU_OPS_ROOT}/yaml/native/${file_yaml} - ${XPUFallback_PATH} + ${CODEGEN_XPU_YAML_DIR}/native/${file_yaml} + ${XPUFallback_TEMPLATE} ) # Post codegen delete the copied templates folder only on Windows. @@ -118,30 +87,29 @@ function(GEN_XPU file_yaml) endif() endfunction(GEN_XPU) -# GEN_BACKEND( -# xpu_functions.yaml -# XPUNativeFunctions.h -# RegisterXPU_0.cpp) - GEN_XPU( native_functions.yaml ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions.h - ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp - ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp - ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp - ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp - ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h - ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp + ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions_inl.h + ${RegisterXPU_GENERATED} + ${RegisterSparseXPU_GENERATED} + ${RegisterSparseCsrXPU_GENERATED} + ${RegisterNestedTensorXPU_GENERATED} + ${XPU_AOTI_SHIM_HEADER} + ${XPU_AOTI_SHIM_SOURCE} ) - # The c_shim_xpu.cpp needs include files in ${CMAKE_BINARY_DIR}/xpu/ATen/ops/*.h) # The include path is auto generated as "#include # To follow the design of aoti codegen, here ${CMAKE_BINARY_DIR}/xpu is added to # $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include " works. list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu) -list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterSparseCsrXPU_PATH} ${RegisterNestedTensorXPU_PATH}) -list(APPEND xpu_generated_src ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp) -add_custom_target(TORCH_XPU_GEN_TARGET DEPENDS ${xpu_generated_src}) +list(APPEND xpu_generated_src + ${RegisterXPU_GENERATED} + ${RegisterSparseXPU_GENERATED} + ${RegisterSparseCsrXPU_GENERATED} + ${RegisterNestedTensorXPU_GENERATED} + ${XPU_AOTI_SHIM_SOURCE} +) set(ATen_XPU_GEN_SRCS ${xpu_generated_src})