diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index 094360e75ab61..cf0c96f0eba00 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -64,7 +64,8 @@ struct FunctionCallBuilder {
 /// populate converter for gpu types.
 void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                          RewritePatternSet &patterns,
-                                         bool kernelBarePtrCallConv = false);
+                                         bool kernelBarePtrCallConv = false,
+                                         bool typeCheckKernelArgs = false);
 
 /// A function that maps a MemorySpace enum to a target-specific integer value.
 using MemorySpaceMapping = std::function<unsigned(gpu::AddressSpace)>;
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index afeed370ce347..0f42ffb3a8026 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -517,6 +517,12 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
            /*default=*/"false",
            "Use bare pointers to pass memref arguments to kernels. "
            "The kernel must use the same setting for this option."
+           >,
+    Option<"typeCheckKernelArgs", "type-check-kernel-args", "bool",
+           /*default=*/"false",
+           "Require all kernel arguments to be memrefs of rank 1 and with a "
+           "32-bit element size. This is a temporary option that will be "
+           "removed; TODO(https://github.com/llvm/llvm-project/issues/73457)."
            >
   ];
 
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 83208e0c42da2..ca9883a79dc16 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -427,9 +427,11 @@ class LegalizeLaunchFuncOpPattern
     : public ConvertOpToGpuRuntimeCallPattern<gpu::LaunchFuncOp> {
 public:
   LegalizeLaunchFuncOpPattern(const LLVMTypeConverter &typeConverter,
-                              bool kernelBarePtrCallConv)
+                              bool kernelBarePtrCallConv,
+                              bool typeCheckKernelArgs)
       : ConvertOpToGpuRuntimeCallPattern<gpu::LaunchFuncOp>(typeConverter),
-        kernelBarePtrCallConv(kernelBarePtrCallConv) {}
+        kernelBarePtrCallConv(kernelBarePtrCallConv),
+        typeCheckKernelArgs(typeCheckKernelArgs) {}
 
 private:
   LogicalResult
@@ -437,6 +439,7 @@ class LegalizeLaunchFuncOpPattern
                   ConversionPatternRewriter &rewriter) const override;
 
   bool kernelBarePtrCallConv;
+  bool typeCheckKernelArgs;
 };
 
 /// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime
@@ -563,8 +566,8 @@ void GpuToLLVMConversionPass::runOnOperation() {
   populateFinalizeMemRefToLLVMConversionPatterns(converter, patterns);
   populateAsyncStructuralTypeConversionsAndLegality(converter, patterns,
                                                     target);
-  populateGpuToLLVMConversionPatterns(converter, patterns,
-                                      kernelBarePtrCallConv);
+  populateGpuToLLVMConversionPatterns(
+      converter, patterns, kernelBarePtrCallConv, typeCheckKernelArgs);
 
   if (failed(
           applyPartialConversion(getOperation(), target, std::move(patterns))))
@@ -966,6 +969,28 @@ LogicalResult LegalizeLaunchFuncOpPattern::matchAndRewrite(
   // stream must be created to pass to subsequent operations.
   else if (launchOp.getAsyncToken())
     stream = streamCreateCallBuilder.create(loc, rewriter, {}).getResult();
+
+  if (typeCheckKernelArgs) {
+    // The current non-bare-pointer ABI is a bad fit for `mgpuLaunchKernel`,
+    // which takes an untyped list of arguments. The type check here prevents
+    // accidentally violating the assumption made in vulkan-runtime-wrappers.cpp
+    // and creating an unchecked runtime ABI mismatch.
+    // TODO(https://github.com/llvm/llvm-project/issues/73457): Change the ABI
+    // here to remove the need for this type check.
+    for (Value arg : launchOp.getKernelOperands()) {
+      if (auto memrefTy = dyn_cast<MemRefType>(arg.getType())) {
+        if (memrefTy.getRank() != 1 ||
+            memrefTy.getElementTypeBitWidth() != 32) {
+          return rewriter.notifyMatchFailure(
+              launchOp, "Operand to launch op is not a rank-1 memref with "
+                        "32-bit element type.");
+        }
+      } else {
+        return rewriter.notifyMatchFailure(
+            launchOp, "Operand to launch op is not a memref.");
+      }
+    }
+  }
   // Lower the kernel operands to match kernel parameters.
   // Note: If `useBarePtrCallConv` is set in the type converter's options,
   // the value of `kernelBarePtrCallConv` will be ignored.
@@ -1737,7 +1762,8 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite(
 
 void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                                RewritePatternSet &patterns,
-                                               bool kernelBarePtrCallConv) {
+                                               bool kernelBarePtrCallConv,
+                                               bool typeCheckKernelArgs) {
   addOpaquePointerConversion<gpu::AsyncTokenType>(converter);
   addOpaquePointerConversion<gpu::SparseDnTensorHandleType>(converter);
   addOpaquePointerConversion<gpu::SparseSpMatHandleType>(converter);
@@ -1774,7 +1800,8 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
                ConvertSpGEMMCopyOpToGpuRuntimeCallPattern,
                ConvertSpMatGetSizeOpToGpuRuntimeCallPattern,
                ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter);
-  patterns.add<LegalizeLaunchFuncOpPattern>(converter, kernelBarePtrCallConv);
+  patterns.add<LegalizeLaunchFuncOpPattern>(converter, kernelBarePtrCallConv,
+                                            typeCheckKernelArgs);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
index 789c4d76cee0d..a3624eb31e26e 100644
--- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
+++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
@@ -11,9 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Dialect/SPIRV/Transforms/Passes.h"
@@ -29,6 +33,9 @@ struct VulkanRunnerPipelineOptions
   Option<bool> spirvWebGPUPrepare{
       *this, "spirv-webgpu-prepare",
       llvm::cl::desc("Run MLIR transforms used when targetting WebGPU")};
+  Option<bool> toLlvm{*this, "to-llvm",
+                      llvm::cl::desc("Run MLIR transforms to lower host code "
+                                     "to LLVM, intended for mlir-cpu-runner")};
 };
 
 void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
@@ -56,6 +63,19 @@ void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
     spirvModulePM.addPass(spirv::createSPIRVWebGPUPreparePass());
 
   passManager.addPass(createGpuModuleToBinaryPass());
+
+  if (options.toLlvm) {
+    passManager.addPass(createFinalizeMemRefToLLVMConversionPass());
+    passManager.nest<func::FuncOp>().addPass(
+        LLVM::createRequestCWrappersPass());
+    // vulkan-runtime-wrappers.cpp uses the non-bare-pointer calling convention,
+    // and the type check is needed to prevent accidental ABI mismatches.
+    GpuToLLVMConversionPassOptions opt;
+    opt.hostBarePtrCallConv = false;
+    opt.kernelBarePtrCallConv = false;
+    opt.typeCheckKernelArgs = true;
+    passManager.addPass(createGpuToLLVMConversionPass(opt));
+  }
 }
 
 } // namespace
@@ -65,7 +85,7 @@ void registerTestVulkanRunnerPipeline() {
   PassPipelineRegistration<VulkanRunnerPipelineOptions>(
       "test-vulkan-runner-pipeline",
       "Runs a series of passes for lowering GPU-dialect MLIR to "
-      "SPIR-V-dialect MLIR intended for mlir-vulkan-runner.",
+      "SPIR-V-dialect MLIR intended for mlir-vulkan-runner or mlir-cpu-runner.",
       buildTestVulkanRunnerPipeline);
 }
 } // namespace mlir::test
diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir
index d435f75a28805..71f87a8b0d5c8 100644
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: [3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addf_if.mlir b/mlir/test/mlir-vulkan-runner/addf_if.mlir
index 8ae995c65e7e8..6fe51a83482dc 100644
--- a/mlir/test/mlir-vulkan-runner/addf_if.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf_if.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: [3.3, 3.3, 3.3, 3.3, 0, 0, 0, 0]
 module attributes {
diff --git a/mlir/test/mlir-vulkan-runner/addui_extended.mlir b/mlir/test/mlir-vulkan-runner/addui_extended.mlir
index b8db451421459..0894bc301f2e3 100644
--- a/mlir/test/mlir-vulkan-runner/addui_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/addui_extended.mlir
@@ -1,13 +1,13 @@
 // Make sure that addition with carry produces expected results
 // with and without expansion to primitive add/cmp ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/test/mlir-vulkan-runner/smul_extended.mlir b/mlir/test/mlir-vulkan-runner/smul_extended.mlir
index 334aec843e197..0ef86f46562e8 100644
--- a/mlir/test/mlir-vulkan-runner/smul_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/smul_extended.mlir
@@ -1,13 +1,13 @@
 // Make sure that signed extended multiplication produces expected results
 // with and without expansion to primitive mul/add ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir
index 6a0bfef36793b..f628447874238 100644
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/mlir-vulkan-runner/time.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
 
 // CHECK: Compute shader execution time
 // CHECK: Command buffer submit time
diff --git a/mlir/test/mlir-vulkan-runner/umul_extended.mlir b/mlir/test/mlir-vulkan-runner/umul_extended.mlir
index 803b8c3d336d3..5936c808435c1 100644
--- a/mlir/test/mlir-vulkan-runner/umul_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/umul_extended.mlir
@@ -1,13 +1,13 @@
 // Make sure that unsigned extended multiplication produces expected results
 // with and without expansion to primitive mul/add ops for WebGPU.
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline="spirv-webgpu-prepare to-llvm" \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir b/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir
index 097f3905949d8..ebeb19cd6bcc5 100644
--- a/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir
+++ b/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/test/mlir-vulkan-runner/vector-interleave.mlir b/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
index 5dd4abbd1fb19..9314baf9b39c7 100644
--- a/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
+++ b/mlir/test/mlir-vulkan-runner/vector-interleave.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir b/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
index be97b48b1812e..cf3e2c569426b 100644
--- a/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
+++ b/mlir/test/mlir-vulkan-runner/vector-shuffle.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline \
-// RUN: | mlir-vulkan-runner - \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=to-llvm \
+// RUN: | mlir-cpu-runner - \
 // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN: --entry-point-result=void | FileCheck %s
 
diff --git a/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp b/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp
index f1ed571734459..ffd1114cec6aa 100644
--- a/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp
@@ -13,6 +13,8 @@
 #include <iostream>
 #include <mutex>
 #include <numeric>
+#include <string>
+#include <vector>
 
 #include "VulkanRuntime.h"
 
@@ -26,6 +28,38 @@
 
 namespace {
 
+class VulkanModule;
+
+// Handle returned from `mgpuModuleGetFunction`: a module plus a kernel name.
+struct VulkanFunction {
+  VulkanModule *module;
+  std::string name;
+
+  VulkanFunction(VulkanModule *module, const char *name)
+      : module(module), name(name) {}
+};
+
+// Class to own a copy of the SPIR-V provided to `mgpuModuleLoad` and to manage
+// allocation of pointers returned from `mgpuModuleGetFunction`.
+class VulkanModule {
+public:
+  VulkanModule(const uint8_t *ptr, size_t sizeInBytes)
+      : blob(ptr, ptr + sizeInBytes) {}
+  ~VulkanModule() = default;
+
+  VulkanFunction *getFunction(const char *name) {
+    return functions.emplace_back(std::make_unique<VulkanFunction>(this, name))
+        .get();
+  }
+
+  uint8_t *blobData() { return blob.data(); }
+  size_t blobSizeInBytes() const { return blob.size(); }
+
+private:
+  std::vector<uint8_t> blob;
+  std::vector<std::unique_ptr<VulkanFunction>> functions;
+};
+
 class VulkanRuntimeManager {
 public:
   VulkanRuntimeManager() = default;
@@ -91,6 +125,94 @@ void bindMemRef(void *vkRuntimeManager, DescriptorSetIndex setIndex,
 }
 
 extern "C" {
+
+//===----------------------------------------------------------------------===//
+//
+// New wrappers, intended for mlir-cpu-runner. Calls to these are generated by
+// GPUToLLVMConversionPass.
+//
+//===----------------------------------------------------------------------===//
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void *mgpuStreamCreate() {
+  return new VulkanRuntimeManager();
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void mgpuStreamDestroy(void *vkRuntimeManager) {
+  delete static_cast<VulkanRuntimeManager *>(vkRuntimeManager);
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void mgpuStreamSynchronize(void *) {
+  // Currently a no-op as the other operations are synchronous.
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void *mgpuModuleLoad(const void *data,
+                                                  size_t gpuBlobSize) {
+  // gpuBlobSize is the size of the data in bytes.
+  return new VulkanModule(static_cast<const uint8_t *>(data), gpuBlobSize);
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void mgpuModuleUnload(void *vkModule) {
+  delete static_cast<VulkanModule *>(vkModule);
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void *mgpuModuleGetFunction(void *vkModule,
+                                                         const char *name) {
+  if (!vkModule)
+    abort();
+  return static_cast<VulkanModule *>(vkModule)->getFunction(name);
+}
+
+VULKAN_WRAPPER_SYMBOL_EXPORT void
+mgpuLaunchKernel(void *vkKernel, size_t gridX, size_t gridY, size_t gridZ,
+                 size_t /*blockX*/, size_t /*blockY*/, size_t /*blockZ*/,
+                 size_t /*smem*/, void *vkRuntimeManager, void **params,
+                 void ** /*extra*/, size_t paramsCount) {
+  auto manager = static_cast<VulkanRuntimeManager *>(vkRuntimeManager);
+
+  // The non-bare-pointer memref ABI interacts badly with mgpuLaunchKernel's
+  // signature:
+  // - The memref descriptor struct gets split into several elements, each
+  //   passed as their own "param".
+  // - No metadata is provided as to the rank or element type/size of a memref.
+  // Here we assume that all MemRefs have rank 1 and an element size of
+  // 4 bytes. This means each descriptor struct will have five members.
+  // TODO(https://github.com/llvm/llvm-project/issues/73457): Refactor the
+  // ABI/API of mgpuLaunchKernel to use a different ABI for memrefs, so
+  // that other memref types can also be used. This will allow migrating
+  // the remaining tests and removal of mlir-vulkan-runner.
+  const size_t paramsPerMemRef = 5;
+  if (paramsCount % paramsPerMemRef != 0) {
+    abort();
+  }
+  const DescriptorSetIndex setIndex = 0;
+  BindingIndex bindIndex = 0;
+  for (size_t i = 0; i < paramsCount; i += paramsPerMemRef) {
+    auto memref = static_cast<MemRefDescriptor<uint32_t, 1> *>(params[i]);
+    bindMemRef(manager, setIndex, bindIndex, memref);
+    ++bindIndex;
+  }
+
+  manager->setNumWorkGroups(NumWorkGroups{static_cast<uint32_t>(gridX),
+                                          static_cast<uint32_t>(gridY),
+                                          static_cast<uint32_t>(gridZ)});
+
+  auto function = static_cast<VulkanFunction *>(vkKernel);
+  // setShaderModule expects the size in bytes.
+  manager->setShaderModule(
+      function->module->blobData(),
+      static_cast<uint32_t>(function->module->blobSizeInBytes()));
+  manager->setEntryPoint(function->name.c_str());
+
+  manager->runOnVulkan();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Old wrappers, intended for mlir-vulkan-runner. Calls to these are generated
+// by LaunchFuncToVulkanCallsPass.
+//
+//===----------------------------------------------------------------------===//
+
 /// Initializes `VulkanRuntimeManager` and returns a pointer to it.
 VULKAN_WRAPPER_SYMBOL_EXPORT void *initVulkan() {
   return new VulkanRuntimeManager();
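
Note: the `paramsPerMemRef = 5` constant in `mgpuLaunchKernel` above reflects how the default (non-bare-pointer) convention lowers a rank-1 memref: the descriptor is exploded into five fields, which the wrapper then reinterprets through its `MemRefDescriptor<uint32_t, 1>` template. The sketch below is illustrative only; the struct and helper names are assumptions for exposition and are not part of this patch.

```cpp
#include <cstddef>
#include <cstdint>

// Assumed field layout of a rank-1 memref descriptor of 32-bit elements under
// the default (non-bare-pointer) LLVM lowering: two pointers, an offset, one
// size, and one stride -- five fields, matching paramsPerMemRef = 5 above.
struct Rank1MemRef32 {
  uint32_t *allocated; // allocated base pointer
  uint32_t *aligned;   // aligned pointer to the data actually accessed
  int64_t offset;      // element offset into the aligned buffer
  int64_t size;        // number of elements (one entry because rank == 1)
  int64_t stride;      // stride in elements (one entry because rank == 1)
};

// Hypothetical helper mirroring the divisibility check in mgpuLaunchKernel: a
// packed params list is only well formed if it splits into whole descriptors.
inline bool paramsLookLikeRank1MemRefs(size_t paramsCount) {
  constexpr size_t paramsPerMemRef = 5;
  return paramsCount % paramsPerMemRef == 0;
}
```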