diff --git a/mlir/include/mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h b/mlir/include/mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h index 4942c39f9745f..393658652dbac 100644 --- a/mlir/include/mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h +++ b/mlir/include/mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h @@ -29,10 +29,11 @@ void populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, amdgpu::Chipset chipset); +namespace amdgpu { /// Remap common GPU memory spaces (Workgroup, Private, etc) to LLVM address /// spaces. -void populateCommonAMDGPUTypeAndAttributeConversions( - TypeConverter &typeConverter); +void populateCommonGPUTypeAndAttributeConversions(TypeConverter &typeConverter); +} // namespace amdgpu /// Remap AMDGPU memory spaces to LLVM address spaces /// by mapping amdgpu::AddressSpace::fat_raw_buffer to ptr addrspace(7), diff --git a/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h b/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h index ee5b8cecb529c..bcdf58aec4bb9 100644 --- a/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h +++ b/mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h @@ -14,10 +14,11 @@ namespace mlir { class Attribute; class LLVMTypeConverter; -class MemRefType; class MLIRContext; -class RewritePatternSet; +class MemRefType; class Pass; +class RewritePatternSet; +class TypeConverter; #define GEN_PASS_DECL_CONVERTNVGPUTONVVMPASS #include "mlir/Conversion/Passes.h.inc" @@ -34,6 +35,12 @@ MemRefType getMBarrierMemrefType(MLIRContext *context, MBarrierGroupType barrierType); } // namespace nvgpu +namespace nvgpu { +/// Remap common GPU memory spaces (Workgroup, Private, etc) to LLVM address +/// spaces. 
+void populateCommonGPUTypeAndAttributeConversions(TypeConverter &typeConverter); +} // namespace nvgpu + void populateNVGPUToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns); } // namespace mlir diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 455e59c4a272a..af825ad42dd82 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -2998,7 +2998,7 @@ struct ConvertAMDGPUToROCDLPass LLVMTypeConverter converter(ctx); populateAMDGPUToROCDLConversionPatterns(converter, patterns, *maybeChipset); - populateCommonAMDGPUTypeAndAttributeConversions(converter); + amdgpu::populateCommonGPUTypeAndAttributeConversions(converter); LLVMConversionTarget target(getContext()); target.addIllegalDialect<::mlir::amdgpu::AMDGPUDialect>(); target.addLegalDialect<::mlir::LLVM::LLVMDialect>(); @@ -3010,7 +3010,7 @@ struct ConvertAMDGPUToROCDLPass }; } // namespace -void mlir::populateCommonAMDGPUTypeAndAttributeConversions( +void mlir::amdgpu::populateCommonGPUTypeAndAttributeConversions( TypeConverter &typeConverter) { populateGpuMemorySpaceAttributeConversions( typeConverter, [](gpu::AddressSpace space) { diff --git a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt index 1debf4e770569..983aadf2c1517 100644 --- a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt @@ -19,6 +19,7 @@ add_mlir_conversion_library(MLIRGPUToNVVMTransforms MLIRLLVMDialect MLIRMemRefToLLVM MLIRNVGPUDialect + MLIRNVGPUToNVVM MLIRNVVMDialect MLIRPass MLIRTransformUtils diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 5848489274c13..2561ca00d4b4f 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ 
b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -19,6 +19,7 @@ #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -446,23 +447,8 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { } void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) { - // NVVM uses alloca in the default address space to represent private - // memory allocations, so drop private annotations. NVVM uses address - // space 3 for shared memory. NVVM uses the default address space to - // represent global memory. - populateGpuMemorySpaceAttributeConversions( - converter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global); - case gpu::AddressSpace::Workgroup: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic); - }); + nvgpu::populateCommonGPUTypeAndAttributeConversions(converter); + // Lowering for MMAMatrixType. 
converter.addConversion([&](gpu::MMAMatrixType type) -> Type { return convertMMAToLLVMType(type); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 7d7feb58aa726..51741414d2060 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -349,7 +349,7 @@ struct LowerGpuOpsToROCDLOpsPass final } LLVMTypeConverter converter(ctx, options); - populateCommonAMDGPUTypeAndAttributeConversions(converter); + amdgpu::populateCommonGPUTypeAndAttributeConversions(converter); RewritePatternSet llvmPatterns(ctx); LLVMConversionTarget target(getContext()); diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index 64a7f562af0e5..6edc8f5c86dd3 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -401,19 +401,8 @@ struct ConvertNVGPUToNVVMPass RewritePatternSet patterns(&getContext()); LLVMTypeConverter converter(&getContext(), options); IRRewriter rewriter(&getContext()); - populateGpuMemorySpaceAttributeConversions( - converter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global); - case gpu::AddressSpace::Workgroup: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic); - }); + nvgpu::populateCommonGPUTypeAndAttributeConversions(converter); + /// device-side async tokens cannot be materialized in nvvm. We just /// convert them to a dummy i32 type in order to easily drop them during /// conversion. 
@@ -1719,6 +1708,26 @@ struct NVGPURcpOpLowering : public ConvertOpToLLVMPattern<nvgpu::RcpOp> { }; } // namespace +void mlir::nvgpu::populateCommonGPUTypeAndAttributeConversions( + TypeConverter &typeConverter) { + // NVVM uses alloca in the default address space to represent private + // memory allocations, so drop private annotations. NVVM uses address + // space 3 for shared memory. NVVM uses the default address space to + // represent global memory. + populateGpuMemorySpaceAttributeConversions( + typeConverter, [](gpu::AddressSpace space) -> unsigned { + switch (space) { + case gpu::AddressSpace::Global: + return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global); + case gpu::AddressSpace::Workgroup: + return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared); + case gpu::AddressSpace::Private: + return 0; + } + llvm_unreachable("unknown address space enum value"); + }); +} + void mlir::populateNVGPUToNVVMConversionPatterns( const LLVMTypeConverter &converter, RewritePatternSet &patterns) { patterns.add< diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index fdace3b662314..a95cc1f10b6cf 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -13,6 +13,7 @@ #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -62,25 +63,7 @@ using namespace mlir::transform::gpu; void transform::ApplyGPUToNVVMConversionPatternsOp::populatePatterns( TypeConverter &typeConverter, RewritePatternSet &patterns) { auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter); - // NVVM uses alloca in the default address space to represent private - // memory allocations, so drop 
private annotations. NVVM uses address - // space 3 for shared memory. NVVM uses the default address space to - // represent global memory. - // Used in populateGpuToNVVMConversionPatternsso attaching here for now. - // TODO: We should have a single to_nvvm_type_converter. - populateGpuMemorySpaceAttributeConversions( - llvmTypeConverter, [](AddressSpace space) -> unsigned { - switch (space) { - case AddressSpace::Global: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global); - case AddressSpace::Workgroup: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared); - case AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic); - }); + nvgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter); // Used in GPUToNVVM/WmmaOpsToNvvm.cpp so attaching here for now. // TODO: We should have a single to_nvvm_type_converter. llvmTypeConverter.addConversion( @@ -129,7 +112,7 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp:: void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns( TypeConverter &typeConverter, RewritePatternSet &patterns) { auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter); - populateCommonAMDGPUTypeAndAttributeConversions(llvmTypeConverter); + amdgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter); FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(getChipset()); assert(llvm::succeeded(maybeChipset) && "expected valid chipset"); diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp index 0d053139e79de..4e6b8ea43e698 100644 --- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp +++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp @@ -49,19 +49,7 @@ void ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns( /// device-side async tokens cannot be materialized in nvvm. 
We just /// convert them to a dummy i32 type in order to easily drop them during /// conversion. - populateGpuMemorySpaceAttributeConversions( - llvmTypeConverter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global); - case gpu::AddressSpace::Workgroup: - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic); - }); + nvgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter); llvmTypeConverter.addConversion([&](DeviceAsyncTokenType type) -> Type { return llvmTypeConverter.convertType( IntegerType::get(type.getContext(), 32));