Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions mlir/include/mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ void populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
RewritePatternSet &patterns,
amdgpu::Chipset chipset);

namespace amdgpu {
/// Remap common GPU memory spaces (Workgroup, Private, etc) to LLVM address
/// spaces.
void populateCommonAMDGPUTypeAndAttributeConversions(
TypeConverter &typeConverter);
void populateCommonGPUTypeAndAttributeConversions(TypeConverter &typeConverter);
} // namespace amdgpu

/// Remap AMDGPU memory spaces to LLVM address spaces
/// by mapping amdgpu::AddressSpace::fat_raw_buffer to ptr addrspace(7),
Expand Down
11 changes: 9 additions & 2 deletions mlir/include/mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ namespace mlir {

class Attribute;
class LLVMTypeConverter;
class MemRefType;
class MLIRContext;
class RewritePatternSet;
class MemRefType;
class Pass;
class RewritePatternSet;
class TypeConverter;

#define GEN_PASS_DECL_CONVERTNVGPUTONVVMPASS
#include "mlir/Conversion/Passes.h.inc"
Expand All @@ -34,6 +35,12 @@ MemRefType getMBarrierMemrefType(MLIRContext *context,
MBarrierGroupType barrierType);
} // namespace nvgpu

namespace nvgpu {
/// Remap common GPU memory spaces (Workgroup, Private, etc) to LLVM address
/// spaces.
///
/// Forwards `typeConverter` to populateGpuMemorySpaceAttributeConversions with
/// a mapping in which gpu::AddressSpace::Global becomes
/// NVVM::NVVMMemorySpace::Global, gpu::AddressSpace::Workgroup becomes
/// NVVM::NVVMMemorySpace::Shared, and gpu::AddressSpace::Private becomes
/// address space 0.
void populateCommonGPUTypeAndAttributeConversions(TypeConverter &typeConverter);
} // namespace nvgpu

void populateNVGPUToNVVMConversionPatterns(const LLVMTypeConverter &converter,
RewritePatternSet &patterns);
} // namespace mlir
Expand Down
4 changes: 2 additions & 2 deletions mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2998,7 +2998,7 @@ struct ConvertAMDGPUToROCDLPass
LLVMTypeConverter converter(ctx);

populateAMDGPUToROCDLConversionPatterns(converter, patterns, *maybeChipset);
populateCommonAMDGPUTypeAndAttributeConversions(converter);
amdgpu::populateCommonGPUTypeAndAttributeConversions(converter);
LLVMConversionTarget target(getContext());
target.addIllegalDialect<::mlir::amdgpu::AMDGPUDialect>();
target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
Expand All @@ -3010,7 +3010,7 @@ struct ConvertAMDGPUToROCDLPass
};
} // namespace

void mlir::populateCommonAMDGPUTypeAndAttributeConversions(
void mlir::amdgpu::populateCommonGPUTypeAndAttributeConversions(
TypeConverter &typeConverter) {
populateGpuMemorySpaceAttributeConversions(
typeConverter, [](gpu::AddressSpace space) {
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ add_mlir_conversion_library(MLIRGPUToNVVMTransforms
MLIRLLVMDialect
MLIRMemRefToLLVM
MLIRNVGPUDialect
MLIRNVGPUToNVVM
MLIRNVVMDialect
MLIRPass
MLIRTransformUtils
Expand Down
20 changes: 3 additions & 17 deletions mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
Expand Down Expand Up @@ -446,23 +447,8 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
}

void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) {
// NVVM uses alloca in the default address space to represent private
// memory allocations, so drop private annotations. NVVM uses address
// space 3 for shared memory. NVVM uses the default address space to
// represent global memory.
populateGpuMemorySpaceAttributeConversions(
converter, [](gpu::AddressSpace space) -> unsigned {
switch (space) {
case gpu::AddressSpace::Global:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
case gpu::AddressSpace::Workgroup:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case gpu::AddressSpace::Private:
return 0;
}
llvm_unreachable("unknown address space enum value");
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
});
nvgpu::populateCommonGPUTypeAndAttributeConversions(converter);

// Lowering for MMAMatrixType.
converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
return convertMMAToLLVMType(type);
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ struct LowerGpuOpsToROCDLOpsPass final
}

LLVMTypeConverter converter(ctx, options);
populateCommonAMDGPUTypeAndAttributeConversions(converter);
amdgpu::populateCommonGPUTypeAndAttributeConversions(converter);

RewritePatternSet llvmPatterns(ctx);
LLVMConversionTarget target(getContext());
Expand Down
35 changes: 22 additions & 13 deletions mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,19 +401,8 @@ struct ConvertNVGPUToNVVMPass
RewritePatternSet patterns(&getContext());
LLVMTypeConverter converter(&getContext(), options);
IRRewriter rewriter(&getContext());
populateGpuMemorySpaceAttributeConversions(
converter, [](gpu::AddressSpace space) -> unsigned {
switch (space) {
case gpu::AddressSpace::Global:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
case gpu::AddressSpace::Workgroup:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case gpu::AddressSpace::Private:
return 0;
}
llvm_unreachable("unknown address space enum value");
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
});
nvgpu::populateCommonGPUTypeAndAttributeConversions(converter);

/// device-side async tokens cannot be materialized in nvvm. We just
/// convert them to a dummy i32 type in order to easily drop them during
/// conversion.
Expand Down Expand Up @@ -1719,6 +1708,26 @@ struct NVGPURcpOpLowering : public ConvertOpToLLVMPattern<nvgpu::RcpOp> {
};
} // namespace

void mlir::nvgpu::populateCommonGPUTypeAndAttributeConversions(
    TypeConverter &typeConverter) {
  // Translation table from the GPU dialect's address-space enum to the
  // numeric address space used when lowering to NVVM.
  auto toNVVMAddressSpace = [](gpu::AddressSpace space) -> unsigned {
    switch (space) {
    case gpu::AddressSpace::Private:
      // Private allocations are represented with alloca in the default
      // address space, so the private annotation is dropped.
      return 0;
    case gpu::AddressSpace::Workgroup:
      // Workgroup memory is NVVM shared memory.
      return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
    case gpu::AddressSpace::Global:
      return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
    }
    // Every enumerator is handled above; reaching this point means the enum
    // held a value outside its declared set.
    llvm_unreachable("unknown address space enum value");
  };

  populateGpuMemorySpaceAttributeConversions(typeConverter,
                                             toNVVMAddressSpace);
}

void mlir::populateNVGPUToNVVMConversionPatterns(
const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
patterns.add<
Expand Down
23 changes: 3 additions & 20 deletions mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
Expand Down Expand Up @@ -62,25 +63,7 @@ using namespace mlir::transform::gpu;
void transform::ApplyGPUToNVVMConversionPatternsOp::populatePatterns(
TypeConverter &typeConverter, RewritePatternSet &patterns) {
auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
// NVVM uses alloca in the default address space to represent private
// memory allocations, so drop private annotations. NVVM uses address
// space 3 for shared memory. NVVM uses the default address space to
// represent global memory.
// Used in populateGpuToNVVMConversionPatterns so attaching here for now.
// TODO: We should have a single to_nvvm_type_converter.
populateGpuMemorySpaceAttributeConversions(
llvmTypeConverter, [](AddressSpace space) -> unsigned {
switch (space) {
case AddressSpace::Global:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
case AddressSpace::Workgroup:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case AddressSpace::Private:
return 0;
}
llvm_unreachable("unknown address space enum value");
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
});
nvgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter);
// Used in GPUToNVVM/WmmaOpsToNvvm.cpp so attaching here for now.
// TODO: We should have a single to_nvvm_type_converter.
llvmTypeConverter.addConversion(
Expand Down Expand Up @@ -129,7 +112,7 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns(
TypeConverter &typeConverter, RewritePatternSet &patterns) {
auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
populateCommonAMDGPUTypeAndAttributeConversions(llvmTypeConverter);
amdgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter);
FailureOr<amdgpu::Chipset> maybeChipset =
amdgpu::Chipset::parse(getChipset());
assert(llvm::succeeded(maybeChipset) && "expected valid chipset");
Expand Down
14 changes: 1 addition & 13 deletions mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,7 @@ void ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns(
/// device-side async tokens cannot be materialized in nvvm. We just
/// convert them to a dummy i32 type in order to easily drop them during
/// conversion.
populateGpuMemorySpaceAttributeConversions(
llvmTypeConverter, [](gpu::AddressSpace space) -> unsigned {
switch (space) {
case gpu::AddressSpace::Global:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
case gpu::AddressSpace::Workgroup:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case gpu::AddressSpace::Private:
return 0;
}
llvm_unreachable("unknown address space enum value");
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
});
nvgpu::populateCommonGPUTypeAndAttributeConversions(llvmTypeConverter);
llvmTypeConverter.addConversion([&](DeviceAsyncTokenType type) -> Type {
return llvmTypeConverter.convertType(
IntegerType::get(type.getContext(), 32));
Expand Down