@@ -3218,11 +3218,6 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
32183218
32193219 Location loc = op.getLoc ();
32203220
3221- IntegerType i32 = rewriter.getI32Type ();
3222- [[maybe_unused]] Type v4i32 =
3223- this ->typeConverter ->convertType (VectorType::get (4 , i32 ));
3224- assert (v4i32 && " expected type conversion to succeed" );
3225-
32263221 SmallVector<Value> consts;
32273222 for (int64_t i = 0 ; i < 8 ; ++i)
32283223 consts.push_back (createI32Constant (rewriter, loc, i));
@@ -3237,6 +3232,32 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
32373232 }
32383233};
32393234
3235+ template <typename SourceOp, typename TargetOp>
3236+ struct AMDGPUTensorLoadStoreOpLowering
3237+ : public ConvertOpToLLVMPattern<SourceOp> {
3238+ using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
3239+ using Adaptor = typename ConvertOpToLLVMPattern<SourceOp>::OneToNOpAdaptor;
3240+ AMDGPUTensorLoadStoreOpLowering (const LLVMTypeConverter &converter,
3241+ Chipset chipset)
3242+ : ConvertOpToLLVMPattern<SourceOp>(converter), chipset(chipset) {}
3243+ Chipset chipset;
3244+
3245+ LogicalResult
3246+ matchAndRewrite (SourceOp op, Adaptor adaptor,
3247+ ConversionPatternRewriter &rewriter) const override {
3248+ if (chipset < kGfx1250 )
3249+ return op->emitOpError (" is only supported on gfx1250" );
3250+
3251+ ValueRange desc = adaptor.getDesc ();
3252+ rewriter.replaceOpWithNewOp <TargetOp>(op, desc[0 ], desc[1 ], desc[2 ],
3253+ desc[3 ], /* cachePolicy=*/ 0 ,
3254+ /* alias_scopes=*/ nullptr ,
3255+ /* noalias_scopes=*/ nullptr ,
3256+ /* tbaa=*/ nullptr );
3257+ return success ();
3258+ }
3259+ };
3260+
32403261struct ConvertAMDGPUToROCDLPass
32413262 : public impl::ConvertAMDGPUToROCDLPassBase<ConvertAMDGPUToROCDLPass> {
32423263 using Base::Base;
@@ -3306,6 +3327,33 @@ void mlir::populateAMDGPUTypeAndAttributeConversions(
33063327 Type i32 = IntegerType::get (type.getContext (), 32 );
33073328 return typeConverter.convertType (VectorType::get (4 , i32 ));
33083329 });
3330+ typeConverter.addConversion (
3331+ [&](TDMDescriptorType type,
3332+ SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
3333+ Type i32 = IntegerType::get (type.getContext (), 32 );
3334+ Type v4i32 = typeConverter.convertType (VectorType::get (4 , i32 ));
3335+ Type v8i32 = typeConverter.convertType (VectorType::get (8 , i32 ));
3336+ llvm::append_values (result, v4i32, v8i32, v4i32, v4i32);
3337+ return success ();
3338+ });
3339+
3340+ auto addUnrealizedCast = [](OpBuilder &builder, TypeRange types,
3341+ ValueRange inputs,
3342+ Location loc) -> SmallVector<Value> {
3343+ // Only create unrealized_conversion_cast for TDMDescriptorType.
3344+ // All other types which are not expected, should be
3345+ // materialized by other target materialization functions.
3346+ if (inputs.size () != 1 )
3347+ return {};
3348+
3349+ if (!isa<TDMDescriptorType>(inputs[0 ].getType ()))
3350+ return {};
3351+
3352+ auto cast = UnrealizedConversionCastOp::create (builder, loc, types, inputs);
3353+ return cast.getResults ();
3354+ };
3355+
3356+ typeConverter.addTargetMaterialization (addUnrealizedCast);
33093357}
33103358
33113359void mlir::populateAMDGPUToROCDLConversionPatterns (LLVMTypeConverter &converter,
@@ -3336,7 +3384,11 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
33363384 AMDGPUMakeDmaBaseLowering<MakeDmaBaseOp>,
33373385 AMDGPUMakeDmaBaseLowering<MakeGatherDmaBaseOp>,
33383386 AMDGPULowerDescriptor<MakeDmaDescriptorOp>,
3339- AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>>(converter,
3340- chipset);
3387+ AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>,
3388+ AMDGPUTensorLoadStoreOpLowering<TensorLoadToLDSOp,
3389+ ROCDL::TensorLoadToLDSOp>,
3390+ AMDGPUTensorLoadStoreOpLowering<TensorStoreFromLDSOp,
3391+ ROCDL::TensorStoreFromLDSOp>>(
3392+ converter, chipset);
33413393 patterns.add <AMDGPUSwizzleBitModeLowering>(converter);
33423394}
0 commit comments