diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index cc525d703ae57..710aed5031f5b 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -294,19 +294,22 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern { matchAndRewrite(cuf::AllocOp op, mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = op.getLoc(); + if (inDeviceContext(op.getOperation())) { // In device context just replace the cuf.alloc operation with a fir.alloc // the cuf.free will be removed. - rewriter.replaceOpWithNewOp( - op, op.getInType(), op.getUniqName() ? *op.getUniqName() : "", + auto allocaOp = rewriter.create( + loc, op.getInType(), op.getUniqName() ? *op.getUniqName() : "", op.getBindcName() ? *op.getBindcName() : "", op.getTypeparams(), op.getShape()); + allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + rewriter.replaceOp(op, allocaOp); return mlir::success(); } auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); if (!mlir::dyn_cast_or_null(op.getInType())) { @@ -359,6 +362,7 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern { llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)}; auto callOp = builder.create(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); auto convOp = builder.createConvert(loc, op.getResult().getType(), callOp.getResult(0)); rewriter.replaceOp(op, convOp); @@ -381,6 +385,7 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern { llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)}; auto callOp = builder.create(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); auto convOp = builder.createConvert(loc, op.getResult().getType(), callOp.getResult(0)); rewriter.replaceOp(op, convOp); @@ -508,7 +513,8 @@ struct CUFFreeOpConversion : public mlir::OpRewritePattern { fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)}; - builder.create(loc, func, args); + auto callOp = builder.create(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); rewriter.eraseOp(op); return mlir::success(); } diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir index 6194f0071cd79..31f2ed022b6c4 100644 --- a/flang/test/Fir/CUDA/cuda-alloc-free.fir +++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir @@ -11,7 +11,7 @@ func.func @_QPsub1() { // CHECK-LABEL: func.func @_QPsub1() // CHECK: %[[BYTES:.*]] = fir.convert %c4{{.*}} : (index) -> i64 -// CHECK: %[[ALLOC:.*]] = fir.call @_FortranACUFMemAlloc(%[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: %[[ALLOC:.*]] = fir.call @_FortranACUFMemAlloc(%[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr // CHECK: %[[CONV:.*]] = fir.convert %3 : (!fir.llvm_ptr) -> !fir.ref // CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CONV]] {data_attr = #cuf.cuda, uniq_name = "_QFsub1Eidev"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[DEVPTR:.*]] = fir.convert %[[DECL]]#1 : (!fir.ref) -> !fir.llvm_ptr @@ -26,7 +26,7 @@ func.func @_QPsub2() { // CHECK-LABEL: func.func @_QPsub2() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c4{{.*}} : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr // CHECK: fir.call @_FortranACUFMemFree func.func @_QPsub3(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref {fir.bindc_name = "m"}) { @@ -58,7 +58,7 @@ func.func @_QPsub3(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref< // CHECK: %[[NBELEM:.*]] = arith.muli %[[N]], %[[M]] : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %c4{{.*}} : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr // CHECK: fir.call @_FortranACUFMemFree func.func @_QPtest_type() { @@ -71,7 +71,7 @@ func.func @_QPtest_type() { // CHECK-LABEL: func.func @_QPtest_type() // CHECK: %[[BYTES:.*]] = arith.constant 12 : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr gpu.module @cuda_device_mod { gpu.func @_QMalloc() kernel { @@ -81,7 +81,7 @@ gpu.module @cuda_device_mod { } // CHECK-LABEL: gpu.func @_QMalloc() kernel -// CHECK: fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMallocEa"} +// CHECK: fir.alloca !fir.box>> {bindc_name = "a", cuf.data_attr = #cuf.cuda, uniq_name = "_QMallocEa"} func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { %c1 = arith.constant 1 : index @@ -92,6 +92,6 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { // CHECK-LABEL: func.func @_QQalloc_char() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c1{{.*}} : index // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr } // end module diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index b8457b846716e..08573110821cc 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -15,7 +15,7 @@ func.func @_QPsub1() { } // CHECK-LABEL: func.func @_QPsub1() -// CHECK: %[[DESC_RT_CALL:.*]] = fir.call @_FortranACUFAllocDescriptor(%{{.*}}, %{{.*}}, %{{.*}}) : (i64, !fir.ref, i32) -> !fir.ref> +// CHECK: %[[DESC_RT_CALL:.*]] = fir.call @_FortranACUFAllocDescriptor(%{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, !fir.ref, i32) -> !fir.ref> // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref>) -> !fir.ref>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> @@ -24,7 +24,7 @@ func.func @_QPsub1() { // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFFreeDescriptor(%[[BOX_NONE]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, i32) -> () +// CHECK: fir.call @_FortranACUFFreeDescriptor(%[[BOX_NONE]], %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (!fir.ref>, !fir.ref, i32) -> () fir.global @_QMmod1Ea {data_attr = #cuf.cuda} : !fir.box>> { %0 = fir.zero_bits !fir.heap> diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index 415d0015918bb..b62c500f4a2d3 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -329,7 +329,7 @@ func.func @_QPtest_array_type() { // CHECK-LABEL: func.func @_QPtest_array_type() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c12 : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c12{{.*}} : i64 // CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> ()