Skip to content

Commit f1b075d

Browse files
authored
[flang][cuda] Pass the pinned variable in allocate calls (llvm#125310)
1 parent 2c030a1 commit f1b075d

File tree

7 files changed

+93
-55
lines changed

7 files changed

+93
-55
lines changed

flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,13 @@ constexpr TypeBuilderFunc getModel<bool &>() {
401401
};
402402
}
403403
template <>
404+
constexpr TypeBuilderFunc getModel<bool *>() {
405+
return [](mlir::MLIRContext *context) -> mlir::Type {
406+
TypeBuilderFunc f{getModel<bool>()};
407+
return fir::ReferenceType::get(f(context));
408+
};
409+
}
410+
template <>
404411
constexpr TypeBuilderFunc getModel<unsigned short>() {
405412
return [](mlir::MLIRContext *context) -> mlir::Type {
406413
return mlir::IntegerType::get(

flang/include/flang/Runtime/CUDA/allocatable.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,30 @@ extern "C" {
1818

1919
/// Perform allocation of the descriptor.
2020
int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1,
21-
bool hasStat = false, const Descriptor *errMsg = nullptr,
22-
const char *sourceFile = nullptr, int sourceLine = 0);
21+
bool *pinned = nullptr, bool hasStat = false,
22+
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
23+
int sourceLine = 0);
2324

2425
/// Perform allocation of the descriptor with synchronization of it when
2526
/// necessary.
2627
int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1,
27-
bool hasStat = false, const Descriptor *errMsg = nullptr,
28-
const char *sourceFile = nullptr, int sourceLine = 0);
28+
bool *pinned = nullptr, bool hasStat = false,
29+
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
30+
int sourceLine = 0);
2931

3032
/// Perform allocation of the descriptor without synchronization. Assign data
3133
/// from source.
3234
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
33-
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
34-
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
35-
int sourceLine = 0);
35+
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
36+
bool hasStat = false, const Descriptor *errMsg = nullptr,
37+
const char *sourceFile = nullptr, int sourceLine = 0);
3638

3739
/// Perform allocation of the descriptor with synchronization of it when
3840
/// necessary. Assign data from source.
3941
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
40-
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
41-
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
42-
int sourceLine = 0);
42+
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
43+
bool hasStat = false, const Descriptor *errMsg = nullptr,
44+
const char *sourceFile = nullptr, int sourceLine = 0);
4345

4446
/// Perform deallocation of the descriptor with synchronization of it when
4547
/// necessary.

flang/include/flang/Runtime/CUDA/pointer.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,30 @@ extern "C" {
1818

1919
/// Perform allocation of the descriptor.
2020
int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
21-
bool hasStat = false, const Descriptor *errMsg = nullptr,
22-
const char *sourceFile = nullptr, int sourceLine = 0);
21+
bool *pinned = nullptr, bool hasStat = false,
22+
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
23+
int sourceLine = 0);
2324

2425
/// Perform allocation of the descriptor with synchronization of it when
2526
/// necessary.
2627
int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1,
27-
bool hasStat = false, const Descriptor *errMsg = nullptr,
28-
const char *sourceFile = nullptr, int sourceLine = 0);
28+
bool *pinned = nullptr, bool hasStat = false,
29+
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
30+
int sourceLine = 0);
2931

3032
/// Perform allocation of the descriptor without synchronization. Assign data
3133
/// from source.
3234
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
33-
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
34-
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
35-
int sourceLine = 0);
35+
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
36+
bool hasStat = false, const Descriptor *errMsg = nullptr,
37+
const char *sourceFile = nullptr, int sourceLine = 0);
3638

3739
/// Perform allocation of the descriptor with synchronization of it when
3840
/// necessary. Assign data from source.
3941
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
40-
const Descriptor &source, int64_t stream = -1, bool hasStat = false,
41-
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
42-
int sourceLine = 0);
42+
const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
43+
bool hasStat = false, const Descriptor *errMsg = nullptr,
44+
const char *sourceFile = nullptr, int sourceLine = 0);
4345

4446
} // extern "C"
4547

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
103103
mlir::Value sourceLine;
104104
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
105105
sourceLine = fir::factory::locationToLineNo(
106-
builder, loc, op.getSource() ? fTy.getInput(6) : fTy.getInput(5));
106+
builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
107107
else
108108
sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
109109

@@ -119,22 +119,28 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
119119
}
120120
llvm::SmallVector<mlir::Value> args;
121121
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
122+
mlir::Value pinned =
123+
op.getPinned()
124+
? op.getPinned()
125+
: builder.createNullConstant(
126+
loc, fir::ReferenceType::get(
127+
mlir::IntegerType::get(op.getContext(), 1)));
122128
if (op.getSource()) {
123129
mlir::Value stream =
124130
op.getStream()
125131
? op.getStream()
126132
: builder.createIntegerConstant(loc, fTy.getInput(2), -1);
127-
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
128-
op.getSource(), stream, hasStat,
129-
errmsg, sourceFile, sourceLine);
133+
args = fir::runtime::createArguments(
134+
builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
135+
hasStat, errmsg, sourceFile, sourceLine);
130136
} else {
131137
mlir::Value stream =
132138
op.getStream()
133139
? op.getStream()
134140
: builder.createIntegerConstant(loc, fTy.getInput(1), -1);
135141
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
136-
stream, hasStat, errmsg, sourceFile,
137-
sourceLine);
142+
stream, pinned, hasStat, errmsg,
143+
sourceFile, sourceLine);
138144
}
139145
} else {
140146
args =
@@ -153,11 +159,6 @@ struct CUFAllocateOpConversion
153159
mlir::LogicalResult
154160
matchAndRewrite(cuf::AllocateOp op,
155161
mlir::PatternRewriter &rewriter) const override {
156-
// TODO: Pinned is a reference to a logical value that can be set to true
157-
// when pinned allocation succeed. This will require a new entry point.
158-
if (op.getPinned())
159-
return mlir::failure();
160-
161162
auto mod = op->getParentOfType<mlir::ModuleOp>();
162163
fir::FirOpBuilder builder(rewriter, mod);
163164
mlir::Location loc = op.getLoc();

flang/runtime/CUDA/allocatable.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ extern "C" {
2323
RT_EXT_API_GROUP_BEGIN
2424

2525
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
26-
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
27-
int sourceLine) {
26+
bool *pinned, bool hasStat, const Descriptor *errMsg,
27+
const char *sourceFile, int sourceLine) {
2828
int stat{RTNAME(CUFAllocatableAllocate)(
29-
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
29+
desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
3030
#ifndef RT_DEVICE_COMPILATION
3131
// Descriptor synchronization is only done when the allocation is done
3232
// from the host.
@@ -41,8 +41,8 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
4141
}
4242

4343
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
44-
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
45-
int sourceLine) {
44+
bool *pinned, bool hasStat, const Descriptor *errMsg,
45+
const char *sourceFile, int sourceLine) {
4646
if (desc.HasAddendum()) {
4747
Terminator terminator{sourceFile, sourceLine};
4848
// TODO: This requires a bit more work to set the correct type descriptor
@@ -53,14 +53,19 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
5353
// Perform the standard allocation.
5454
int stat{RTNAME(AllocatableAllocate)(
5555
desc, hasStat, errMsg, sourceFile, sourceLine)};
56+
if (pinned) {
57+
// Set pinned according to stat. More infrastructure is needed to set it
58+
// closer to the actual allocation call.
59+
*pinned = (stat == StatOk);
60+
}
5661
return stat;
5762
}
5863

5964
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
60-
const Descriptor &source, int64_t stream, bool hasStat,
65+
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
6166
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
6267
int stat{RTNAME(CUFAllocatableAllocate)(
63-
alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
68+
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
6469
if (stat == StatOk) {
6570
Terminator terminator{sourceFile, sourceLine};
6671
Fortran::runtime::DoFromSourceAssign(
@@ -70,10 +75,10 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
7075
}
7176

7277
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
73-
const Descriptor &source, int64_t stream, bool hasStat,
78+
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
7479
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
7580
int stat{RTNAME(CUFAllocatableAllocateSync)(
76-
alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
81+
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
7782
if (stat == StatOk) {
7883
Terminator terminator{sourceFile, sourceLine};
7984
Fortran::runtime::DoFromSourceAssign(

flang/runtime/CUDA/pointer.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ namespace Fortran::runtime::cuda {
2121
extern "C" {
2222
RT_EXT_API_GROUP_BEGIN
2323

24-
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
25-
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
24+
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
25+
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
26+
int sourceLine) {
2627
if (desc.HasAddendum()) {
2728
Terminator terminator{sourceFile, sourceLine};
2829
// TODO: This requires a bit more work to set the correct type descriptor
@@ -33,14 +34,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
3334
// Perform the standard allocation.
3435
int stat{
3536
RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
37+
if (pinned) {
38+
// Set pinned according to stat. More infrastructure is needed to set it
39+
// closer to the actual allocation call.
40+
*pinned = (stat == StatOk);
41+
}
3642
return stat;
3743
}
3844

3945
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
40-
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
41-
int sourceLine) {
46+
bool *pinned, bool hasStat, const Descriptor *errMsg,
47+
const char *sourceFile, int sourceLine) {
4248
int stat{RTNAME(CUFPointerAllocate)(
43-
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
49+
desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
4450
#ifndef RT_DEVICE_COMPILATION
4551
// Descriptor synchronization is only done when the allocation is done
4652
// from the host.
@@ -55,10 +61,10 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
5561
}
5662

5763
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
58-
const Descriptor &source, int64_t stream, bool hasStat,
64+
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
5965
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
6066
int stat{RTNAME(CUFPointerAllocate)(
61-
pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
67+
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
6268
if (stat == StatOk) {
6369
Terminator terminator{sourceFile, sourceLine};
6470
Fortran::runtime::DoFromSourceAssign(
@@ -68,10 +74,10 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
6874
}
6975

7076
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
71-
const Descriptor &source, int64_t stream, bool hasStat,
77+
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
7278
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
7379
int stat{RTNAME(CUFPointerAllocateSync)(
74-
pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
80+
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
7581
if (stat == StatOk) {
7682
Terminator terminator{sourceFile, sourceLine};
7783
Fortran::runtime::DoFromSourceAssign(

flang/test/Fir/CUDA/cuda-allocate.fir

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func.func @_QPsub1() {
1919
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
2020
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
2121
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
22-
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
22+
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
2323

2424
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
2525
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -47,7 +47,7 @@ func.func @_QPsub3() {
4747
// CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
4848

4949
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
50-
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
50+
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
5151

5252
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
5353
// CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -87,7 +87,7 @@ func.func @_QPsub5() {
8787
}
8888

8989
// CHECK-LABEL: func.func @_QPsub5()
90-
// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
90+
// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
9191
// CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
9292

9393

@@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} {
118118
// CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMdataEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
119119
// CHECK: _FortranAAllocatableSetBounds
120120
// CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
121-
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
121+
// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
122122

123123

124124
func.func @_QPallocate_source() {
@@ -142,7 +142,7 @@ func.func @_QPallocate_source() {
142142
// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
143143
// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
144144
// CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.box<none>
145-
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
145+
// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
146146

147147

148148
fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?x?xf32>>> {
@@ -179,7 +179,7 @@ func.func @_QQallocate_stream() {
179179
// CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"}
180180
// CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref<i64>) -> !fir.ref<i64>
181181
// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
182-
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
182+
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
183183

184184

185185
func.func @_QPp_alloc() {
@@ -255,4 +255,19 @@ func.func @_QMmod1Ppointer_source_global() {
255255
// CHECK-LABEL: func.func @_QMmod1Ppointer_source_global()
256256
// CHECK: fir.call @_FortranACUFPointerAllocateSourceSync
257257

258+
func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
259+
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
260+
%4 = fir.declare %0 {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
261+
%13 = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
262+
%14 = fir.declare %13 {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
263+
%18 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> pinned(%14 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>, hasStat} -> i32
264+
return
265+
}
266+
267+
// CHECK-LABEL: func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
268+
// CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
269+
// CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
270+
// CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref<!fir.logical<4>>) -> !fir.ref<i1>
271+
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
272+
258273
} // end of module

0 commit comments

Comments
 (0)