diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 9adbe6a497214..ecbc8ce6b525a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" +#include "llvm/ADT/TypeSwitch.h" namespace clang { // Simple type-trait to see if the first template arg is one of the list, so we // can tell whether to `if-constexpr` a bunch of stuff. @@ -36,6 +37,72 @@ template constexpr bool isCombinedType = false; template constexpr bool isCombinedType> = true; +namespace { +struct DataOperandInfo { + mlir::Location beginLoc; + mlir::Value varValue; + llvm::StringRef name; +}; + +inline mlir::Value emitOpenACCIntExpr(CIRGen::CIRGenFunction &cgf, + CIRGen::CIRGenBuilderTy &builder, + const Expr *intExpr) { + mlir::Value expr = cgf.emitScalarExpr(intExpr); + mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc()); + + mlir::IntegerType targetType = mlir::IntegerType::get( + &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()), + intExpr->getType()->isSignedIntegerOrEnumerationType() + ? mlir::IntegerType::SignednessSemantics::Signed + : mlir::IntegerType::SignednessSemantics::Unsigned); + + auto conversionOp = builder.create( + exprLoc, targetType, expr); + return conversionOp.getResult(0); +} + +// A helper function that gets the information from an operand to a data +// clause, so that it can be used to emit the data operations. +inline DataOperandInfo getDataOperandInfo(CIRGen::CIRGenFunction &cgf, + CIRGen::CIRGenBuilderTy &builder, + OpenACCDirectiveKind dk, + const Expr *e) { + // TODO: OpenACC: Cache was different enough as to need a separate + // `ActOnCacheVar`, so we are going to need to do some investigations here + // when it comes to implement this for cache. + if (dk == OpenACCDirectiveKind::Cache) { + cgf.cgm.errorNYI(e->getSourceRange(), + "OpenACC data operand for 'cache' directive"); + return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}}; + } + + const Expr *curVarExpr = e->IgnoreParenImpCasts(); + + mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); + + // TODO: OpenACC: Assemble the list of bounds. + if (isa(curVarExpr)) { + cgf.cgm.errorNYI(curVarExpr->getSourceRange(), + "OpenACC data clause array subscript/section"); + return {exprLoc, {}, {}}; + } + + // TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly. + if (isa(curVarExpr)) { + cgf.cgm.errorNYI(curVarExpr->getSourceRange(), + "OpenACC Data clause member expr"); + return {exprLoc, {}, {}}; + } + + // Sema has made sure that only 4 types of things can get here, array + // subscript, array section, member expr, or DRE to a var decl (or the former + // 3 wrapping a var-decl), so we should be able to assume this is right. + const auto *dre = cast(curVarExpr); + const auto *vd = cast(dre->getFoundDecl()->getCanonicalDecl()); + return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName()}; +} +} // namespace + template class OpenACCClauseCIREmitter final : public OpenACCClauseVisitor> { @@ -54,6 +121,11 @@ class OpenACCClauseCIREmitter final SourceLocation dirLoc; llvm::SmallVector lastDeviceTypeValues; + // Keep track of the async-clause so that we can shortcut updating the data + // operands async clauses. + bool hasAsyncClause = false; + // Keep track of the data operands so that we can update their async clauses. + llvm::SmallVector dataOperands; void setLastDeviceTypeClause(const OpenACCDeviceTypeClause &clause) { lastDeviceTypeValues.clear(); @@ -69,19 +141,8 @@ class OpenACCClauseCIREmitter final cgf.cgm.errorNYI(c.getSourceRange(), "OpenACC Clause", c.getClauseKind()); } - mlir::Value createIntExpr(const Expr *intExpr) { - mlir::Value expr = cgf.emitScalarExpr(intExpr); - mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc()); - - mlir::IntegerType targetType = mlir::IntegerType::get( - &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()), - intExpr->getType()->isSignedIntegerOrEnumerationType() - ? mlir::IntegerType::SignednessSemantics::Signed - : mlir::IntegerType::SignednessSemantics::Unsigned); - - auto conversionOp = builder.create( - exprLoc, targetType, expr); - return conversionOp.getResult(0); + mlir::Value emitOpenACCIntExpr(const Expr *intExpr) { + return clang::emitOpenACCIntExpr(cgf, builder, intExpr); } // 'condition' as an OpenACC grammar production is used for 'if' and (some @@ -157,6 +218,104 @@ class OpenACCClauseCIREmitter final computeEmitter.Visit(&c); } + template + void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause, + bool structured, bool implicit) { + DataOperandInfo opInfo = + getDataOperandInfo(cgf, builder, dirKind, varOperand); + mlir::ValueRange bounds; + + // TODO: OpenACC: we should comprehend the 'modifier-list' here for the data + // operand. At the moment, we don't have a uniform way to assign these + // properly, and the dialect cannot represent anything other than 'readonly' + // and 'zero' on copyin/copyout/create, so for now, we skip it. + + auto beforeOp = + builder.create(opInfo.beginLoc, opInfo.varValue, structured, + implicit, opInfo.name, bounds); + operation.getDataClauseOperandsMutable().append(beforeOp.getResult()); + + AfterOpTy afterOp; + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointAfter(operation); + afterOp = builder.create(opInfo.beginLoc, beforeOp.getResult(), + opInfo.varValue, structured, implicit, + opInfo.name, bounds); + } + + // Set the 'rest' of the info for both operations. + beforeOp.setDataClause(dataClause); + afterOp.setDataClause(dataClause); + + // Make sure we record these, so 'async' values can be updated later. + dataOperands.push_back(beforeOp.getOperation()); + dataOperands.push_back(afterOp.getOperation()); + } + + // Helper function that covers for the fact that we don't have this function + // on all operation types. + mlir::ArrayAttr getAsyncOnlyAttr() { + if constexpr (isOneOfTypes) + return operation.getAsyncOnlyAttr(); + + // Note: 'wait' has async as well, but it cannot have data clauses, so we + // don't have to handle them here. + + llvm_unreachable("getting asyncOnly when clause not valid on operation?"); + } + + // Helper function that covers for the fact that we don't have this function + // on all operation types. + mlir::ArrayAttr getAsyncOperandsDeviceTypeAttr() { + if constexpr (isOneOfTypes) + return operation.getAsyncOperandsDeviceTypeAttr(); + + // Note: 'wait' has async as well, but it cannot have data clauses, so we + // don't have to handle them here. + + llvm_unreachable( + "getting asyncOperandsDeviceType when clause not valid on operation?"); + } + + // Helper function that covers for the fact that we don't have this function + // on all operation types. + mlir::OperandRange getAsyncOperands() { + if constexpr (isOneOfTypes) + return operation.getAsyncOperands(); + + // Note: 'wait' has async as well, but it cannot have data clauses, so we + // don't have to handle them here. + + llvm_unreachable( + "getting asyncOperandsDeviceType when clause not valid on operation?"); + } + + // The 'data' clauses all require that we add the 'async' values from the + // operation to them. We've collected the data operands along the way, so use + // that list to get the current 'async' values. + void updateDataOperandAsyncValues() { + if (!hasAsyncClause || dataOperands.empty()) + return; + + // TODO: OpenACC: Handle this correctly for combined constructs. + + for (mlir::Operation *dataOp : dataOperands) { + llvm::TypeSwitch(dataOp) + .Case([&](auto op) { + op.setAsyncOnlyAttr(getAsyncOnlyAttr()); + op.setAsyncOperandsDeviceTypeAttr(getAsyncOperandsDeviceTypeAttr()); + op.getAsyncOperandsMutable().assign(getAsyncOperands()); + }) + .Default([&](mlir::Operation *) { + llvm_unreachable("Not a data operation?"); + }); + } + } + public: OpenACCClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, @@ -168,6 +327,14 @@ class OpenACCClauseCIREmitter final clauseNotImplemented(clause); } + // The entry point for the CIR emitter. All users should use this rather than + // 'visitClauseList', as this also handles the things that have to happen + // 'after' the clauses are all visited. + void emitClauses(ArrayRef clauses) { + this->VisitClauseList(clauses); + updateDataOperandAsyncValues(); + } + void VisitDefaultClause(const OpenACCDefaultClause &clause) { // This type-trait checks if 'op'(the first arg) is one of the mlir::acc // operations listed in the rest of the arguments. @@ -227,7 +394,7 @@ class OpenACCClauseCIREmitter final if constexpr (isOneOfTypes) { operation.addNumWorkersOperand(builder.getContext(), - createIntExpr(clause.getIntExpr()), + emitOpenACCIntExpr(clause.getIntExpr()), lastDeviceTypeValues); } else if constexpr (isCombinedType) { applyToComputeOp(clause); @@ -240,7 +407,7 @@ class OpenACCClauseCIREmitter final if constexpr (isOneOfTypes) { operation.addVectorLengthOperand(builder.getContext(), - createIntExpr(clause.getIntExpr()), + emitOpenACCIntExpr(clause.getIntExpr()), lastDeviceTypeValues); } else if constexpr (isCombinedType) { applyToComputeOp(clause); @@ -250,14 +417,26 @@ class OpenACCClauseCIREmitter final } void VisitAsyncClause(const OpenACCAsyncClause &clause) { + hasAsyncClause = true; if constexpr (isOneOfTypes) { if (!clause.hasIntExpr()) operation.addAsyncOnly(builder.getContext(), lastDeviceTypeValues); - else - operation.addAsyncOperand(builder.getContext(), - createIntExpr(clause.getIntExpr()), + else { + + mlir::Value intExpr; + { + // Async int exprs can be referenced by the data operands, which means + // that the int-exprs have to appear before them. IF there is a data + // operand already, set the insertion point to 'before' it. + mlir::OpBuilder::InsertionGuard guardCase(builder); + if (!dataOperands.empty()) + builder.setInsertionPoint(dataOperands.front()); + intExpr = emitOpenACCIntExpr(clause.getIntExpr()); + } + operation.addAsyncOperand(builder.getContext(), intExpr, lastDeviceTypeValues); + } } else if constexpr (isOneOfTypes) { // Wait doesn't have a device_type, so its handling here is slightly // different. @@ -265,7 +444,7 @@ class OpenACCClauseCIREmitter final operation.setAsync(true); else operation.getAsyncOperandMutable().append( - createIntExpr(clause.getIntExpr())); + emitOpenACCIntExpr(clause.getIntExpr())); } else if constexpr (isCombinedType) { applyToComputeOp(clause); } else { @@ -321,7 +500,7 @@ class OpenACCClauseCIREmitter final if constexpr (isOneOfTypes) { operation.getDeviceNumMutable().append( - createIntExpr(clause.getIntExpr())); + emitOpenACCIntExpr(clause.getIntExpr())); } else { llvm_unreachable( "init, shutdown, set, are only valid device_num constructs"); @@ -333,7 +512,7 @@ class OpenACCClauseCIREmitter final mlir::acc::KernelsOp>) { llvm::SmallVector values; for (const Expr *E : clause.getIntExprs()) - values.push_back(createIntExpr(E)); + values.push_back(emitOpenACCIntExpr(E)); operation.addNumGangsOperands(builder.getContext(), values, lastDeviceTypeValues); @@ -352,9 +531,9 @@ class OpenACCClauseCIREmitter final } else { llvm::SmallVector values; if (clause.hasDevNumExpr()) - values.push_back(createIntExpr(clause.getDevNumExpr())); + values.push_back(emitOpenACCIntExpr(clause.getDevNumExpr())); for (const Expr *E : clause.getQueueIdExprs()) - values.push_back(createIntExpr(E)); + values.push_back(emitOpenACCIntExpr(E)); operation.addWaitOperands(builder.getContext(), clause.hasDevNumExpr(), values, lastDeviceTypeValues); } @@ -370,7 +549,7 @@ class OpenACCClauseCIREmitter final void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) { if constexpr (isOneOfTypes) { operation.getDefaultAsyncMutable().append( - createIntExpr(clause.getIntExpr())); + emitOpenACCIntExpr(clause.getIntExpr())); } else { llvm_unreachable("set, is only valid device_num constructs"); } @@ -460,7 +639,7 @@ class OpenACCClauseCIREmitter final if constexpr (isOneOfTypes) { if (clause.hasIntExpr()) operation.addWorkerNumOperand(builder.getContext(), - createIntExpr(clause.getIntExpr()), + emitOpenACCIntExpr(clause.getIntExpr()), lastDeviceTypeValues); else operation.addEmptyWorker(builder.getContext(), lastDeviceTypeValues); @@ -478,7 +657,7 @@ class OpenACCClauseCIREmitter final if constexpr (isOneOfTypes) { if (clause.hasIntExpr()) operation.addVectorOperand(builder.getContext(), - createIntExpr(clause.getIntExpr()), + emitOpenACCIntExpr(clause.getIntExpr()), lastDeviceTypeValues); else operation.addEmptyVector(builder.getContext(), lastDeviceTypeValues); @@ -514,7 +693,7 @@ class OpenACCClauseCIREmitter final } else if (isa(expr)) { values.push_back(createConstantInt(exprLoc, 64, -1)); } else { - values.push_back(createIntExpr(expr)); + values.push_back(emitOpenACCIntExpr(expr)); } } @@ -527,6 +706,20 @@ class OpenACCClauseCIREmitter final llvm_unreachable("Unknown construct kind in VisitGangClause"); } } + + void VisitCopyClause(const OpenACCCopyClause &clause) { + if constexpr (isOneOfTypes) { + for (auto var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_copy, /*structured=*/true, + /*implicit=*/false); + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. data, declare, combined constructs remain. + return clauseNotImplemented(clause); + } + } }; template diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index fc76f57ce7c29..3c18f5d9e205c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -39,8 +39,7 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpAssociatedStmt( // Sets insertion point before the 'op', since every new expression needs to // be before the operation. builder.setInsertionPoint(op); - makeClauseEmitter(op, *this, builder, dirKind, dirLoc) - .VisitClauseList(clauses); + makeClauseEmitter(op, *this, builder, dirKind, dirLoc).emitClauses(clauses); } { @@ -115,7 +114,7 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( // We don't bother setting the insertion point, since the clause emitter // is going to have to do this correctly. makeClauseEmitter(inf, *this, builder, dirKind, dirLoc) - .VisitClauseList(clauses); + .emitClauses(clauses); } builder.create(end); @@ -137,8 +136,7 @@ Op CIRGenFunction::emitOpenACCOp( // Sets insertion point before the 'op', since every new expression needs to // be before the operation. builder.setInsertionPoint(op); - makeClauseEmitter(op, *this, builder, dirKind, dirLoc) - .VisitClauseList(clauses); + makeClauseEmitter(op, *this, builder, dirKind, dirLoc).emitClauses(clauses); } return op; } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp index 2b78bc1a6d4a5..8a868fdc96350 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp @@ -96,7 +96,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) { builder.setInsertionPoint(op); makeClauseEmitter(op, *this, builder, s.getDirectiveKind(), s.getDirectiveLoc()) - .VisitClauseList(s.clauses()); + .emitClauses(s.clauses()); } mlir::LogicalResult stmtRes = mlir::success(); diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c b/clang/test/CIR/CodeGenOpenACC/compute-copy.c new file mode 100644 index 0000000000000..a542409f07152 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c @@ -0,0 +1,213 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +int global; +void acc_compute(int parmVar) { + // CHECK: cir.func @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK-NEXT: %[[PARM:.*]] = cir.alloca !s32i, !cir.ptr, ["parmVar", init] + int localVar1; + short localVar2; + float localVar3; + // CHECK-NEXT: %[[LOCAL1:.*]] = cir.alloca !s32i, !cir.ptr, ["localVar1"] + // CHECK-NEXT: %[[LOCAL2:.*]] = cir.alloca !s16i, !cir.ptr, ["localVar2"] + // CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr, ["localVar3"] + // CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["localPointer"] + // CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array, !cir.ptr>, ["localArray"] + // CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr + +#pragma acc parallel copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr) to varPtr(%[[LOCAL3]] : !cir.ptr) {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr) to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr) to varPtr(%[[LOCAL3]] : !cir.ptr) {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr) to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc kernels copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: %[[COPYIN4:.*]] = acc.copyin varPtr(%[[LOCAL3]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: %[[COPYIN5:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]], %[[COPYIN4]], %[[COPYIN5]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN5]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN4]] : !cir.ptr) to varPtr(%[[LOCAL3]] : !cir.ptr) {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr) to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + + // TODO: OpenACC: Represent alwaysin/alwaysout/always correctly. For now, + // these do nothing to the IR. +#pragma acc parallel copy(alwaysin: localVar1) copy(alwaysout: localVar2) copy(always: localVar3) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LOCAL3]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]] : !cir.ptr, !cir.ptr, !cir.ptr) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr) to varPtr(%[[LOCAL3]] : !cir.ptr) {dataClause = #acc, name = "localVar3"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial copy(always, alwaysin, alwaysout: localVar1) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + + short *localPointer; + float localArray[5]; + +#pragma acc kernels copy(localArray, localPointer, global) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr>) -> !cir.ptr> {dataClause = #acc, name = "localArray"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr>) -> !cir.ptr> {dataClause = #acc, name = "localPointer"} loc + // CHECK-NEXT: %[[GLOBAL_REF:.*]] = cir.get_global @global : !cir.ptr + // CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[GLOBAL_REF]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "global"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]] : !cir.ptr>, !cir.ptr>, !cir.ptr) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN3]] : !cir.ptr) to varPtr(%[[GLOBAL_REF]] : !cir.ptr) {dataClause = #acc, name = "global"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr>) to varPtr(%[[LOCALPTR]] : !cir.ptr>) {dataClause = #acc, name = "localPointer"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr>) to varPtr(%[[LOCALARRAY]] : !cir.ptr>) {dataClause = #acc, name = "localArray"} loc + +#pragma acc parallel copy(localVar1) async + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial async copy(localVar1, localVar2) + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) async -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr, !cir.ptr) async { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) async to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc kernels copy(localVar1, localVar2) async(1) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr, !cir.ptr) async(%[[ONE_CAST]] : si32) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN2]] : !cir.ptr) async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL2]] : !cir.ptr) {dataClause = #acc, name = "localVar2"} loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc parallel async(1) copy(localVar1) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial copy(localVar1) device_type(nvidia, radeon) async + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type, #acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc kernels copy(localVar1) device_type(nvidia, radeon) async(1) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc parallel copy(localVar1) async device_type(nvidia, radeon) async + ; + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type, #acc.device_type, #acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type, #acc.device_type]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type, #acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) async(1) + ; + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc kernels copy(localVar1) async device_type(nvidia, radeon) async(1) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc parallel copy(localVar1) async(1) device_type(nvidia, radeon) async + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + + // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc + +#pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) async(1) + ; + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc + // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) to varPtr(%[[LOCAL1]] : !cir.ptr) {dataClause = #acc, name = "localVar1"} loc +}