diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index e79cf75bbce75..635dad8b0e119 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -361,5 +361,7 @@ considered for standardization. Please post on the | device extension | `'ompx_bare' clause on 'target teams' construct | :good:`prototyped` | #66844, #70612 | | | `_ | | | +------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ +| device extension | Multi-dim `'num_teams' clause on 'target teams ompx_bare' construct | :good:`partial` | #99732, #101407 | ++------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+ .. _Discourse forums (Runtimes - OpenMP category): https://discourse.llvm.org/c/runtimes/openmp/35 diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 25f5bd37bbe94..82a1f5449ed96 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -307,6 +307,9 @@ Python Binding Changes OpenMP Support -------------- +- `num_teams` now accepts multiple expressions when it is used along in ``target teams ompx_bare`` construct. + This allows the target region to be launched with multi-dim grid on GPUs. + Additional Information ====================== diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index b029c72fa7d8f..50ac1e0ea8db7 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -6131,43 +6131,54 @@ class OMPMapClause final : public OMPMappableExprListClause, /// \endcode /// In this example directive '#pragma omp teams' has clause 'num_teams' /// with single expression 'n'. -class OMPNumTeamsClause : public OMPClause, public OMPClauseWithPreInit { - friend class OMPClauseReader; +/// +/// When 'ompx_bare' clause exists on a 'target' directive, 'num_teams' clause +/// can accept up to three expressions. +/// +/// \code +/// #pragma omp target teams ompx_bare num_teams(x, y, z) +/// \endcode +class OMPNumTeamsClause final + : public OMPVarListClause, + public OMPClauseWithPreInit, + private llvm::TrailingObjects { + friend OMPVarListClause; + friend TrailingObjects; /// Location of '('. SourceLocation LParenLoc; - /// NumTeams number. - Stmt *NumTeams = nullptr; + OMPNumTeamsClause(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, unsigned N) + : OMPVarListClause(llvm::omp::OMPC_num_teams, StartLoc, LParenLoc, EndLoc, + N), + OMPClauseWithPreInit(this) {} - /// Set the NumTeams number. - /// - /// \param E NumTeams number. - void setNumTeams(Expr *E) { NumTeams = E; } + /// Build an empty clause. + OMPNumTeamsClause(unsigned N) + : OMPVarListClause(llvm::omp::OMPC_num_teams, SourceLocation(), + SourceLocation(), SourceLocation(), N), + OMPClauseWithPreInit(this) {} public: - /// Build 'num_teams' clause. + /// Creates clause with a list of variables \a VL. /// - /// \param E Expression associated with this clause. - /// \param HelperE Helper Expression associated with this clause. - /// \param CaptureRegion Innermost OpenMP region where expressions in this - /// clause must be captured. + /// \param C AST context. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - OMPNumTeamsClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) - : OMPClause(llvm::omp::OMPC_num_teams, StartLoc, EndLoc), - OMPClauseWithPreInit(this), LParenLoc(LParenLoc), NumTeams(E) { - setPreInitStmt(HelperE, CaptureRegion); - } + /// \param VL List of references to the variables. + /// \param PreInit + static OMPNumTeamsClause * + Create(const ASTContext &C, OpenMPDirectiveKind CaptureRegion, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, ArrayRef VL, Stmt *PreInit); - /// Build an empty clause. - OMPNumTeamsClause() - : OMPClause(llvm::omp::OMPC_num_teams, SourceLocation(), - SourceLocation()), - OMPClauseWithPreInit(this) {} + /// Creates an empty clause with \a N variables. + /// + /// \param C AST context. + /// \param N The number of variables. + static OMPNumTeamsClause *CreateEmpty(const ASTContext &C, unsigned N); /// Sets the location of '('. void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } @@ -6175,16 +6186,22 @@ class OMPNumTeamsClause : public OMPClause, public OMPClauseWithPreInit { /// Returns the location of '('. SourceLocation getLParenLoc() const { return LParenLoc; } - /// Return NumTeams number. - Expr *getNumTeams() { return cast(NumTeams); } + /// Return NumTeams expressions. + ArrayRef getNumTeams() { return getVarRefs(); } - /// Return NumTeams number. - Expr *getNumTeams() const { return cast(NumTeams); } + /// Return NumTeams expressions. + ArrayRef getNumTeams() const { + return const_cast(this)->getNumTeams(); + } - child_range children() { return child_range(&NumTeams, &NumTeams + 1); } + child_range children() { + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); + } const_child_range children() const { - return const_child_range(&NumTeams, &NumTeams + 1); + auto Children = const_cast(this)->children(); + return const_child_range(Children.begin(), Children.end()); } child_range used_children() { diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index dcf5dbf449f8b..9a6e8a9ea1c7b 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3793,8 +3793,8 @@ bool RecursiveASTVisitor::VisitOMPMapClause(OMPMapClause *C) { template bool RecursiveASTVisitor::VisitOMPNumTeamsClause( OMPNumTeamsClause *C) { + TRY_TO(VisitOMPClauseList(C)); TRY_TO(VisitOMPClauseWithPreInit(C)); - TRY_TO(TraverseStmt(C->getNumTeams())); return true; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9..8e98aa028db08 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11639,6 +11639,7 @@ def warn_omp_unterminated_declare_target : Warning< InGroup; def err_ompx_bare_no_grid : Error< "'ompx_bare' clauses requires explicit grid size via 'num_teams' and 'thread_limit' clauses">; +def err_omp_multi_expr_not_allowed: Error<"only one expression allowed to '%0' clause">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index aa61dae9415e2..703c1511fc3ae 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1226,7 +1226,8 @@ class SemaOpenMP : public SemaBase { const OMPVarListLocTy &Locs, bool NoDiagnose = false, ArrayRef UnresolvedMappers = std::nullopt); /// Called on well-formed 'num_teams' clause. - OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, + OMPClause *ActOnOpenMPNumTeamsClause(ArrayRef VarList, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); /// Called on well-formed 'thread_limit' clause. diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 042a5df5906ca..9ec2f593b4477 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1720,6 +1720,24 @@ const Expr *OMPDoacrossClause::getLoopData(unsigned NumLoop) const { return *It; } +OMPNumTeamsClause *OMPNumTeamsClause::Create( + const ASTContext &C, OpenMPDirectiveKind CaptureRegion, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, + ArrayRef VL, Stmt *PreInit) { + void *Mem = C.Allocate(totalSizeToAlloc(VL.size())); + OMPNumTeamsClause *Clause = + new (Mem) OMPNumTeamsClause(C, StartLoc, LParenLoc, EndLoc, VL.size()); + Clause->setVarRefs(VL); + Clause->setPreInitStmt(PreInit, CaptureRegion); + return Clause; +} + +OMPNumTeamsClause *OMPNumTeamsClause::CreateEmpty(const ASTContext &C, + unsigned N) { + void *Mem = C.Allocate(totalSizeToAlloc(N)); + return new (Mem) OMPNumTeamsClause(N); +} + //===----------------------------------------------------------------------===// // OpenMP clauses printing methods //===----------------------------------------------------------------------===// @@ -1977,9 +1995,11 @@ void OMPClausePrinter::VisitOMPDeviceClause(OMPDeviceClause *Node) { } void OMPClausePrinter::VisitOMPNumTeamsClause(OMPNumTeamsClause *Node) { - OS << "num_teams("; - Node->getNumTeams()->printPretty(OS, nullptr, Policy, 0); - OS << ")"; + if (!Node->varlist_empty()) { + OS << "num_teams"; + VisitOMPClauseList(Node, '('); + OS << ")"; + } } void OMPClausePrinter::VisitOMPThreadLimitClause(OMPThreadLimitClause *Node) { diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index f1e723b4242ee..00ba8e490ac4e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -843,9 +843,8 @@ void OMPClauseProfiler::VisitOMPAllocateClause(const OMPAllocateClause *C) { VisitOMPClauseList(C); } void OMPClauseProfiler::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) { + VisitOMPClauseList(C); VistOMPClauseWithPreInit(C); - if (C->getNumTeams()) - Profiler->VisitStmt(C->getNumTeams()); } void OMPClauseProfiler::VisitOMPThreadLimitClause( const OMPThreadLimitClause *C) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d869aa3322cce..4a62e7a702cec 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6036,8 +6036,9 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( dyn_cast_or_null(ChildStmt)) { if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { if (NestedDir->hasClausesOfKind()) { - const Expr *NumTeams = - NestedDir->getSingleClause()->getNumTeams(); + const Expr *NumTeams = NestedDir->getSingleClause() + ->getNumTeams() + .front(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) @@ -6062,7 +6063,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_target_teams_distribute_parallel_for_simd: { if (D.hasClausesOfKind()) { const Expr *NumTeams = - D.getSingleClause()->getNumTeams(); + D.getSingleClause()->getNumTeams().front(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); @@ -9575,6 +9576,20 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, MappedVarSet, CombinedInfo); genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } + +static void emitNumTeamsForBareTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::SmallVectorImpl &NumTeams) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + for (auto *E : C->getNumTeams()) { + llvm::Value *V = CGF.EmitScalarExpr(E); + NumTeams.push_back( + CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); + } +} + static void emitTargetCallKernelLaunch( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, @@ -9644,8 +9659,14 @@ static void emitTargetCallKernelLaunch( return CGF.Builder.saveIP(); }; + bool IsBare = D.hasClausesOfKind(); + SmallVector NumTeams; + if (IsBare) + emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); + else + NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + llvm::Value *DeviceID = emitDeviceID(Device, CGF); - llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index b1ac9361957ff..0cb8b7804f644 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6859,7 +6859,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const auto *NT = S.getSingleClause(); const auto *TL = S.getSingleClause(); if (NT || TL) { - const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; + const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr; const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index e975e96c5c7e4..50930aa0e9a4a 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3098,7 +3098,6 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_simdlen: case OMPC_collapse: case OMPC_ordered: - case OMPC_num_teams: case OMPC_thread_limit: case OMPC_priority: case OMPC_grainsize: @@ -3252,6 +3251,13 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, ? ParseOpenMPSimpleClause(CKind, WrongDirective) : ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_num_teams: + if (!FirstClause) { + Diag(Tok, diag::err_omp_more_one_clause) + << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0; + ErrorFound = true; + } + [[clang::fallthrough]]; case OMPC_private: case OMPC_firstprivate: case OMPC_lastprivate: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 4f50efda155fb..8d25358ef5fa3 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -13004,6 +13004,24 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetUpdateDirective( Clauses, AStmt); } +// This checks whether num_teams clause only has one expression. +static bool checkNumTeamsClauseSingleExpr(SemaBase &SemaRef, + ArrayRef Clauses) { + auto NumTeamsClauseItr = + llvm::find_if(Clauses, llvm::IsaPred); + if (NumTeamsClauseItr != Clauses.end()) { + ArrayRef NumTeams = + cast(*NumTeamsClauseItr)->getNumTeams(); + if (NumTeams.size() > 1) { + SemaRef.Diag(NumTeams[1]->getBeginLoc(), + diag::err_omp_multi_expr_not_allowed) + << getOpenMPClauseName(OMPC_num_teams); + return false; + } + } + return true; +} + StmtResult SemaOpenMP::ActOnOpenMPTeamsDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, @@ -13011,6 +13029,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTeamsDirective(ArrayRef Clauses, if (!AStmt) return StmtError(); + if (!checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + // Report affected OpenMP target offloading behavior when in HIP lang-mode. if (getLangOpts().HIP && (DSAStack->getParentDirective() == OMPD_target)) Diag(StartLoc, diag::warn_hip_omp_target_directives); @@ -13785,6 +13806,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDirective( return StmtError(); } + if (!HasBareClause && !checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + return OMPTargetTeamsDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, AStmt); } @@ -13795,6 +13819,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeDirective( if (!AStmt) return StmtError(); + if (!checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + CapturedStmt *CS = setBranchProtectedScope(SemaRef, OMPD_target_teams_distribute, AStmt); @@ -13821,6 +13848,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeParallelForDirective( if (!AStmt) return StmtError(); + if (!checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + CapturedStmt *CS = setBranchProtectedScope( SemaRef, OMPD_target_teams_distribute_parallel_for, AStmt); @@ -13848,6 +13878,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( if (!AStmt) return StmtError(); + if (!checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + CapturedStmt *CS = setBranchProtectedScope( SemaRef, OMPD_target_teams_distribute_parallel_for_simd, AStmt); @@ -13878,6 +13911,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( if (!AStmt) return StmtError(); + if (!checkNumTeamsClauseSingleExpr(*this, Clauses)) + return StmtError(); + CapturedStmt *CS = setBranchProtectedScope( SemaRef, OMPD_target_teams_distribute_simd, AStmt); @@ -14925,9 +14961,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, case OMPC_ordered: Res = ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Expr); break; - case OMPC_num_teams: - Res = ActOnOpenMPNumTeamsClause(Expr, StartLoc, LParenLoc, EndLoc); - break; case OMPC_thread_limit: Res = ActOnOpenMPThreadLimitClause(Expr, StartLoc, LParenLoc, EndLoc); break; @@ -15031,6 +15064,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_bind: + case OMPC_num_teams: default: llvm_unreachable("Clause is not allowed."); } @@ -16894,6 +16928,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, static_cast(ExtraModifier), ExtraModifierLoc, ColonLoc, VarList, StartLoc, LParenLoc, EndLoc); break; + case OMPC_num_teams: + Res = ActOnOpenMPNumTeamsClause(VarList, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_depobj: case OMPC_final: @@ -16924,7 +16961,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, case OMPC_device: case OMPC_threads: case OMPC_simd: - case OMPC_num_teams: case OMPC_thread_limit: case OMPC_priority: case OMPC_grainsize: @@ -21587,32 +21623,40 @@ const ValueDecl *SemaOpenMP::getOpenMPDeclareMapperVarName() const { return cast(DSAStack->getDeclareMapperVarRef())->getDecl(); } -OMPClause *SemaOpenMP::ActOnOpenMPNumTeamsClause(Expr *NumTeams, +OMPClause *SemaOpenMP::ActOnOpenMPNumTeamsClause(ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - Expr *ValExpr = NumTeams; - Stmt *HelperValStmt = nullptr; - - // OpenMP [teams Constrcut, Restrictions] - // The num_teams expression must evaluate to a positive integer value. - if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_teams, - /*StrictlyPositive=*/true)) + if (VarList.empty()) return nullptr; + for (Expr *ValExpr : VarList) { + // OpenMP [teams Constrcut, Restrictions] + // The num_teams expression must evaluate to a positive integer value. + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_teams, + /*StrictlyPositive=*/true)) + return nullptr; + } + OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( DKind, OMPC_num_teams, getLangOpts().OpenMP); - if (CaptureRegion != OMPD_unknown && - !SemaRef.CurContext->isDependentContext()) { + if (CaptureRegion == OMPD_unknown || SemaRef.CurContext->isDependentContext()) + return OMPNumTeamsClause::Create(getASTContext(), CaptureRegion, StartLoc, + LParenLoc, EndLoc, VarList, + /*PreInit=*/nullptr); + + llvm::MapVector Captures; + SmallVector Vars; + for (Expr *ValExpr : VarList) { ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); - llvm::MapVector Captures; ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(getASTContext(), Captures); + Vars.push_back(ValExpr); } - return new (getASTContext()) OMPNumTeamsClause( - ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); + Stmt *PreInit = buildPreInits(getASTContext(), Captures); + return OMPNumTeamsClause::Create(getASTContext(), CaptureRegion, StartLoc, + LParenLoc, EndLoc, Vars, PreInit); } OMPClause *SemaOpenMP::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 8d3e1edf7a45d..3fbfb2ec989ce 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -2065,10 +2065,11 @@ class TreeTransform { /// /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. - OMPClause *RebuildOMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, + OMPClause *RebuildOMPNumTeamsClause(ArrayRef VarList, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().OpenMP().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc, + return getSema().OpenMP().ActOnOpenMPNumTeamsClause(VarList, StartLoc, LParenLoc, EndLoc); } @@ -10872,7 +10873,7 @@ TreeTransform::TransformOMPAllocateClause(OMPAllocateClause *C) { template OMPClause * TreeTransform::TransformOMPNumTeamsClause(OMPNumTeamsClause *C) { - ExprResult E = getDerived().TransformExpr(C->getNumTeams()); + ExprResult E = getDerived().TransformExpr(C->getNumTeams().front()); if (E.isInvalid()) return nullptr; return getDerived().RebuildOMPNumTeamsClause( diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 85ff3ab8974ee..3f35bcdc70b4f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -104,6 +104,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -10569,7 +10570,7 @@ OMPClause *OMPClauseReader::readClause() { break; } case llvm::omp::OMPC_num_teams: - C = new (Context) OMPNumTeamsClause(); + C = OMPNumTeamsClause::CreateEmpty(Context, Record.readInt()); break; case llvm::omp::OMPC_thread_limit: C = new (Context) OMPThreadLimitClause(); @@ -11357,8 +11358,13 @@ void OMPClauseReader::VisitOMPAllocateClause(OMPAllocateClause *C) { void OMPClauseReader::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) { VisitOMPClauseWithPreInit(C); - C->setNumTeams(Record.readSubExpr()); C->setLParenLoc(Record.readSourceLocation()); + unsigned NumVars = C->varlist_size(); + SmallVector Vars; + Vars.reserve(NumVars); + for ([[maybe_unused]] unsigned I : llvm::seq(NumVars)) + Vars.push_back(Record.readSubExpr()); + C->setVarRefs(Vars); } void OMPClauseReader::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index f0f9d397f1717..657eb6d3d1cc4 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7528,9 +7528,11 @@ void OMPClauseWriter::VisitOMPAllocateClause(OMPAllocateClause *C) { } void OMPClauseWriter::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) { + Record.push_back(C->varlist_size()); VisitOMPClauseWithPreInit(C); - Record.AddStmt(C->getNumTeams()); Record.AddSourceLocation(C->getLParenLoc()); + for (auto *VE : C->varlist()) + Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) { diff --git a/clang/test/OpenMP/target_teams_ast_print.cpp b/clang/test/OpenMP/target_teams_ast_print.cpp index 2ff34e4498bfe..1590a996289f8 100644 --- a/clang/test/OpenMP/target_teams_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_ast_print.cpp @@ -115,6 +115,10 @@ int main (int argc, char **argv) { // CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1) thread_limit(32) a=3; // CHECK-NEXT: a = 3; +#pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(32) +// CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1,2,3) thread_limit(32) + a=4; +// CHECK-NEXT: a = 4; #pragma omp target teams default(none), private(argc,b) num_teams(f) firstprivate(argv) reduction(| : c, d) reduction(* : e) thread_limit(f+g) // CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) num_teams(f) firstprivate(argv) reduction(|: c,d) reduction(*: e) thread_limit(f + g) foo(); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 24dc2fd2e49f4..595740c1f1314 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,6 +127,18 @@ int foo(int n) { aa += 1; } + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + { + a += 1; + aa += 1; + } + + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + { + a += 1; + aa += 1; + } + // We capture 3 VLA sizes in this target region @@ -348,22 +360,34 @@ int bar(int n){ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[KERNEL_ARGS21:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK1-NEXT: [[A_CASTED24:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [9 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [9 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [9 x ptr], align 8 +// CHECK1-NEXT: [[AA_CASTED25:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS26:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS27:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS28:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[A_CASTED32:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_CASTED33:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS35:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[A_CASTED40:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [9 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS44:%.*]] = alloca [9 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [9 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[NN_CASTED41:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS42:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS43:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS44:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS51:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS52:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS53:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[NN_CASTED57:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS58:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS59:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS60:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 @@ -603,206 +627,312 @@ int bar(int n){ // CHECK1-NEXT: [[TMP122:%.*]] = load i32, ptr [[A]], align 4 // CHECK1-NEXT: store i32 [[TMP122]], ptr [[A_CASTED24]], align 4 // CHECK1-NEXT: [[TMP123:%.*]] = load i64, ptr [[A_CASTED24]], align 8 -// CHECK1-NEXT: [[TMP124:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP124]], 20 -// CHECK1-NEXT: br i1 [[CMP25]], label [[OMP_IF_THEN26:%.*]], label [[OMP_IF_ELSE33:%.*]] -// CHECK1: omp_if.then26: -// CHECK1-NEXT: [[TMP125:%.*]] = mul nuw i64 [[TMP2]], 4 -// CHECK1-NEXT: [[TMP126:%.*]] = mul nuw i64 5, [[TMP5]] -// CHECK1-NEXT: [[TMP127:%.*]] = mul nuw i64 [[TMP126]], 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.7, i64 72, i1 false) -// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP128]], align 8 -// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP129]], align 8 -// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP130]], align 8 -// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP131]], align 8 -// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP132]], align 8 -// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP133]], align 8 -// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP134]], align 8 -// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP135]], align 8 -// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP136]], align 8 -// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP137]], align 8 -// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP138]], align 8 -// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP139]], align 8 -// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP124:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: store i16 [[TMP124]], ptr [[AA_CASTED25]], align 2 +// CHECK1-NEXT: [[TMP125:%.*]] = load i64, ptr [[AA_CASTED25]], align 8 +// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP126]], align 8 +// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP127]], align 8 +// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP128]], align 8 +// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP129]], align 8 +// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP130]], align 8 +// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP131]], align 8 +// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP134]], align 4 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP135]], align 4 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP132]], ptr [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP133]], ptr [[TMP137]], align 8 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP138]], align 8 +// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP139]], align 8 +// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8 -// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[C]], ptr [[TMP141]], align 8 -// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[C]], ptr [[TMP142]], align 8 -// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP143]], align 8 -// CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 5 -// CHECK1-NEXT: store i64 5, ptr [[TMP144]], align 8 -// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 5 -// CHECK1-NEXT: store i64 5, ptr [[TMP145]], align 8 -// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP146]], align 8 -// CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP147]], align 8 -// CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP148]], align 8 -// CHECK1-NEXT: [[TMP149:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP149]], align 8 -// CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP150]], align 8 -// CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP151]], align 8 -// CHECK1-NEXT: [[TMP152:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP127]], ptr [[TMP152]], align 8 -// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP153]], align 8 -// CHECK1-NEXT: [[TMP154:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[TMP154]], align 8 -// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[TMP155]], align 8 -// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP156]], align 8 -// CHECK1-NEXT: [[TMP157:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP159:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP160]], align 4 -// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 -// CHECK1-NEXT: store i32 9, ptr [[TMP161]], align 4 -// CHECK1-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP157]], ptr [[TMP162]], align 8 -// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP158]], ptr [[TMP163]], align 8 -// CHECK1-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP159]], ptr [[TMP164]], align 8 -// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP165]], align 8 -// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP166]], align 8 -// CHECK1-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP141]], align 8 +// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP142]], align 8 +// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP143]], align 8 +// CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 +// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP146]], align 4 +// CHECK1-NEXT: [[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) +// CHECK1-NEXT: [[TMP148:%.*]] = icmp ne i32 [[TMP147]], 0 +// CHECK1-NEXT: br i1 [[TMP148]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] +// CHECK1: omp_offload.failed30: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130(i64 [[TMP123]], i64 [[TMP125]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT31]] +// CHECK1: omp_offload.cont31: +// CHECK1-NEXT: [[TMP149:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP149]], ptr [[A_CASTED32]], align 4 +// CHECK1-NEXT: [[TMP150:%.*]] = load i64, ptr [[A_CASTED32]], align 8 +// CHECK1-NEXT: [[TMP151:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: store i16 [[TMP151]], ptr [[AA_CASTED33]], align 2 +// CHECK1-NEXT: [[TMP152:%.*]] = load i64, ptr [[AA_CASTED33]], align 8 +// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP150]], ptr [[TMP153]], align 8 +// CHECK1-NEXT: [[TMP154:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP150]], ptr [[TMP154]], align 8 +// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP155]], align 8 +// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP152]], ptr [[TMP156]], align 8 +// CHECK1-NEXT: [[TMP157:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP152]], ptr [[TMP157]], align 8 +// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP158]], align 8 +// CHECK1-NEXT: [[TMP159:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP161]], align 4 +// CHECK1-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP162]], align 4 +// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP159]], ptr [[TMP163]], align 8 +// CHECK1-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP160]], ptr [[TMP164]], align 8 +// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP165]], align 8 +// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP166]], align 8 +// CHECK1-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP167]], align 8 -// CHECK1-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP168]], align 8 -// CHECK1-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP168]], align 8 +// CHECK1-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 8 // CHECK1-NEXT: store i64 0, ptr [[TMP169]], align 8 -// CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP170]], align 4 -// CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP171]], align 4 -// CHECK1-NEXT: [[TMP172:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP172]], align 4 -// CHECK1-NEXT: [[TMP173:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.region_id, ptr [[KERNEL_ARGS30]]) -// CHECK1-NEXT: [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0 -// CHECK1-NEXT: br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] -// CHECK1: omp_offload.failed31: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148(i64 [[TMP123]], ptr [[B]], i64 [[TMP2]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP5]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT32]] -// CHECK1: omp_offload.cont32: -// CHECK1-NEXT: br label [[OMP_IF_END34:%.*]] -// CHECK1: omp_if.else33: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148(i64 [[TMP123]], ptr [[B]], i64 [[TMP2]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP5]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_IF_END34]] -// CHECK1: omp_if.end34: -// CHECK1-NEXT: store i32 0, ptr [[NN]], align 4 -// CHECK1-NEXT: [[TMP175:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK1-NEXT: store i32 [[TMP175]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP176:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP177:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP176]], ptr [[TMP177]], align 8 -// CHECK1-NEXT: [[TMP178:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP176]], ptr [[TMP178]], align 8 -// CHECK1-NEXT: [[TMP179:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP179]], align 8 -// CHECK1-NEXT: [[TMP180:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP181:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP182]], align 4 -// CHECK1-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP183]], align 4 -// CHECK1-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP180]], ptr [[TMP184]], align 8 -// CHECK1-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP181]], ptr [[TMP185]], align 8 -// CHECK1-NEXT: [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP186]], align 8 -// CHECK1-NEXT: [[TMP187:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP187]], align 8 -// CHECK1-NEXT: [[TMP188:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP188]], align 8 -// CHECK1-NEXT: [[TMP189:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP189]], align 8 -// CHECK1-NEXT: [[TMP190:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP190]], align 8 -// CHECK1-NEXT: [[TMP191:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP191]], align 8 -// CHECK1-NEXT: [[TMP192:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP192]], align 4 -// CHECK1-NEXT: [[TMP193:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP193]], align 4 -// CHECK1-NEXT: [[TMP194:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP194]], align 4 -// CHECK1-NEXT: [[TMP195:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.region_id, ptr [[KERNEL_ARGS38]]) -// CHECK1-NEXT: [[TMP196:%.*]] = icmp ne i32 [[TMP195]], 0 -// CHECK1-NEXT: br i1 [[TMP196]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] -// CHECK1: omp_offload.failed39: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i64 [[TMP176]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT40]] -// CHECK1: omp_offload.cont40: -// CHECK1-NEXT: [[TMP197:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK1-NEXT: store i32 [[TMP197]], ptr [[NN_CASTED41]], align 4 -// CHECK1-NEXT: [[TMP198:%.*]] = load i64, ptr [[NN_CASTED41]], align 8 -// CHECK1-NEXT: [[TMP199:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS42]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP198]], ptr [[TMP199]], align 8 -// CHECK1-NEXT: [[TMP200:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS43]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP198]], ptr [[TMP200]], align 8 -// CHECK1-NEXT: [[TMP201:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS44]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP201]], align 8 -// CHECK1-NEXT: [[TMP202:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS42]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP203:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS43]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP204:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP204]], align 4 -// CHECK1-NEXT: [[TMP205:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP205]], align 4 -// CHECK1-NEXT: [[TMP206:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP202]], ptr [[TMP206]], align 8 -// CHECK1-NEXT: [[TMP207:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP203]], ptr [[TMP207]], align 8 -// CHECK1-NEXT: [[TMP208:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP208]], align 8 -// CHECK1-NEXT: [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP209]], align 8 -// CHECK1-NEXT: [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP170]], align 8 +// CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 +// CHECK1-NEXT: [[TMP172:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT: [[TMP173:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP173]], align 4 +// CHECK1-NEXT: [[TMP174:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.region_id, ptr [[KERNEL_ARGS37]]) +// CHECK1-NEXT: [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0 +// CHECK1-NEXT: br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED38:%.*]], label [[OMP_OFFLOAD_CONT39:%.*]] +// CHECK1: omp_offload.failed38: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136(i64 [[TMP150]], i64 [[TMP152]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT39]] +// CHECK1: omp_offload.cont39: +// CHECK1-NEXT: [[TMP176:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP176]], ptr [[A_CASTED40]], align 4 +// CHECK1-NEXT: [[TMP177:%.*]] = load i64, ptr [[A_CASTED40]], align 8 +// CHECK1-NEXT: [[TMP178:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP178]], 20 +// CHECK1-NEXT: br i1 [[CMP41]], label [[OMP_IF_THEN42:%.*]], label [[OMP_IF_ELSE49:%.*]] +// CHECK1: omp_if.then42: +// CHECK1-NEXT: [[TMP179:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK1-NEXT: [[TMP180:%.*]] = mul nuw i64 5, [[TMP5]] +// CHECK1-NEXT: [[TMP181:%.*]] = mul nuw i64 [[TMP180]], 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.11, i64 72, i1 false) +// CHECK1-NEXT: [[TMP182:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP177]], ptr [[TMP182]], align 8 +// CHECK1-NEXT: [[TMP183:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP177]], ptr [[TMP183]], align 8 +// CHECK1-NEXT: [[TMP184:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP184]], align 8 +// CHECK1-NEXT: [[TMP185:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP185]], align 8 +// CHECK1-NEXT: [[TMP186:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP186]], align 8 +// CHECK1-NEXT: [[TMP187:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP187]], align 8 +// CHECK1-NEXT: [[TMP188:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP188]], align 8 +// CHECK1-NEXT: [[TMP189:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP189]], align 8 +// CHECK1-NEXT: [[TMP190:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP190]], align 8 +// CHECK1-NEXT: [[TMP191:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP191]], align 8 +// CHECK1-NEXT: [[TMP192:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP192]], align 8 +// CHECK1-NEXT: [[TMP193:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP179]], ptr [[TMP193]], align 8 +// CHECK1-NEXT: [[TMP194:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP194]], align 8 +// CHECK1-NEXT: [[TMP195:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP195]], align 8 +// CHECK1-NEXT: [[TMP196:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP196]], align 8 +// CHECK1-NEXT: [[TMP197:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP197]], align 8 +// CHECK1-NEXT: [[TMP198:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 5 +// CHECK1-NEXT: store i64 5, ptr [[TMP198]], align 8 +// CHECK1-NEXT: [[TMP199:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 5 +// CHECK1-NEXT: store i64 5, ptr [[TMP199]], align 8 +// CHECK1-NEXT: [[TMP200:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP200]], align 8 +// CHECK1-NEXT: [[TMP201:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP201]], align 8 +// CHECK1-NEXT: [[TMP202:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP202]], align 8 +// CHECK1-NEXT: [[TMP203:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP203]], align 8 +// CHECK1-NEXT: [[TMP204:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP204]], align 8 +// CHECK1-NEXT: [[TMP205:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP205]], align 8 +// CHECK1-NEXT: [[TMP206:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP181]], ptr [[TMP206]], align 8 +// CHECK1-NEXT: [[TMP207:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP207]], align 8 +// CHECK1-NEXT: [[TMP208:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP208]], align 8 +// CHECK1-NEXT: [[TMP209:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP209]], align 8 +// CHECK1-NEXT: [[TMP210:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i64 0, i64 8 // CHECK1-NEXT: store ptr null, ptr [[TMP210]], align 8 -// CHECK1-NEXT: [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP211]], align 8 -// CHECK1-NEXT: [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP212]], align 8 -// CHECK1-NEXT: [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP213]], align 8 -// CHECK1-NEXT: [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP214]], align 4 -// CHECK1-NEXT: [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP215]], align 4 -// CHECK1-NEXT: [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP216]], align 4 -// CHECK1-NEXT: [[TMP217:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.region_id, ptr [[KERNEL_ARGS45]]) -// CHECK1-NEXT: [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0 -// CHECK1-NEXT: br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED46:%.*]], label [[OMP_OFFLOAD_CONT47:%.*]] -// CHECK1: omp_offload.failed46: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163(i64 [[TMP198]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT47]] -// CHECK1: omp_offload.cont47: -// CHECK1-NEXT: [[TMP219:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP220:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP220]]) -// CHECK1-NEXT: ret i32 [[TMP219]] +// CHECK1-NEXT: [[TMP211:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP212:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP213:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP214]], align 4 +// CHECK1-NEXT: [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, ptr [[TMP215]], align 4 +// CHECK1-NEXT: [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP211]], ptr [[TMP216]], align 8 +// CHECK1-NEXT: [[TMP217:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP212]], ptr [[TMP217]], align 8 +// CHECK1-NEXT: [[TMP218:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP213]], ptr [[TMP218]], align 8 +// CHECK1-NEXT: [[TMP219:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP219]], align 8 +// CHECK1-NEXT: [[TMP220:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP220]], align 8 +// CHECK1-NEXT: [[TMP221:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP221]], align 8 +// CHECK1-NEXT: [[TMP222:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP222]], align 8 +// CHECK1-NEXT: [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP223]], align 8 +// CHECK1-NEXT: [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP224]], align 4 +// CHECK1-NEXT: [[TMP225:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP225]], align 4 +// CHECK1-NEXT: [[TMP226:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP226]], align 4 +// CHECK1-NEXT: [[TMP227:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.region_id, ptr [[KERNEL_ARGS46]]) +// CHECK1-NEXT: [[TMP228:%.*]] = icmp ne i32 [[TMP227]], 0 +// CHECK1-NEXT: br i1 [[TMP228]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]] +// CHECK1: omp_offload.failed47: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i64 [[TMP177]], ptr [[B]], i64 [[TMP2]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP5]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT48]] +// CHECK1: omp_offload.cont48: +// CHECK1-NEXT: br label [[OMP_IF_END50:%.*]] +// CHECK1: omp_if.else49: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i64 [[TMP177]], ptr [[B]], i64 [[TMP2]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP5]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_IF_END50]] +// CHECK1: omp_if.end50: +// CHECK1-NEXT: store i32 0, ptr [[NN]], align 4 +// CHECK1-NEXT: [[TMP229:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK1-NEXT: store i32 [[TMP229]], ptr [[NN_CASTED]], align 4 +// CHECK1-NEXT: [[TMP230:%.*]] = load i64, ptr [[NN_CASTED]], align 8 +// CHECK1-NEXT: [[TMP231:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP230]], ptr [[TMP231]], align 8 +// CHECK1-NEXT: [[TMP232:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP230]], ptr [[TMP232]], align 8 +// CHECK1-NEXT: [[TMP233:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS53]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP233]], align 8 +// CHECK1-NEXT: [[TMP234:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP235:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP236:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP236]], align 4 +// CHECK1-NEXT: [[TMP237:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP237]], align 4 +// CHECK1-NEXT: [[TMP238:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP234]], ptr [[TMP238]], align 8 +// CHECK1-NEXT: [[TMP239:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP235]], ptr [[TMP239]], align 8 +// CHECK1-NEXT: [[TMP240:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.13, ptr [[TMP240]], align 8 +// CHECK1-NEXT: [[TMP241:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP241]], align 8 +// CHECK1-NEXT: [[TMP242:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP242]], align 8 +// CHECK1-NEXT: [[TMP243:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP243]], align 8 +// CHECK1-NEXT: [[TMP244:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP244]], align 8 +// CHECK1-NEXT: [[TMP245:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP245]], align 8 +// CHECK1-NEXT: [[TMP246:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP246]], align 4 +// CHECK1-NEXT: [[TMP247:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP247]], align 4 +// CHECK1-NEXT: [[TMP248:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP248]], align 4 +// CHECK1-NEXT: [[TMP249:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.region_id, ptr [[KERNEL_ARGS54]]) +// CHECK1-NEXT: [[TMP250:%.*]] = icmp ne i32 [[TMP249]], 0 +// CHECK1-NEXT: br i1 [[TMP250]], label [[OMP_OFFLOAD_FAILED55:%.*]], label [[OMP_OFFLOAD_CONT56:%.*]] +// CHECK1: omp_offload.failed55: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172(i64 [[TMP230]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT56]] +// CHECK1: omp_offload.cont56: +// CHECK1-NEXT: [[TMP251:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK1-NEXT: store i32 [[TMP251]], ptr [[NN_CASTED57]], align 4 +// CHECK1-NEXT: [[TMP252:%.*]] = load i64, ptr [[NN_CASTED57]], align 8 +// CHECK1-NEXT: [[TMP253:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP252]], ptr [[TMP253]], align 8 +// CHECK1-NEXT: [[TMP254:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP252]], ptr [[TMP254]], align 8 +// CHECK1-NEXT: [[TMP255:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS60]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP255]], align 8 +// CHECK1-NEXT: [[TMP256:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP257:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP258:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP258]], align 4 +// CHECK1-NEXT: [[TMP259:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP259]], align 4 +// CHECK1-NEXT: [[TMP260:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP256]], ptr [[TMP260]], align 8 +// CHECK1-NEXT: [[TMP261:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP257]], ptr [[TMP261]], align 8 +// CHECK1-NEXT: [[TMP262:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.15, ptr [[TMP262]], align 8 +// CHECK1-NEXT: [[TMP263:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP263]], align 8 +// CHECK1-NEXT: [[TMP264:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP264]], align 8 +// CHECK1-NEXT: [[TMP265:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP265]], align 8 +// CHECK1-NEXT: [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP266]], align 8 +// CHECK1-NEXT: [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP267]], align 8 +// CHECK1-NEXT: [[TMP268:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP268]], align 4 +// CHECK1-NEXT: [[TMP269:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP269]], align 4 +// CHECK1-NEXT: [[TMP270:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP270]], align 4 +// CHECK1-NEXT: [[TMP271:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.region_id, ptr [[KERNEL_ARGS61]]) +// CHECK1-NEXT: [[TMP272:%.*]] = icmp ne i32 [[TMP271]], 0 +// CHECK1-NEXT: br i1 [[TMP272]], label [[OMP_OFFLOAD_FAILED62:%.*]], label [[OMP_OFFLOAD_CONT63:%.*]] +// CHECK1: omp_offload.failed62: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175(i64 [[TMP252]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT63]] +// CHECK1: omp_offload.cont63: +// CHECK1-NEXT: [[TMP273:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP274:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP274]]) +// CHECK1-NEXT: ret i32 [[TMP273]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101 @@ -895,68 +1025,68 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META25:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META27:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META25]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META27]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK1-NEXT: store i32 3, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META25]] +// CHECK1-NEXT: store i32 3, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META25]] +// CHECK1-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 9 -// CHECK1-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META25]] +// CHECK1-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META25]] +// CHECK1-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META25]] +// CHECK1-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 [[TMP18]], i32 [[TMP19]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, ptr [[KERNEL_ARGS_I]]) // CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP12]], align 2 -// CHECK1-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META25]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[AA_CASTED_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META27]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[AA_CASTED_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META25]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META27]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META25]] -// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META27]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP37]], i64 [[TMP39]], i64 [[TMP41]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK1: .omp_outlined..exit: @@ -1105,7 +1235,93 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130 +// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136 +// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 // CHECK1-SAME: (i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -1138,11 +1354,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1214,7 +1430,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172 // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 @@ -1223,11 +1439,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i64 [[TMP1]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1240,11 +1456,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined, i64 [[TMP1]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1256,7 +1472,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175 // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 @@ -1265,11 +1481,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i64 [[TMP1]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1278,11 +1494,11 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1325,9 +1541,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.13, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr @.offload_sizes.17, ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP12]], align 8 +// CHECK1-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -1342,27 +1558,27 @@ int bar(int n){ // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188(i64 [[TMP1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200(i64 [[TMP1]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200 // CHECK1-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined, i64 [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i64 [[TMP0]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1445,7 +1661,7 @@ int bar(int n){ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]] // CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.15, i64 40, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.19, i64 40, i1 false) // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP10]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 @@ -1492,7 +1708,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP34]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -1507,16 +1723,16 @@ int bar(int n){ // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP40]], align 4 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: [[TMP44:%.*]] = mul nsw i64 1, [[TMP2]] @@ -1598,9 +1814,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.17, ptr [[TMP25]], align 8 +// CHECK1-NEXT: store ptr @.offload_sizes.21, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP26]], align 8 +// CHECK1-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -1615,16 +1831,16 @@ int bar(int n){ // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP33]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[B]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[B]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[B]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227(i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[B]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[A]], align 4 @@ -1686,9 +1902,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.19, ptr [[TMP20]], align 8 +// CHECK1-NEXT: store ptr @.offload_sizes.23, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP21]], align 8 +// CHECK1-NEXT: store ptr @.offload_maptypes.24, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -1703,23 +1919,23 @@ int bar(int n){ // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[A]], align 4 // CHECK1-NEXT: ret i32 [[TMP31]] // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245 // CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 @@ -1740,11 +1956,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1782,7 +1998,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227 // CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -1806,11 +2022,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1846,7 +2062,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210 // CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -1864,11 +2080,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1937,22 +2153,34 @@ int bar(int n){ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[KERNEL_ARGS21:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK3-NEXT: [[A_CASTED24:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [9 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [9 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [9 x ptr], align 4 +// CHECK3-NEXT: [[AA_CASTED25:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS26:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS27:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS28:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[A_CASTED32:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_CASTED33:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS35:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[A_CASTED40:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [9 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS44:%.*]] = alloca [9 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [9 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[NN_CASTED41:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS42:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_PTRS43:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS44:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS51:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS52:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS53:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[NN_CASTED57:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS58:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS59:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS60:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[A]], align 4 @@ -2190,208 +2418,314 @@ int bar(int n){ // CHECK3-NEXT: [[TMP120:%.*]] = load i32, ptr [[A]], align 4 // CHECK3-NEXT: store i32 [[TMP120]], ptr [[A_CASTED24]], align 4 // CHECK3-NEXT: [[TMP121:%.*]] = load i32, ptr [[A_CASTED24]], align 4 -// CHECK3-NEXT: [[TMP122:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP122]], 20 -// CHECK3-NEXT: br i1 [[CMP25]], label [[OMP_IF_THEN26:%.*]], label [[OMP_IF_ELSE33:%.*]] -// CHECK3: omp_if.then26: -// CHECK3-NEXT: [[TMP123:%.*]] = mul nuw i32 [[TMP1]], 4 -// CHECK3-NEXT: [[TMP124:%.*]] = sext i32 [[TMP123]] to i64 -// CHECK3-NEXT: [[TMP125:%.*]] = mul nuw i32 5, [[TMP3]] -// CHECK3-NEXT: [[TMP126:%.*]] = mul nuw i32 [[TMP125]], 8 -// CHECK3-NEXT: [[TMP127:%.*]] = sext i32 [[TMP126]] to i64 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.7, i32 72, i1 false) -// CHECK3-NEXT: [[TMP128:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP121]], ptr [[TMP128]], align 4 -// CHECK3-NEXT: [[TMP129:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP121]], ptr [[TMP129]], align 4 -// CHECK3-NEXT: [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP130]], align 4 -// CHECK3-NEXT: [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP131]], align 4 -// CHECK3-NEXT: [[TMP132:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP132]], align 4 -// CHECK3-NEXT: [[TMP133:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP133]], align 4 -// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP134]], align 4 -// CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP135]], align 4 -// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP136]], align 4 -// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP137]], align 4 -// CHECK3-NEXT: [[TMP138:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP138]], align 4 -// CHECK3-NEXT: [[TMP139:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3 -// CHECK3-NEXT: store i64 [[TMP124]], ptr [[TMP139]], align 4 -// CHECK3-NEXT: [[TMP140:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP140]], align 4 -// CHECK3-NEXT: [[TMP141:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[TMP141]], align 4 -// CHECK3-NEXT: [[TMP142:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[TMP142]], align 4 -// CHECK3-NEXT: [[TMP143:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 4 -// CHECK3-NEXT: store ptr null, ptr [[TMP143]], align 4 -// CHECK3-NEXT: [[TMP144:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 5 -// CHECK3-NEXT: store i32 5, ptr [[TMP144]], align 4 -// CHECK3-NEXT: [[TMP145:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 5 -// CHECK3-NEXT: store i32 5, ptr [[TMP145]], align 4 -// CHECK3-NEXT: [[TMP146:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 5 -// CHECK3-NEXT: store ptr null, ptr [[TMP146]], align 4 -// CHECK3-NEXT: [[TMP147:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP147]], align 4 -// CHECK3-NEXT: [[TMP148:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP148]], align 4 -// CHECK3-NEXT: [[TMP149:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP149]], align 4 -// CHECK3-NEXT: [[TMP150:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 7 -// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP150]], align 4 -// CHECK3-NEXT: [[TMP151:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 7 -// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP151]], align 4 -// CHECK3-NEXT: [[TMP152:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7 -// CHECK3-NEXT: store i64 [[TMP127]], ptr [[TMP152]], align 4 -// CHECK3-NEXT: [[TMP153:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP122:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: store i16 [[TMP122]], ptr [[AA_CASTED25]], align 2 +// CHECK3-NEXT: [[TMP123:%.*]] = load i32, ptr [[AA_CASTED25]], align 4 +// CHECK3-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP121]], ptr [[TMP124]], align 4 +// CHECK3-NEXT: [[TMP125:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP121]], ptr [[TMP125]], align 4 +// CHECK3-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP126]], align 4 +// CHECK3-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP123]], ptr [[TMP127]], align 4 +// CHECK3-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP123]], ptr [[TMP128]], align 4 +// CHECK3-NEXT: [[TMP129:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP129]], align 4 +// CHECK3-NEXT: [[TMP130:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP131:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP132]], align 4 +// CHECK3-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP133]], align 4 +// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP130]], ptr [[TMP134]], align 4 +// CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP131]], ptr [[TMP135]], align 4 +// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP136]], align 4 +// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP137]], align 4 +// CHECK3-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP138]], align 4 +// CHECK3-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP139]], align 4 +// CHECK3-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP140]], align 8 +// CHECK3-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP141]], align 8 +// CHECK3-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP142]], align 4 +// CHECK3-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP143]], align 4 +// CHECK3-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP144]], align 4 +// CHECK3-NEXT: [[TMP145:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) +// CHECK3-NEXT: [[TMP146:%.*]] = icmp ne i32 [[TMP145]], 0 +// CHECK3-NEXT: br i1 [[TMP146]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] +// CHECK3: omp_offload.failed30: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130(i32 [[TMP121]], i32 [[TMP123]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT31]] +// CHECK3: omp_offload.cont31: +// CHECK3-NEXT: [[TMP147:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP147]], ptr [[A_CASTED32]], align 4 +// CHECK3-NEXT: [[TMP148:%.*]] = load i32, ptr [[A_CASTED32]], align 4 +// CHECK3-NEXT: [[TMP149:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: store i16 [[TMP149]], ptr [[AA_CASTED33]], align 2 +// CHECK3-NEXT: [[TMP150:%.*]] = load i32, ptr [[AA_CASTED33]], align 4 +// CHECK3-NEXT: [[TMP151:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP148]], ptr [[TMP151]], align 4 +// CHECK3-NEXT: [[TMP152:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP148]], ptr [[TMP152]], align 4 +// CHECK3-NEXT: [[TMP153:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i32 0, i32 0 // CHECK3-NEXT: store ptr null, ptr [[TMP153]], align 4 -// CHECK3-NEXT: [[TMP154:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 8 -// CHECK3-NEXT: store ptr [[D]], ptr [[TMP154]], align 4 -// CHECK3-NEXT: [[TMP155:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 8 -// CHECK3-NEXT: store ptr [[D]], ptr [[TMP155]], align 4 -// CHECK3-NEXT: [[TMP156:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP154:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP150]], ptr [[TMP154]], align 4 +// CHECK3-NEXT: [[TMP155:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP150]], ptr [[TMP155]], align 4 +// CHECK3-NEXT: [[TMP156:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i32 0, i32 1 // CHECK3-NEXT: store ptr null, ptr [[TMP156]], align 4 -// CHECK3-NEXT: [[TMP157:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP158:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP159:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP160:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP160]], align 4 -// CHECK3-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 -// CHECK3-NEXT: store i32 9, ptr [[TMP161]], align 4 -// CHECK3-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP157]], ptr [[TMP162]], align 4 -// CHECK3-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP158]], ptr [[TMP163]], align 4 -// CHECK3-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP159]], ptr [[TMP164]], align 4 -// CHECK3-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP165]], align 4 -// CHECK3-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP157:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP158:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP159:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP159]], align 4 +// CHECK3-NEXT: [[TMP160:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP160]], align 4 +// CHECK3-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP157]], ptr [[TMP161]], align 4 +// CHECK3-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP158]], ptr [[TMP162]], align 4 +// CHECK3-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.9, ptr [[TMP163]], align 4 +// CHECK3-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP164]], align 4 +// CHECK3-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP165]], align 4 +// CHECK3-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 7 // CHECK3-NEXT: store ptr null, ptr [[TMP166]], align 4 -// CHECK3-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP167]], align 4 -// CHECK3-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP167]], align 8 +// CHECK3-NEXT: [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 9 // CHECK3-NEXT: store i64 0, ptr [[TMP168]], align 8 -// CHECK3-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP169]], align 8 -// CHECK3-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP170]], align 4 -// CHECK3-NEXT: [[TMP171:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP171]], align 4 -// CHECK3-NEXT: [[TMP172:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP172]], align 4 -// CHECK3-NEXT: [[TMP173:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.region_id, ptr [[KERNEL_ARGS30]]) -// CHECK3-NEXT: [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0 -// CHECK3-NEXT: br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] -// CHECK3: omp_offload.failed31: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148(i32 [[TMP121]], ptr [[B]], i32 [[TMP1]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP3]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT32]] -// CHECK3: omp_offload.cont32: -// CHECK3-NEXT: br label [[OMP_IF_END34:%.*]] -// CHECK3: omp_if.else33: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148(i32 [[TMP121]], ptr [[B]], i32 [[TMP1]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP3]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_IF_END34]] -// CHECK3: omp_if.end34: -// CHECK3-NEXT: store i32 0, ptr [[NN]], align 4 -// CHECK3-NEXT: [[TMP175:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK3-NEXT: store i32 [[TMP175]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP176:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP177:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP176]], ptr [[TMP177]], align 4 -// CHECK3-NEXT: [[TMP178:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP176]], ptr [[TMP178]], align 4 -// CHECK3-NEXT: [[TMP179:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP179]], align 4 -// CHECK3-NEXT: [[TMP180:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP181:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP182]], align 4 -// CHECK3-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP183]], align 4 -// CHECK3-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP180]], ptr [[TMP184]], align 4 -// CHECK3-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP181]], ptr [[TMP185]], align 4 -// CHECK3-NEXT: [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.9, ptr [[TMP186]], align 4 -// CHECK3-NEXT: [[TMP187:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP187]], align 4 -// CHECK3-NEXT: [[TMP188:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP188]], align 4 -// CHECK3-NEXT: [[TMP189:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP189]], align 4 -// CHECK3-NEXT: [[TMP190:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP190]], align 8 -// CHECK3-NEXT: [[TMP191:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP191]], align 8 -// CHECK3-NEXT: [[TMP192:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP192]], align 4 -// CHECK3-NEXT: [[TMP193:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP193]], align 4 -// CHECK3-NEXT: [[TMP194:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP194]], align 4 -// CHECK3-NEXT: [[TMP195:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.region_id, ptr [[KERNEL_ARGS38]]) -// CHECK3-NEXT: [[TMP196:%.*]] = icmp ne i32 [[TMP195]], 0 -// CHECK3-NEXT: br i1 [[TMP196]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] -// CHECK3: omp_offload.failed39: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i32 [[TMP176]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT40]] -// CHECK3: omp_offload.cont40: -// CHECK3-NEXT: [[TMP197:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK3-NEXT: store i32 [[TMP197]], ptr [[NN_CASTED41]], align 4 -// CHECK3-NEXT: [[TMP198:%.*]] = load i32, ptr [[NN_CASTED41]], align 4 -// CHECK3-NEXT: [[TMP199:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS42]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP198]], ptr [[TMP199]], align 4 -// CHECK3-NEXT: [[TMP200:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS43]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP198]], ptr [[TMP200]], align 4 -// CHECK3-NEXT: [[TMP201:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS44]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP201]], align 4 -// CHECK3-NEXT: [[TMP202:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS42]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP203:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS43]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP204:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK3-NEXT: store i32 3, ptr [[TMP204]], align 4 -// CHECK3-NEXT: [[TMP205:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP205]], align 4 -// CHECK3-NEXT: [[TMP206:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP202]], ptr [[TMP206]], align 4 -// CHECK3-NEXT: [[TMP207:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP203]], ptr [[TMP207]], align 4 -// CHECK3-NEXT: [[TMP208:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP208]], align 4 -// CHECK3-NEXT: [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP209]], align 4 -// CHECK3-NEXT: [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP169]], align 4 +// CHECK3-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP170]], align 4 +// CHECK3-NEXT: [[TMP171:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP171]], align 4 +// CHECK3-NEXT: [[TMP172:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.region_id, ptr [[KERNEL_ARGS37]]) +// CHECK3-NEXT: [[TMP173:%.*]] = icmp ne i32 [[TMP172]], 0 +// CHECK3-NEXT: br i1 [[TMP173]], label [[OMP_OFFLOAD_FAILED38:%.*]], label [[OMP_OFFLOAD_CONT39:%.*]] +// CHECK3: omp_offload.failed38: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136(i32 [[TMP148]], i32 [[TMP150]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT39]] +// CHECK3: omp_offload.cont39: +// CHECK3-NEXT: [[TMP174:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP174]], ptr [[A_CASTED40]], align 4 +// CHECK3-NEXT: [[TMP175:%.*]] = load i32, ptr [[A_CASTED40]], align 4 +// CHECK3-NEXT: [[TMP176:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP176]], 20 +// CHECK3-NEXT: br i1 [[CMP41]], label [[OMP_IF_THEN42:%.*]], label [[OMP_IF_ELSE49:%.*]] +// CHECK3: omp_if.then42: +// CHECK3-NEXT: [[TMP177:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK3-NEXT: [[TMP178:%.*]] = sext i32 [[TMP177]] to i64 +// CHECK3-NEXT: [[TMP179:%.*]] = mul nuw i32 5, [[TMP3]] +// CHECK3-NEXT: [[TMP180:%.*]] = mul nuw i32 [[TMP179]], 8 +// CHECK3-NEXT: [[TMP181:%.*]] = sext i32 [[TMP180]] to i64 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.11, i32 72, i1 false) +// CHECK3-NEXT: [[TMP182:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP175]], ptr [[TMP182]], align 4 +// CHECK3-NEXT: [[TMP183:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP175]], ptr [[TMP183]], align 4 +// CHECK3-NEXT: [[TMP184:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP184]], align 4 +// CHECK3-NEXT: [[TMP185:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP185]], align 4 +// CHECK3-NEXT: [[TMP186:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP186]], align 4 +// CHECK3-NEXT: [[TMP187:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP187]], align 4 +// CHECK3-NEXT: [[TMP188:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP188]], align 4 +// CHECK3-NEXT: [[TMP189:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP189]], align 4 +// CHECK3-NEXT: [[TMP190:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP190]], align 4 +// CHECK3-NEXT: [[TMP191:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP191]], align 4 +// CHECK3-NEXT: [[TMP192:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP192]], align 4 +// CHECK3-NEXT: [[TMP193:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK3-NEXT: store i64 [[TMP178]], ptr [[TMP193]], align 4 +// CHECK3-NEXT: [[TMP194:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP194]], align 4 +// CHECK3-NEXT: [[TMP195:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP195]], align 4 +// CHECK3-NEXT: [[TMP196:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP196]], align 4 +// CHECK3-NEXT: [[TMP197:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP197]], align 4 +// CHECK3-NEXT: [[TMP198:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 5 +// CHECK3-NEXT: store i32 5, ptr [[TMP198]], align 4 +// CHECK3-NEXT: [[TMP199:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 5 +// CHECK3-NEXT: store i32 5, ptr [[TMP199]], align 4 +// CHECK3-NEXT: [[TMP200:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP200]], align 4 +// CHECK3-NEXT: [[TMP201:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP201]], align 4 +// CHECK3-NEXT: [[TMP202:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP202]], align 4 +// CHECK3-NEXT: [[TMP203:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP203]], align 4 +// CHECK3-NEXT: [[TMP204:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP204]], align 4 +// CHECK3-NEXT: [[TMP205:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[VLA1]], ptr [[TMP205]], align 4 +// CHECK3-NEXT: [[TMP206:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK3-NEXT: store i64 [[TMP181]], ptr [[TMP206]], align 4 +// CHECK3-NEXT: [[TMP207:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP207]], align 4 +// CHECK3-NEXT: [[TMP208:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP208]], align 4 +// CHECK3-NEXT: [[TMP209:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP209]], align 4 +// CHECK3-NEXT: [[TMP210:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS45]], i32 0, i32 8 // CHECK3-NEXT: store ptr null, ptr [[TMP210]], align 4 -// CHECK3-NEXT: [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP211]], align 4 -// CHECK3-NEXT: [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP212]], align 8 -// CHECK3-NEXT: [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP213]], align 8 -// CHECK3-NEXT: [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP214]], align 4 -// CHECK3-NEXT: [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP215]], align 4 -// CHECK3-NEXT: [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP216]], align 4 -// CHECK3-NEXT: [[TMP217:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.region_id, ptr [[KERNEL_ARGS45]]) -// CHECK3-NEXT: [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0 -// CHECK3-NEXT: br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED46:%.*]], label [[OMP_OFFLOAD_CONT47:%.*]] -// CHECK3: omp_offload.failed46: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163(i32 [[TMP198]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT47]] -// CHECK3: omp_offload.cont47: -// CHECK3-NEXT: [[TMP219:%.*]] = load i32, ptr [[A]], align 4 -// CHECK3-NEXT: [[TMP220:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP220]]) -// CHECK3-NEXT: ret i32 [[TMP219]] +// CHECK3-NEXT: [[TMP211:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP212:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP213:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP214]], align 4 +// CHECK3-NEXT: [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 +// CHECK3-NEXT: store i32 9, ptr [[TMP215]], align 4 +// CHECK3-NEXT: [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP211]], ptr [[TMP216]], align 4 +// CHECK3-NEXT: [[TMP217:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP212]], ptr [[TMP217]], align 4 +// CHECK3-NEXT: [[TMP218:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP213]], ptr [[TMP218]], align 4 +// CHECK3-NEXT: [[TMP219:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP219]], align 4 +// CHECK3-NEXT: [[TMP220:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP220]], align 4 +// CHECK3-NEXT: [[TMP221:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP221]], align 4 +// CHECK3-NEXT: [[TMP222:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP222]], align 8 +// CHECK3-NEXT: [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP223]], align 8 +// CHECK3-NEXT: [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP224]], align 4 +// CHECK3-NEXT: [[TMP225:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP225]], align 4 +// CHECK3-NEXT: [[TMP226:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP226]], align 4 +// CHECK3-NEXT: [[TMP227:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.region_id, ptr [[KERNEL_ARGS46]]) +// CHECK3-NEXT: [[TMP228:%.*]] = icmp ne i32 [[TMP227]], 0 +// CHECK3-NEXT: br i1 [[TMP228]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]] +// CHECK3: omp_offload.failed47: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i32 [[TMP175]], ptr [[B]], i32 [[TMP1]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP3]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT48]] +// CHECK3: omp_offload.cont48: +// CHECK3-NEXT: br label [[OMP_IF_END50:%.*]] +// CHECK3: omp_if.else49: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160(i32 [[TMP175]], ptr [[B]], i32 [[TMP1]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP3]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_IF_END50]] +// CHECK3: omp_if.end50: +// CHECK3-NEXT: store i32 0, ptr [[NN]], align 4 +// CHECK3-NEXT: [[TMP229:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK3-NEXT: store i32 [[TMP229]], ptr [[NN_CASTED]], align 4 +// CHECK3-NEXT: [[TMP230:%.*]] = load i32, ptr [[NN_CASTED]], align 4 +// CHECK3-NEXT: [[TMP231:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP230]], ptr [[TMP231]], align 4 +// CHECK3-NEXT: [[TMP232:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP230]], ptr [[TMP232]], align 4 +// CHECK3-NEXT: [[TMP233:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS53]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP233]], align 4 +// CHECK3-NEXT: [[TMP234:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP235:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP236:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP236]], align 4 +// CHECK3-NEXT: [[TMP237:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP237]], align 4 +// CHECK3-NEXT: [[TMP238:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP234]], ptr [[TMP238]], align 4 +// CHECK3-NEXT: [[TMP239:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP235]], ptr [[TMP239]], align 4 +// CHECK3-NEXT: [[TMP240:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.13, ptr [[TMP240]], align 4 +// CHECK3-NEXT: [[TMP241:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP241]], align 4 +// CHECK3-NEXT: [[TMP242:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP242]], align 4 +// CHECK3-NEXT: [[TMP243:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP243]], align 4 +// CHECK3-NEXT: [[TMP244:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP244]], align 8 +// CHECK3-NEXT: [[TMP245:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP245]], align 8 +// CHECK3-NEXT: [[TMP246:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP246]], align 4 +// CHECK3-NEXT: [[TMP247:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP247]], align 4 +// CHECK3-NEXT: [[TMP248:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP248]], align 4 +// CHECK3-NEXT: [[TMP249:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.region_id, ptr [[KERNEL_ARGS54]]) +// CHECK3-NEXT: [[TMP250:%.*]] = icmp ne i32 [[TMP249]], 0 +// CHECK3-NEXT: br i1 [[TMP250]], label [[OMP_OFFLOAD_FAILED55:%.*]], label [[OMP_OFFLOAD_CONT56:%.*]] +// CHECK3: omp_offload.failed55: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172(i32 [[TMP230]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT56]] +// CHECK3: omp_offload.cont56: +// CHECK3-NEXT: [[TMP251:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK3-NEXT: store i32 [[TMP251]], ptr [[NN_CASTED57]], align 4 +// CHECK3-NEXT: [[TMP252:%.*]] = load i32, ptr [[NN_CASTED57]], align 4 +// CHECK3-NEXT: [[TMP253:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP252]], ptr [[TMP253]], align 4 +// CHECK3-NEXT: [[TMP254:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP252]], ptr [[TMP254]], align 4 +// CHECK3-NEXT: [[TMP255:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS60]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP255]], align 4 +// CHECK3-NEXT: [[TMP256:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP257:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP258:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP258]], align 4 +// CHECK3-NEXT: [[TMP259:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP259]], align 4 +// CHECK3-NEXT: [[TMP260:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP256]], ptr [[TMP260]], align 4 +// CHECK3-NEXT: [[TMP261:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP257]], ptr [[TMP261]], align 4 +// CHECK3-NEXT: [[TMP262:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.15, ptr [[TMP262]], align 4 +// CHECK3-NEXT: [[TMP263:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP263]], align 4 +// CHECK3-NEXT: [[TMP264:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP264]], align 4 +// CHECK3-NEXT: [[TMP265:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP265]], align 4 +// CHECK3-NEXT: [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP266]], align 8 +// CHECK3-NEXT: [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP267]], align 8 +// CHECK3-NEXT: [[TMP268:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP268]], align 4 +// CHECK3-NEXT: [[TMP269:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP269]], align 4 +// CHECK3-NEXT: [[TMP270:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP270]], align 4 +// CHECK3-NEXT: [[TMP271:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.region_id, ptr [[KERNEL_ARGS61]]) +// CHECK3-NEXT: [[TMP272:%.*]] = icmp ne i32 [[TMP271]], 0 +// CHECK3-NEXT: br i1 [[TMP272]], label [[OMP_OFFLOAD_FAILED62:%.*]], label [[OMP_OFFLOAD_CONT63:%.*]] +// CHECK3: omp_offload.failed62: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175(i32 [[TMP252]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT63]] +// CHECK3: omp_offload.cont63: +// CHECK3-NEXT: [[TMP273:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP274:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP274]]) +// CHECK3-NEXT: ret i32 [[TMP273]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101 @@ -2484,68 +2818,68 @@ int bar(int n){ // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META26:![0-9]+]] -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META28:![0-9]+]] +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK3-NEXT: store i32 3, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i32 3, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP28]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr null, ptr [[TMP28]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META26]] +// CHECK3-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META28]] // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 9 -// CHECK3-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META26]] +// CHECK3-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META28]] // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 [[TMP18]], i32 [[TMP19]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, ptr [[KERNEL_ARGS_I]]) // CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK3-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP12]], align 2 -// CHECK3-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META26]] -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[AA_CASTED_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META28]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[AA_CASTED_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META26]] -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP37]], i32 [[TMP39]], i32 [[TMP41]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK3: .omp_outlined..exit: @@ -2694,7 +3028,93 @@ int bar(int n){ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130 +// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136 +// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 // CHECK3-SAME: (i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -2727,11 +3147,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -2803,7 +3223,7 @@ int bar(int n){ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172 // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 @@ -2812,11 +3232,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP1]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i32 [[TMP1]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -2829,11 +3249,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined, i32 [[TMP1]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined, i32 [[TMP1]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -2845,7 +3265,7 @@ int bar(int n){ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175 // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 @@ -2854,11 +3274,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined, i32 [[TMP1]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i32 [[TMP1]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -2867,11 +3287,11 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -2913,9 +3333,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.13, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store ptr @.offload_sizes.17, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -2930,27 +3350,27 @@ int bar(int n){ // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188(i32 [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200(i32 [[TMP0]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200 // CHECK3-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined, i32 [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i32 [[TMP0]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -3033,7 +3453,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]] // CHECK3-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2 // CHECK3-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.15, i32 40, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.19, i32 40, i1 false) // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 @@ -3080,7 +3500,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP34]], align 4 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -3095,16 +3515,16 @@ int bar(int n){ // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: [[TMP44:%.*]] = mul nsw i32 1, [[TMP1]] @@ -3186,9 +3606,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.17, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store ptr @.offload_sizes.21, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP26]], align 4 +// CHECK3-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -3203,16 +3623,16 @@ int bar(int n){ // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK3-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]], ptr [[B]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]], ptr [[B]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]], ptr [[B]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]], ptr [[B]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[A]], align 4 @@ -3274,9 +3694,9 @@ int bar(int n){ // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.19, ptr [[TMP20]], align 4 +// CHECK3-NEXT: store ptr @.offload_sizes.23, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store ptr @.offload_maptypes.24, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 @@ -3291,23 +3711,23 @@ int bar(int n){ // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK3-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[A]], align 4 // CHECK3-NEXT: ret i32 [[TMP31]] // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245 // CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 @@ -3328,11 +3748,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -3370,7 +3790,7 @@ int bar(int n){ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227 // CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -3394,11 +3814,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 // CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -3434,7 +3854,7 @@ int bar(int n){ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210 // CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -3452,11 +3872,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -3640,7 +4060,97 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3675,11 +4185,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3751,7 +4261,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3762,11 +4272,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i64 [[TMP1]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3779,11 +4289,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined, i64 [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined, i64 [[TMP1]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3795,7 +4305,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3806,11 +4316,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined, i64 [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i64 [[TMP1]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3819,11 +4329,11 @@ int bar(int n){ // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3836,7 +4346,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3844,11 +4354,11 @@ int bar(int n){ // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined, i64 [[TMP0]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i64 [[TMP0]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3862,7 +4372,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3888,11 +4398,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 // CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3928,7 +4438,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -3951,11 +4461,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -3993,7 +4503,7 @@ int bar(int n){ // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198 +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210 // CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -4013,11 +4523,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined // CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -4201,7 +4711,97 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4236,11 +4836,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l148.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4312,7 +4912,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4323,11 +4923,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i32 [[TMP1]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4340,11 +4940,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined, i32 [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined, i32 [[TMP1]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4356,7 +4956,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4367,11 +4967,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined, i32 [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i32 [[TMP1]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4380,11 +4980,11 @@ int bar(int n){ // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined, ptr [[NN_ADDR]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l163.omp_outlined.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4397,7 +4997,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4405,11 +5005,11 @@ int bar(int n){ // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined, i32 [[TMP0]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i32 [[TMP0]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l188.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4423,7 +5023,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4449,11 +5049,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 // CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l215.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4489,7 +5089,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4512,11 +5112,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l233.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -4554,7 +5154,7 @@ int bar(int n){ // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198 +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210 // CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -4574,11 +5174,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) // CHECK11-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l198.omp_outlined +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined // CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_num_teams_messages.cpp b/clang/test/OpenMP/target_teams_distribute_num_teams_messages.cpp index c0a31fa19b282..e8f898f1f25ee 100644 --- a/clang/test/OpenMP/target_teams_distribute_num_teams_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_num_teams_messages.cpp @@ -44,6 +44,9 @@ T tmain(T argc) { #pragma omp target teams distribute num_teams(3.14) // expected-error 2 {{expression must have integral or unscoped enumeration type, not 'double'}} for (int i=0; i<100; i++) foo(); +#pragma omp target teams distribute num_teams(1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + for (int i=0; i<100; i++) foo(); + return 0; } @@ -85,5 +88,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute num_teams (3.14) // expected-error {{expression must have integral or unscoped enumeration type, not 'double'}} for (int i=0; i<100; i++) foo(); +#pragma omp target teams distribute num_teams(1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + for (int i=0; i<100; i++) foo(); + return tmain(argc); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp index d80b6ea380b93..2a2f5ae27ac55 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_num_teams_messages.cpp @@ -43,6 +43,8 @@ T tmain(T argc) { for (int i=0; i<100; i++) foo(); #pragma omp target teams distribute parallel for num_teams(3.14) // expected-error 2 {{expression must have integral or unscoped enumeration type, not 'double'}} for (int i=0; i<100; i++) foo(); +#pragma omp target teams distribute parallel for num_teams(1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + for (int i=0; i<100; i++) foo(); return 0; } @@ -85,5 +87,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for num_teams (3.14) // expected-error {{expression must have integral or unscoped enumeration type, not 'double'}} for (int i=0; i<100; i++) foo(); +#pragma omp target teams distribute parallel for num_teams(1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + for (int i=0; i<100; i++) foo(); + return tmain(argc); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/teams_num_teams_messages.cpp b/clang/test/OpenMP/teams_num_teams_messages.cpp index 40da396b01069..09429167ee39e 100644 --- a/clang/test/OpenMP/teams_num_teams_messages.cpp +++ b/clang/test/OpenMP/teams_num_teams_messages.cpp @@ -57,6 +57,9 @@ T tmain(T argc) { #pragma omp target #pragma omp teams num_teams(3.14) // expected-error 2 {{expression must have integral or unscoped enumeration type, not 'double'}} foo(); +#pragma omp target +#pragma omp teams num_teams (1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + foo(); return 0; } @@ -111,5 +114,9 @@ int main(int argc, char **argv) { #pragma omp teams num_teams (3.14) // expected-error {{expression must have integral or unscoped enumeration type, not 'double'}} foo(); +#pragma omp target +#pragma omp teams num_teams (1, 2, 3) // expected-error {{only one expression allowed to 'num_teams' clause}} + foo(); + return tmain(argc); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 937d7ff09e4ee..d34da8b4eb158 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2499,8 +2499,8 @@ void OMPClauseEnqueue::VisitOMPDeviceClause(const OMPDeviceClause *C) { } void OMPClauseEnqueue::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) { + VisitOMPClauseList(C); VisitOMPClauseWithPreInit(C); - Visitor->AddStmt(C->getNumTeams()); } void OMPClauseEnqueue::VisitOMPThreadLimitClause( diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index b158c08ff83f3..9e4e7ebf2a570 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2193,7 +2193,7 @@ class OpenMPIRBuilder { /// The number of iterations Value *NumIterations = nullptr; /// The number of teams. - Value *NumTeams = nullptr; + ArrayRef NumTeams; /// The number of threads. Value *NumThreads = nullptr; /// The size of the dynamic shared memory. @@ -2204,8 +2204,8 @@ class OpenMPIRBuilder { // Constructors for TargetKernelArgs. TargetKernelArgs() {} TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, - Value *NumIterations, Value *NumTeams, Value *NumThreads, - Value *DynCGGroupMem, bool HasNoWait) + Value *NumIterations, ArrayRef NumTeams, + Value *NumThreads, Value *DynCGGroupMem, bool HasNoWait) : NumTargetItems(NumTargetItems), RTArgs(RTArgs), NumIterations(NumIterations), NumTeams(NumTeams), NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem), @@ -2856,8 +2856,8 @@ class OpenMPIRBuilder { bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, - TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, - int32_t NumThreads, + TargetRegionEntryInfo &EntryInfo, + ArrayRef NumTeams, int32_t NumThreads, SmallVectorImpl &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index afbb9f9cc1643..31f54f769d2b3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -497,11 +497,17 @@ void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs, Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION); Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems); auto Int32Ty = Type::getInt32Ty(Builder.getContext()); - Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, 3)); + constexpr const size_t MaxDim = 3; + Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, MaxDim)); Value *Flags = Builder.getInt64(KernelArgs.HasNoWait); + assert(!KernelArgs.NumTeams.empty()); + Value *NumTeams3D = - Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams, {0}); + Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0}); + for (unsigned I = 1; I < std::min(KernelArgs.NumTeams.size(), MaxDim); ++I) + NumTeams3D = + Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[I], {I}); Value *NumThreads3D = Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads, {0}); @@ -1109,7 +1115,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch( // of teams and threads so no additional calls to the runtime are required. // Check the error code and execute the host version if required. Builder.restoreIP(emitTargetKernel(Builder, AllocaIP, Return, RTLoc, DeviceID, - Args.NumTeams, Args.NumThreads, + Args.NumTeams.front(), Args.NumThreads, OutlinedFnID, ArgsVector)); BasicBlock *OffloadFailedBlock = @@ -7069,7 +7075,7 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs( static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, - Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads, + Constant *OutlinedFnID, ArrayRef NumTeams, int32_t NumThreads, SmallVectorImpl &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector Dependencies = {}) { @@ -7116,10 +7122,21 @@ static void emitTargetCall( /*IsNonContiguous=*/true, /*ForEndCall=*/false); + // emitKernelLaunch + auto &&EmitTargetCallFallbackCB = + [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy { + Builder.restoreIP(IP); + Builder.CreateCall(OutlinedFn, Args); + return Builder.saveIP(); + }; + + SmallVector NumTeamsC; + for (auto V : NumTeams) + NumTeamsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V)); + unsigned NumTargetItems = Info.NumberOfPtrs; // TODO: Use correct device ID Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); - Value *NumTeamsVal = Builder.getInt32(NumTeams); Value *NumThreadsVal = Builder.getInt32(NumThreads); uint32_t SrcLocStrSize; Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); @@ -7131,7 +7148,7 @@ static void emitTargetCall( Value *DynCGGroupMem = Builder.getInt32(0); OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations, - NumTeamsVal, NumThreadsVal, + NumTeamsC, NumThreadsVal, DynCGGroupMem, HasNoWait); // The presence of certain clauses on the target directive require the @@ -7148,10 +7165,10 @@ static void emitTargetCall( } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget( - const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, - int32_t NumThreads, SmallVectorImpl &Args, - GenMapInfoCallbackTy GenMapInfoCB, + const LocationDescription &Loc, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, + ArrayRef NumTeams, int32_t NumThreads, + SmallVectorImpl &Args, GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, SmallVector Dependencies) { diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 604683370cd27..a213cf17283b2 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -563,14 +563,14 @@ struct AMDGPUKernelTy : public GenericKernelTy { /// Launch the AMDGPU kernel function. Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads, - uint64_t NumBlocks, KernelArgsTy &KernelArgs, + uint32_t NumBlocks[3], KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams, AsyncInfoWrapperTy &AsyncInfoWrapper) const override; /// Print more elaborate kernel launch info for AMDGPU Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const override; + uint64_t NumBlocks[3]) const override; /// Get group and private segment kernel size. uint32_t getGroupSize() const { return GroupSize; } @@ -722,7 +722,7 @@ struct AMDGPUQueueTy { /// Push a kernel launch to the queue. The kernel launch requires an output /// signal and can define an optional input signal (nullptr if none). Error pushKernelLaunch(const AMDGPUKernelTy &Kernel, void *KernelArgs, - uint32_t NumThreads, uint64_t NumBlocks, + uint32_t NumThreads, uint64_t NumBlocks[3], uint32_t GroupSize, uint64_t StackSize, AMDGPUSignalTy *OutputSignal, AMDGPUSignalTy *InputSignal) { @@ -754,9 +754,9 @@ struct AMDGPUQueueTy { Packet->workgroup_size_y = 1; Packet->workgroup_size_z = 1; Packet->reserved0 = 0; - Packet->grid_size_x = NumBlocks * NumThreads; - Packet->grid_size_y = 1; - Packet->grid_size_z = 1; + Packet->grid_size_x = NumBlocks[0] * NumThreads; + Packet->grid_size_y = NumBlocks[1]; + Packet->grid_size_z = NumBlocks[2]; Packet->private_segment_size = Kernel.usesDynamicStack() ? StackSize : Kernel.getPrivateSize(); Packet->group_segment_size = GroupSize; @@ -1224,7 +1224,7 @@ struct AMDGPUStreamTy { /// the kernel finalizes. Once the kernel is finished, the stream will release /// the kernel args buffer to the specified memory manager. Error pushKernelLaunch(const AMDGPUKernelTy &Kernel, void *KernelArgs, - uint32_t NumThreads, uint64_t NumBlocks, + uint32_t NumThreads, uint32_t NumBlocks[3], uint32_t GroupSize, uint64_t StackSize, AMDGPUMemoryManagerTy &MemoryManager) { if (Queue == nullptr) @@ -2813,7 +2813,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { KernelArgsTy KernelArgs = {}; if (auto Err = AMDGPUKernel.launchImpl(*this, /*NumThread=*/1u, - /*NumBlocks=*/1ul, KernelArgs, + /*NumBlocks=*/{1ul, 1ul, 1ul}, KernelArgs, KernelLaunchParamsTy{}, AsyncInfoWrapper)) return Err; @@ -3295,7 +3295,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { }; Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, - uint32_t NumThreads, uint64_t NumBlocks, + uint32_t NumThreads, uint64_t NumBlocks[3], KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams, AsyncInfoWrapperTy &AsyncInfoWrapper) const { @@ -3352,9 +3352,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used. if (ImplArgs && getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) { - ImplArgs->BlockCountX = NumBlocks; - ImplArgs->BlockCountY = 1; - ImplArgs->BlockCountZ = 1; + ImplArgs->BlockCountX = NumBlocks[0]; + ImplArgs->BlockCountY = NumBlocks[1]; + ImplArgs->BlockCountZ = NumBlocks[2]; ImplArgs->GroupSizeX = NumThreads; ImplArgs->GroupSizeY = 1; ImplArgs->GroupSizeZ = 1; @@ -3370,7 +3370,7 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const { + uint64_t NumBlocks[3]) const { // Only do all this when the output is requested if (!(getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL)) return Plugin::success(); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 81823338fe211..a2df3edafc939 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -317,14 +317,14 @@ struct GenericKernelTy { /// Prints generic kernel launch information. Error printLaunchInfo(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const; + uint64_t NumBlocks[3]) const; /// Prints plugin-specific kernel launch information after generic kernel /// launch information virtual Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const; + uint64_t NumBlocks[3]) const; private: /// Prepare the arguments before launching the kernel. diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index c3ecbcc62f71f..7377a08d3d868 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -525,7 +525,7 @@ GenericKernelTy::getKernelLaunchEnvironment( Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const { + uint64_t NumBlocks[3]) const { INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(), "Launching kernel %s with %" PRIu64 " blocks and %d threads in %s mode\n", @@ -537,7 +537,7 @@ Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice, Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, uint32_t NumThreads, - uint64_t NumBlocks) const { + uint64_t NumBlocks[3]) const { return Plugin::success(); }