From 16db9661eb53d8c50beba211065f0327694c24ba Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Fri, 27 Jun 2025 07:54:07 -0500 Subject: [PATCH 1/6] [OpenMP][clang] 6.0: num_threads strict (part 3: codegen) OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the num_threads clause on parallel directives, along with the message and severity clauses. This commit implements necessary codegen changes. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 61 +- clang/lib/CodeGen/CGOpenMPRuntime.h | 54 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 61 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 26 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 10 +- ...tx_target_parallel_num_threads_codegen.cpp | 738 +++++++- .../OpenMP/parallel_num_threads_codegen.cpp | 33 + .../target_parallel_num_threads_messages.cpp | 75 +- ...et_parallel_num_threads_strict_codegen.cpp | 1642 +++++++++++++++++ .../include/llvm/Frontend/OpenMP/OMPKinds.def | 12 + 10 files changed, 2609 insertions(+), 103 deletions(-) create mode 100644 clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ce2dd4d76368a..aadae117a5b85 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1845,11 +1845,11 @@ void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } -void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, const Expr *IfCond, + llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier, + OpenMPSeverityClauseKind Severity, const Expr *Message) { if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); @@ -2699,18 +2699,33 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, CGF.getContext().BoolTy, Loc); } -void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, - llvm::Value *NumThreads, - SourceLocation Loc) { +void CGOpenMPRuntime::emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity, + const Expr *Message) { if (!CGF.HaveInsertPoint()) return; + llvm::SmallVector Args( + {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}); // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) - llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_num_threads), - Args); + // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity, + // messsage) if strict modifier is used. + RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads; + if (Modifier == OMPC_NUMTHREADS_strict) { + FnID = OMPRTL___kmpc_push_num_threads_strict; + // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is + // as if sev-level is fatal." + Args.push_back(llvm::ConstantInt::get( + CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2)); + if (Message) + Args.push_back(CGF.EmitStringLiteralLValue(cast(Message)) + .getPointer(CGF)); + else + Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); + } + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args); } void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, @@ -12030,12 +12045,11 @@ llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, - SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPSIMDRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, const Expr *IfCond, + llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier, + OpenMPSeverityClauseKind Severity, const Expr *Message) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12138,9 +12152,10 @@ llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, - llvm::Value *NumThreads, - SourceLocation Loc) { +void CGOpenMPSIMDRuntime::emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity, + const Expr *Message) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 5be48b439f4fd..8f97dd70116c3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -777,11 +777,22 @@ class CGOpenMPRuntime { /// specified, nullptr otherwise. /// \param NumThreads The value corresponding to the num_threads clause, if /// any, or nullptr. + /// \param NumThreadsModifier The modifier of the num_threads clause, if + /// any, ignored otherwise. + /// \param Severity The severity corresponding to the num_threads clause, if + /// any, ignored otherwise. + /// \param Message The message string corresponding to the num_threads clause, + /// if any, or nullptr. /// - virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, llvm::Value *NumThreads); + virtual void + emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Function *OutlinedFn, + ArrayRef CapturedVars, const Expr *IfCond, + llvm::Value *NumThreads, + OpenMPNumThreadsClauseModifier NumThreadsModifier = + OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr); /// Emits a critical region. /// \param CriticalName Name of the critical region. @@ -1040,10 +1051,16 @@ class CGOpenMPRuntime { /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. + /// If the modifier 'strict' is given: + /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads, int severity, const char *message) to + /// generate code for 'num_threads' clause with 'strict' modifier. /// \param NumThreads An integer value of threads. - virtual void emitNumThreadsClause(CodeGenFunction &CGF, - llvm::Value *NumThreads, - SourceLocation Loc); + virtual void emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr); /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. @@ -1737,11 +1754,21 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { /// specified, nullptr otherwise. /// \param NumThreads The value corresponding to the num_threads clause, if /// any, or nullptr. + /// \param NumThreadsModifier The modifier of the num_threads clause, if + /// any, ignored otherwise. + /// \param Severity The severity corresponding to the num_threads clause, if + /// any, ignored otherwise. + /// \param Message The message string corresponding to the num_threads clause, + /// if any, or nullptr. /// void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, - const Expr *IfCond, llvm::Value *NumThreads) override; + const Expr *IfCond, llvm::Value *NumThreads, + OpenMPNumThreadsClauseModifier NumThreadsModifier = + OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr) override; /// Emits a critical region. /// \param CriticalName Name of the critical region. @@ -1911,9 +1938,16 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. + /// If the modifier 'strict' is given: + /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads, int severity, const char *message) to + /// generate code for 'num_threads' clause with 'strict' modifier. /// \param NumThreads An integer value of threads. - void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, - SourceLocation Loc) override; + void emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr) override; /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index e25b6948d30f8..7e4f47dc05329 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -899,9 +899,10 @@ void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, // Nothing to do. } -void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF, - llvm::Value *NumThreads, - SourceLocation Loc) { +void CGOpenMPRuntimeGPU::emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity, + const Expr *Message) { // Nothing to do. } @@ -1201,18 +1202,17 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF, emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } -void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF, - SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntimeGPU::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, const Expr *IfCond, + llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier, + OpenMPSeverityClauseKind Severity, const Expr *Message) { if (!CGF.HaveInsertPoint()) return; - auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, - NumThreads](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads, + NumThreadsModifier, Severity, Message]( + CodeGenFunction &CGF, PrePostActionTy &Action) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreadsVal = NumThreads; llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn]; @@ -1260,21 +1260,30 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF, NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty); assert(IfCondVal && "Expected a value"); + RuntimeFunction FnID = OMPRTL___kmpc_parallel_51; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *Args[] = { - RTLoc, - getThreadID(CGF, Loc), - IfCondVal, - NumThreadsVal, - llvm::ConstantInt::get(CGF.Int32Ty, -1), - FnPtr, - ID, - Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF), - CGF.VoidPtrPtrTy), - llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_parallel_51), - Args); + llvm::SmallVector Args( + {RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal, + llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID, + Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF), + CGF.VoidPtrPtrTy), + llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}); + if (NumThreadsModifier == OMPC_NUMTHREADS_strict) { + FnID = OMPRTL___kmpc_parallel_60; + // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect + // is as if sev-level is fatal." + Args.append( + {llvm::ConstantInt::get(CGM.Int32Ty, true), + llvm::ConstantInt::get(CGM.Int32Ty, + Severity == OMPC_SEVERITY_warning ? 1 : 2)}); + if (Message) + Args.push_back(CGF.EmitStringLiteralLValue(cast(Message)) + .getPointer(CGF)); + else + Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); + } + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args); }; RegionCodeGenTy RCG(ParallelGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index b59f43a6915dd..3e367088a47f8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -165,9 +165,16 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' /// clause. + /// If the modifier 'strict' is given: + /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads, int severity, const char *message) to + /// generate code for 'num_threads' clause with 'strict' modifier. /// \param NumThreads An integer value of threads. - void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, - SourceLocation Loc) override; + void emitNumThreadsClause( + CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, + OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr) override; /// This function ought to emit, in the general case, a call to // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed @@ -229,12 +236,21 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. /// \param NumThreads The value corresponding to the num_threads clause, if - /// any, - /// or nullptr. + /// any, or nullptr. + /// \param NumThreadsModifier The modifier of the num_threads clause, if + /// any, ignored otherwise. + /// \param Severity The severity corresponding to the num_threads clause, if + /// any, ignored otherwise. + /// \param Message The message string corresponding to the num_threads clause, + /// if any, or nullptr. void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, - const Expr *IfCond, llvm::Value *NumThreads) override; + const Expr *IfCond, llvm::Value *NumThreads, + OpenMPNumThreadsClauseModifier NumThreadsModifier = + OMPC_NUMTHREADS_unknown, + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal, + const Expr *Message = nullptr) override; /// Emit an implicit/explicit barrier for OpenMP threads. /// \param Kind Directive for which this implicit barrier call must be diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 5822e0f6db89a..ad7b941f92dd6 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1616,8 +1616,16 @@ static void emitCommonOMPParallelDirective( CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); + OpenMPNumThreadsClauseModifier Modifier = NumThreadsClause->getModifier(); + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal; + clang::Expr *Message = nullptr; + if (const auto *MessageClause = S.getSingleClause()) + Message = MessageClause->getMessageString(); + if (const auto *SeverityClause = S.getSingleClause()) + Severity = SeverityClause->getSeverityKind(); CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( - CGF, NumThreads, NumThreadsClause->getBeginLoc()); + CGF, NumThreads, NumThreadsClause->getBeginLoc(), Modifier, Severity, + Message); } if (const auto *ProcBindClause = S.getSingleClause()) { CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index f92ce4e89464b..7de49fed0ca65 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -1,10 +1,16 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 + +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1 +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 @@ -26,6 +32,12 @@ tx ftemplate(int n) { { aa += 1; } + #ifdef OMP60 + #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024) + { + aa += 1; + } + #endif #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) { @@ -33,6 +45,14 @@ tx ftemplate(int n) { aa += 1; b[2] += 1; } + #ifdef OMP60 + #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n) + { + a += 1; + aa += 1; + b[2] += 1; + } + #endif return a; } @@ -46,7 +66,655 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_1: user_code.entry: +// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// OMP45_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// OMP45_1-NEXT: call void @__kmpc_target_deinit() +// OMP45_1-NEXT: ret void +// OMP45_1: worker.exit: +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) +// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_1: user_code.entry: +// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// OMP45_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// OMP45_1-NEXT: call void @__kmpc_target_deinit() +// OMP45_1-NEXT: ret void +// OMP45_1: worker.exit: +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP45_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP45_1-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_2: user_code.entry: +// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// OMP45_2-NEXT: call void @__kmpc_target_deinit() +// OMP45_2-NEXT: ret void +// OMP45_2: worker.exit: +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP45_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) +// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_2: user_code.entry: +// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 +// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// OMP45_2-NEXT: call void @__kmpc_target_deinit() +// OMP45_2-NEXT: ret void +// OMP45_2: worker.exit: +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 +// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP45_2-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]] +// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10:![0-9]+]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]] +// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]] +// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10:![0-9]+]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 +// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment, ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 +// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]] +// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -54,22 +722,22 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -78,7 +746,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 @@ -87,7 +755,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 @@ -101,10 +769,10 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -116,14 +784,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -136,9 +804,9 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]] // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 @@ -154,7 +822,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -162,22 +830,22 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -186,7 +854,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] // CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 @@ -195,7 +863,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 @@ -209,10 +877,10 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -224,14 +892,14 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 @@ -244,9 +912,9 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index de10bead4ff81..2fb9589ab1ab6 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -2,9 +2,18 @@ // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK,OMP60 %s +// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,OMP60 %s + // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -32,6 +41,12 @@ int tmain() { foo(); #pragma omp parallel num_threads(T(23)) foo(); +#ifdef OMP60 +#pragma omp parallel num_threads(strict: C) + foo(); +#pragma omp parallel num_threads(strict: T(23)) + foo(); +#endif return 0; } @@ -42,6 +57,12 @@ int main() { foo(); #pragma omp parallel num_threads(a) foo(); +#ifdef OMP60 +#pragma omp parallel num_threads(strict: 2) + foo(); +#pragma omp parallel num_threads(strict: a) + foo(); +#endif return a + tmain() + tmain(); } @@ -70,6 +91,10 @@ int main() { // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr null) +// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 2, ptr null) +// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: ret [[INT_TY]] 0 // CHECK-NEXT: } @@ -83,6 +108,14 @@ int main() { // CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]]) // CHECK: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]]) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr null) +// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( +// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_CONSTR]](ptr {{[^,]*}} [[S_TEMP:%.+]], [[INTPTR_T_TY]] noundef [[INTPTR_T_TY_ATTR]]23) +// OMP60: [[S_CHAR_OP1:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]](ptr {{[^,]*}} [[S_TEMP]]) +// OMP60: [[RES1:%.+]] = sext {{.*}}i8 [[S_CHAR_OP1]] to i32 +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 2, ptr null) +// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]]) +// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: ret [[INT_TY]] 0 // CHECK: } diff --git a/clang/test/OpenMP/target_parallel_num_threads_messages.cpp b/clang/test/OpenMP/target_parallel_num_threads_messages.cpp index 79f77b7dc9f91..7d487c8a4da33 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_messages.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_messages.cpp @@ -1,7 +1,9 @@ // RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized - // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -DOMP60 -verify=expected,omp60 -fopenmp -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -DOMP60 -verify=expected,omp60 -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized + void foo() { } @@ -9,11 +11,11 @@ bool foobool(int argc) { return argc; } -struct S1; // expected-note {{declared here}} +struct S1; // expected-note {{declared here}} omp60-note {{declared here}} #define redef_num_threads(a, b) num_threads(a) -template // expected-note {{declared here}} +template // expected-note {{declared here}} omp60-note {{declared here}} T tmain(T argc, S **argv) { T z; #pragma omp target parallel num_threads // expected-error {{expected '(' after 'num_threads'}} @@ -41,6 +43,40 @@ T tmain(T argc, S **argv) { #pragma omp target parallel redef_num_threads (argc, argc) foo(); +#ifdef OMP60 + // Valid uses of strict modifier + #pragma omp parallel num_threads(strict: 4) + #pragma omp parallel num_threads(strict: argc+z) + + // Invalid: missing expression after strict: + #pragma omp parallel num_threads(strict: ) // omp60-error {{expected expression}} + #pragma omp parallel num_threads(strict:) // omp60-error {{expected expression}} + #pragma omp parallel num_threads(strict: // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + + // Invalid: unknown/missing modifier + #pragma omp parallel num_threads(foo: 4) // omp60-error {{expected 'strict' in OpenMP clause 'num_threads'}} + #pragma omp parallel num_threads(: 4) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + #pragma omp parallel num_threads(:)// omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + + // Invalid: missing colon after modifier + #pragma omp parallel num_threads(strict 4) // omp60-error {{missing ':' after strict modifier}} + + // Invalid: negative, zero, or non-integral + #pragma omp parallel num_threads(strict: -1) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + #pragma omp parallel num_threads(strict: 0) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + #pragma omp parallel num_threads(strict: (argc > 0) ? argv[1] : argv[2]) // omp60-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}} + #pragma omp parallel num_threads(strict: S) // omp60-error {{'S' does not refer to a value}} + #pragma omp parallel num_threads(strict: argv[1]=2) // omp60-error {{expected ')'}} omp60-note {{to match this '('}} omp60-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}} + #pragma omp parallel num_threads(strict: N) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + + // Invalid: multiple strict modifiers or mixed with non-strict + #pragma omp parallel num_threads(strict: 4, strict: 5) // omp60-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp parallel num_threads(strict: 4), num_threads(5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}} + #pragma omp parallel num_threads(4), num_threads(strict: 5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}} +#endif // OMP60 + + foo(); + return argc; } @@ -69,5 +105,38 @@ int main(int argc, char **argv) { #pragma omp target parallel redef_num_threads (argc, argc) foo(); +#ifdef OMP60 + // Valid uses of strict modifier + #pragma omp parallel num_threads(strict: 4) + #pragma omp parallel num_threads(strict: argc+z) + + // Invalid: missing expression after strict: + #pragma omp parallel num_threads(strict: ) // omp60-error {{expected expression}} + #pragma omp parallel num_threads(strict:) // omp60-error {{expected expression}} + #pragma omp parallel num_threads(strict: // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + + // Invalid: unknown/missing modifier + #pragma omp parallel num_threads(foo: 4) // omp60-error {{expected 'strict' in OpenMP clause 'num_threads'}} + #pragma omp parallel num_threads(: 4) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + #pragma omp parallel num_threads(:) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}} + + // Invalid: missing colon after modifier + #pragma omp parallel num_threads(strict 4) // omp60-error {{missing ':' after strict modifier}} + + // Invalid: negative, zero, or non-integral + #pragma omp parallel num_threads(strict: -1) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + #pragma omp parallel num_threads(strict: 0) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + #pragma omp parallel num_threads(strict: (argc > 0) ? argv[1] : argv[2]) // omp60-error {{expression must have integral or unscoped enumeration type, not 'char *'}} + #pragma omp parallel num_threads(strict: S1) // omp60-error {{'S1' does not refer to a value}} + #pragma omp parallel num_threads(strict: argv[1]=2) // omp60-error {{expected ')'}} omp60-note {{to match this '('}} omp60-error {{expression must have integral or unscoped enumeration type, not 'char *'}} + + // Invalid: multiple strict modifiers or mixed with non-strict + #pragma omp parallel num_threads(strict: 4, strict: 5) // omp60-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp parallel num_threads(strict: 4), num_threads(5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}} + #pragma omp parallel num_threads(4), num_threads(strict: 5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}} +#endif // OMP60 + + foo(); + return tmain(argc, argv); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp new file mode 100644 index 0000000000000..3220e61c91fdf --- /dev/null +++ b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp @@ -0,0 +1,1642 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK11 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +// We have 6 target regions + +// Check target registration is registered as a Ctor. + +// Check that the offloading functions are emitted and that the parallel function +// is appropriately guarded. + + +template +tx ftemplate(int n) { + tx a = 0; + + #pragma omp target parallel num_threads(strict: tx(20)) + { + } + + short b = 1; + #pragma omp target parallel num_threads(strict: b) + { + a += b; + } + + return a; +} + +static +int fstatic(int n) { + + #pragma omp target parallel num_threads(strict: n) + { + } + + #pragma omp target parallel num_threads(strict: 32+n) + { + } + + return n+1; +} + +struct S1 { + double a; + + int r1(int n){ + int b = 1; + + #pragma omp target parallel num_threads(strict: n-b) + { + this->a = (double)b + 1.5; + } + + #pragma omp target parallel num_threads(strict: 1024) + { + this->a = 2.5; + } + + return (int)a; + } +}; + +int bar(int n){ + int a = 0; + + S1 S; + a += S.r1(n); + + a += fstatic(n); + + a += ftemplate(n); + + return a; +} + +#endif +// CHECK1-LABEL: define {{[^@]+}}@_Z3bari +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP2]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], [[CALL1]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP4]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP5]], [[CALL3]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 1, ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP20]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP17]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86(ptr [[THIS1]], i64 [[TMP3]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A2]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP49]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP50]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP51]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK1-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1: omp_offload.failed7: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91(ptr [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] +// CHECK1: omp_offload.cont8: +// CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP54:%.*]] = load double, ptr [[A9]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP54]] to i32 +// CHECK1-NEXT: ret i32 [[CONV]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP9]], ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP8]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP45]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP34]], ptr [[TMP46]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP47]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1: omp_offload.failed7: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73(i64 [[TMP27]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] +// CHECK1: omp_offload.cont8: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP50]], 1 +// CHECK1-NEXT: ret i32 [[ADD9]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: store i16 1, ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: store i16 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: store i16 [[TMP18]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK1-NEXT: store i16 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK1-NEXT: [[TMP34:%.*]] = zext i16 [[TMP33]] to i32 +// CHECK1-NEXT: [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP35]], ptr [[TMP47]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP34]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.region_id, ptr [[KERNEL_ARGS1]]) +// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK1-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]] +// CHECK1: omp_offload.failed2: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58(i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT3]] +// CHECK1: omp_offload.cont3: +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: ret i32 [[TMP51]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86 +// CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91 +// CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69 +// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73 +// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 +// CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z3bari +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP2]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], [[CALL1]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP4]]) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP5]], [[CALL3]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: ret i32 [[TMP6]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]] +// CHECK3-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP28]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP17]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86(ptr [[THIS1]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.1, ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP51]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK3-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK3-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK3: omp_offload.failed7: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91(ptr [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT8]] +// CHECK3: omp_offload.cont8: +// CHECK3-NEXT: [[A9:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP54:%.*]] = load double, ptr [[A9]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fptosi double [[TMP54]] to i32 +// CHECK3-NEXT: ret i32 [[CONV]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [[TMP9]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP8]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP43]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [[TMP34]], ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK3-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK3-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK3: omp_offload.failed7: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73(i32 [[TMP27]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT8]] +// CHECK3: omp_offload.cont8: +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP50]], 1 +// CHECK3-NEXT: ret i32 [[ADD9]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53() #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: store i16 1, ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: store i16 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: store i16 [[TMP18]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK3-NEXT: store i16 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK3-NEXT: [[TMP34:%.*]] = zext i16 [[TMP33]] to i32 +// CHECK3-NEXT: [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0 +// CHECK3-NEXT: store i32 3, ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] [[TMP35]], ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP34]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.region_id, ptr [[KERNEL_ARGS1]]) +// CHECK3-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK3-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]] +// CHECK3: omp_offload.failed2: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58(i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP21]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT3]] +// CHECK3: omp_offload.cont3: +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: ret i32 [[TMP51]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86 +// CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91 +// CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69 +// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73 +// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK3-SAME: () #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 +// CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 +// CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 +// CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: ret void +// diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index f974cfc78c8dd..119e2dcbc2b77 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -217,6 +217,8 @@ __OMP_RTL(__kmpc_omp_taskwait, false, Int32, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) +__OMP_RTL(__kmpc_push_num_threads_strict, false, Void, IdentPtr, Int32, + /* Int */ Int32, /* Int */ Int32, /* const char* */ Int8Ptr) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr, Int32, @@ -470,6 +472,8 @@ __OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) +__OMP_RTL(__kmpc_parallel_60, false, Void, IdentPtr, Int32, Int32, Int32, Int32, + VoidPtr, VoidPtr, VoidPtrPtr, SizeTy, Int32, Int32, Int8Ptr) __OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32) __OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32) __OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64) @@ -708,6 +712,10 @@ __OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, SExt, ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt)) __OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt)) +__OMP_RTL_ATTRS(__kmpc_push_num_threads_strict, InaccessibleArgOnlyAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt, SExt, + ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt)) __OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, SExt, @@ -1079,6 +1087,10 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, AttributeSet(), AttributeSet(), AttributeSet(), SizeTyExt)) +__OMP_RTL_ATTRS(__kmpc_parallel_60, AlwaysInlineAttrs, AttributeSet(), + ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, + AttributeSet(), AttributeSet(), AttributeSet(), + SizeTyExt, SExt, SExt, AttributeSet())) __OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, From 8296fde47f8e8fccfe8127a6d6b5dff773cbcedd Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Tue, 1 Jul 2025 08:08:26 -0500 Subject: [PATCH 2/6] GPU codegen doesn't use emitNumThreadsClause -> pass information to emitParallelCall as well --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 12 +- ...cn_target_parallel_num_threads_codegen.cpp | 1067 +++++++++++++++++ ...tx_target_parallel_num_threads_codegen.cpp | 8 +- 3 files changed, 1079 insertions(+), 8 deletions(-) create mode 100644 clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ad7b941f92dd6..c0c71cfba0516 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1608,6 +1608,11 @@ static void emitCommonOMPParallelDirective( const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Value *NumThreads = nullptr; + OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown; + // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as + // if sev-level is fatal." + OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal; + clang::Expr *Message = nullptr; llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, @@ -1616,9 +1621,7 @@ static void emitCommonOMPParallelDirective( CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); - OpenMPNumThreadsClauseModifier Modifier = NumThreadsClause->getModifier(); - OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal; - clang::Expr *Message = nullptr; + Modifier = NumThreadsClause->getModifier(); if (const auto *MessageClause = S.getSingleClause()) Message = MessageClause->getMessageString(); if (const auto *SeverityClause = S.getSingleClause()) @@ -1650,7 +1653,8 @@ static void emitCommonOMPParallelDirective( CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, - CapturedVars, IfCond, NumThreads); + CapturedVars, IfCond, NumThreads, + Modifier, Severity, Message); } static bool isAllocatableDecl(const VarDecl *VD) { diff --git a/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp new file mode 100644 index 0000000000000..4f79ef2617909 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp @@ -0,0 +1,1067 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2 + +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1 +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 +// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target parallel map(tofrom: aa) num_threads(1024) + { + aa += 1; + } + #ifdef OMP60 + #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024) + { + aa += 1; + } + #endif + + #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) + { + a += 1; + aa += 1; + b[2] += 1; + } + #ifdef OMP60 + #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n) + { + a += 1; + aa += 1; + b[2] += 1; + } + #endif + + return a; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +#endif +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_1: user_code.entry: +// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// OMP45_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// OMP45_1-NEXT: call void @__kmpc_target_deinit() +// OMP45_1-NEXT: ret void +// OMP45_1: worker.exit: +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_1: user_code.entry: +// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP45_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// OMP45_1-NEXT: call void @__kmpc_target_deinit() +// OMP45_1-NEXT: ret void +// OMP45_1: worker.exit: +// OMP45_1-NEXT: ret void +// +// +// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP45_1-NEXT: entry: +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP45_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP45_1-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_2: user_code.entry: +// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// OMP45_2-NEXT: call void @__kmpc_target_deinit() +// OMP45_2-NEXT: ret void +// OMP45_2: worker.exit: +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP45_2: user_code.entry: +// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// OMP45_2-NEXT: call void @__kmpc_target_deinit() +// OMP45_2-NEXT: ret void +// OMP45_2: worker.exit: +// OMP45_2-NEXT: ret void +// +// +// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP45_2-NEXT: entry: +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP45_2-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] +// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 2, ptr null) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_1: user_code.entry: +// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 2, ptr null) +// OMP60_1-NEXT: call void @__kmpc_target_deinit() +// OMP60_1-NEXT: ret void +// OMP60_1: worker.exit: +// OMP60_1-NEXT: ret void +// +// +// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined +// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_1-NEXT: entry: +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_1-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] +// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 2, ptr null) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment to ptr), ptr [[DYN_PTR]]) +// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// OMP60_2: user_code.entry: +// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 2, ptr null) +// OMP60_2-NEXT: call void @__kmpc_target_deinit() +// OMP60_2-NEXT: ret void +// OMP60_2: worker.exit: +// OMP60_2-NEXT: ret void +// +// +// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined +// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// OMP60_2-NEXT: entry: +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]] +// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]] +// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// OMP60_2-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5) +// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]]) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3) +// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr +// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 7de49fed0ca65..1c96ffdec88e1 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -339,7 +339,7 @@ int bar(int n){ // OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 1, i32 2, ptr null) // OMP60_1-NEXT: call void @__kmpc_target_deinit() // OMP60_1-NEXT: ret void // OMP60_1: worker.exit: @@ -460,7 +460,7 @@ int bar(int n){ // OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 // OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3, i32 1, i32 2, ptr null) // OMP60_1-NEXT: call void @__kmpc_target_deinit() // OMP60_1-NEXT: ret void // OMP60_1: worker.exit: @@ -555,7 +555,7 @@ int bar(int n){ // OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1, i32 1, i32 2, ptr null) // OMP60_2-NEXT: call void @__kmpc_target_deinit() // OMP60_2-NEXT: ret void // OMP60_2: worker.exit: @@ -676,7 +676,7 @@ int bar(int n){ // OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 // OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3, i32 1, i32 2, ptr null) // OMP60_2-NEXT: call void @__kmpc_target_deinit() // OMP60_2-NEXT: ret void // OMP60_2: worker.exit: From c79a1f169f14ec186928516e63b5302e47218ecc Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Wed, 23 Jul 2025 02:41:09 -0500 Subject: [PATCH 3/6] implement feedback --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index aadae117a5b85..d5bd37a91358b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2718,11 +2718,14 @@ void CGOpenMPRuntime::emitNumThreadsClause( // as if sev-level is fatal." Args.push_back(llvm::ConstantInt::get( CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2)); - if (Message) - Args.push_back(CGF.EmitStringLiteralLValue(cast(Message)) - .getPointer(CGF)); - else + if (Message) { + if (const StringLiteral *Msg = dyn_cast(Message)) + Args.push_back(CGF.EmitStringLiteralLValue(Msg).getPointer(CGF)); + else + Args.push_back(CGF.EmitScalarExpr(Message)); + } else { Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); + } } CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args); From d039caae9e3a7836c81853e8ed9187e393c6d6f2 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Thu, 31 Jul 2025 06:34:21 -0500 Subject: [PATCH 4/6] support non-string-literals for the message clause --- clang/include/clang/AST/OpenMPClause.h | 16 +- .../clang/Basic/DiagnosticParseKinds.td | 4 +- clang/lib/AST/OpenMPClause.cpp | 8 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 15 +- clang/lib/Sema/SemaOpenMP.cpp | 29 +- clang/lib/Sema/TreeTransform.h | 3 +- clang/test/OpenMP/error_codegen.cpp | 734 +++++++++++++++++- clang/test/OpenMP/error_message.cpp | 6 +- .../test/OpenMP/parallel_message_messages.cpp | 28 +- .../OpenMP/parallel_num_threads_codegen.cpp | 29 +- 10 files changed, 802 insertions(+), 70 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 1118d3e062e68..b5dabf283964e 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1874,6 +1874,10 @@ class OMPMessageClause final : public OMPClause { // Expression of the 'message' clause. Stmt *MessageString = nullptr; + // The message as a StringLiteral in case it is as string literal. This might + // be needed during compile time. + StringLiteral *MessageStringLiteral = nullptr; + /// Set message string of the clause. void setMessageString(Expr *MS) { MessageString = MS; } @@ -1887,10 +1891,14 @@ class OMPMessageClause final : public OMPClause { /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. + /// \param MessageStringLiteral The message as a StringLiteral in case it is + /// as string literal. This might be needed during compile time. OMPMessageClause(Expr *MS, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) + SourceLocation EndLoc, + StringLiteral *MessageStringLiteral = nullptr) : OMPClause(llvm::omp::OMPC_message, StartLoc, EndLoc), - LParenLoc(LParenLoc), MessageString(MS) {} + LParenLoc(LParenLoc), MessageString(MS), + MessageStringLiteral(MessageStringLiteral) {} /// Build an empty clause. OMPMessageClause() @@ -1903,6 +1911,10 @@ class OMPMessageClause final : public OMPClause { /// Returns message string of the clause. Expr *getMessageString() const { return cast_or_null(MessageString); } + // Returns the source message as a string literal in case it has been a string + // literal. Otherwise, return null. + StringLiteral *getAsStringLiteral() const { return MessageStringLiteral; } + child_range children() { return child_range(&MessageString, &MessageString + 1); } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 35903af998fbe..f52c6dca35e3b 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1499,8 +1499,10 @@ def err_omp_unexpected_directive : Error< "unexpected OpenMP directive %select{|'#pragma omp %1'}0">; def err_omp_expected_punc : Error< "expected ',' or ')' in '%0' %select{clause|directive}1">; -def warn_clause_expected_string : Warning< +def warn_clause_expected_string_literal : Warning< "expected string literal in 'clause %0' - ignoring">, InGroup; +def warn_clause_expected_string: Warning< + "expected string in 'clause %0' - ignoring">, InGroup; def err_omp_unexpected_clause : Error< "unexpected OpenMP clause '%0' in directive '#pragma omp %1'">; def err_omp_unexpected_clause_extension_only : Error< diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index de8b5996818de..b561156cd2d20 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1963,8 +1963,12 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) { } void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) { - OS << "message(\"" - << cast(Node->getMessageString())->getString() << "\")"; + OS << "message("; + if (StringLiteral *SL = Node->getAsStringLiteral()) + OS << "\"" << SL->getString() << "\""; + else if (Expr *E = Node->getMessageString()) + E->printPretty(OS, nullptr, Policy); + OS << ")"; } void OMPClausePrinter::VisitOMPScheduleClause(OMPScheduleClause *Node) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d5bd37a91358b..848a1ea6a4b84 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2372,9 +2372,8 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal) { - llvm::Value *MVL = - ME ? CGF.EmitStringLiteralLValue(cast(ME)).getPointer(CGF) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // Build call void __kmpc_error(ident_t *loc, int severity, const char // *message) llvm::Value *Args[] = { @@ -2718,14 +2717,10 @@ void CGOpenMPRuntime::emitNumThreadsClause( // as if sev-level is fatal." Args.push_back(llvm::ConstantInt::get( CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2)); - if (Message) { - if (const StringLiteral *Msg = dyn_cast(Message)) - Args.push_back(CGF.EmitStringLiteralLValue(Msg).getPointer(CGF)); - else - Args.push_back(CGF.EmitScalarExpr(Message)); - } else { + if (Message) + Args.push_back(CGF.EmitScalarExpr(Message)); + else Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); - } } CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 4ecc9b0d4c5c8..70e43379b64d2 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -11082,15 +11082,19 @@ StmtResult SemaOpenMP::ActOnOpenMPErrorDirective(ArrayRef Clauses, OMPExecutableDirective::getSingleClause(Clauses); const OMPMessageClause *MessageC = OMPExecutableDirective::getSingleClause(Clauses); - Expr *ME = MessageC ? MessageC->getMessageString() : nullptr; if (!AtC || AtC->getAtKind() == OMPC_AT_compilation) { + StringLiteral *SL = MessageC ? MessageC->getAsStringLiteral() : nullptr; + if (MessageC && !SL) + Diag(MessageC->getMessageString()->getBeginLoc(), + diag::warn_clause_expected_string_literal) + << getOpenMPClauseNameForDiag(OMPC_message); if (SeverityC && SeverityC->getSeverityKind() == OMPC_SEVERITY_warning) Diag(SeverityC->getSeverityKindKwLoc(), diag::warn_diagnose_if_succeeded) - << (ME ? cast(ME)->getString() : "WARNING"); + << (SL ? SL->getString() : "WARNING"); else Diag(StartLoc, diag::err_diagnose_if_succeeded) - << (ME ? cast(ME)->getString() : "ERROR"); + << (SL ? SL->getString() : "ERROR"); if (!SeverityC || SeverityC->getSeverityKind() != OMPC_SEVERITY_warning) return StmtError(); } @@ -16445,13 +16449,28 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME, SourceLocation LParenLoc, SourceLocation EndLoc) { assert(ME && "NULL expr in Message clause"); - if (!isa(ME)) { + QualType Type = ME->getType(); + if ((!Type->isPointerType() && !Type->isArrayType()) || + !Type->getPointeeOrArrayElementType()->isAnyCharacterType()) { Diag(ME->getBeginLoc(), diag::warn_clause_expected_string) << getOpenMPClauseNameForDiag(OMPC_message); return nullptr; } + + // If the string is a string literal, save it in case it is later needed + // during compile time. Also consider a const char pointer to a string + // literal. This is necessary for template instantiations where the string + // literal is not passed directly and we only get a const char pointer to it. + StringLiteral *SL = dyn_cast( + isa(ME) ? cast(ME)->getSubExpr() + : ME); + + // Convert array type to pointer type if needed. + if (Type->isArrayType()) + ME = SemaRef.DefaultFunctionArrayConversion(ME).get(); + return new (getASTContext()) - OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc); + OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc, SL); } OMPClause *SemaOpenMP::ActOnOpenMPOrderClause( diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3e38f8b183dfd..9489357919715 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10948,8 +10948,7 @@ TreeTransform::TransformOMPMessageClause(OMPMessageClause *C) { if (E.isInvalid()) return nullptr; return getDerived().RebuildOMPMessageClause( - C->getMessageString(), C->getBeginLoc(), C->getLParenLoc(), - C->getEndLoc()); + E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); } template diff --git a/clang/test/OpenMP/error_codegen.cpp b/clang/test/OpenMP/error_codegen.cpp index 70d493e01a709..0efb0ab098cf3 100644 --- a/clang/test/OpenMP/error_codegen.cpp +++ b/clang/test/OpenMP/error_codegen.cpp @@ -1,8 +1,9 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 // RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-version=51 -triple x86_64 \ -// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix OMP51 %s // RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-version=60 -triple x86_64 \ -// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix OMP60 %s // RUN: %clang_cc1 -std=c++11 -fopenmp-simd -fopenmp-version=51 \ // RUN: -debug-info-kind=limited -triple x86_64 -emit-llvm -o - %s | \ @@ -12,20 +13,6 @@ // RUN: -debug-info-kind=limited -triple x86_64 -emit-llvm -o - %s | \ // RUN: FileCheck --check-prefix SIMD %s -//CHECK: @.str = private unnamed_addr constant [23 x i8] c"GPU compiler required.\00", align 1 -//CHECK: @0 = private unnamed_addr constant {{.*}}error_codegen.cpp;main;59;1;;\00", align 1 -//CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @0 }, align 8 -//CHECK: @.str.1 = private unnamed_addr constant [27 x i8] c"Note this is functioncall.\00", align 1 -//CHECK: @2 = private unnamed_addr constant {{.*}}error_codegen.cpp;main;61;1;;\00", align 1 -//CHECK: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @2 }, align 8 -//CHECK: @.str.2 = private unnamed_addr constant [23 x i8] c"GNU compiler required.\00", align 1 -//CHECK: @4 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;36;1;;\00", align 1 -//CHECK: @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @4 }, align 8 -//CHECK: @.str.3 = private unnamed_addr constant [22 x i8] c"Notice: add for loop.\00", align 1 -//CHECK: @6 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;39;1;;\00", align 1 -//CHECK: @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @6 }, align 8 -//CHECK: @8 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;45;1;;\00", align 1 -//CHECK: @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @8 }, align 8 void foo() {} @@ -33,42 +20,723 @@ template int tmain(T argc, char **argv) { T b = argc, c, d, e, f, g; static int a; + const char str1[] = "msg"; + const char *str2 = "msg"; + char str3[] = "msg"; + char *str4 = str3; + char * const str5 = str3; #pragma omp error at(execution) severity(fatal) message("GNU compiler required.") +#pragma omp error at(execution) severity(fatal) message(str1) +#pragma omp error at(execution) severity(fatal) message(str2) +#pragma omp error at(execution) severity(fatal) message(str3) +#pragma omp error at(execution) severity(fatal) message(str4) +#pragma omp error at(execution) severity(fatal) message(str5) a = argv[0][0]; ++a; #pragma omp error at(execution) severity(warning) message("Notice: add for loop.") +#pragma omp error at(execution) severity(warning) message(str1) +#pragma omp error at(execution) severity(warning) message(str2) +#pragma omp error at(execution) severity(warning) message(str3) +#pragma omp error at(execution) severity(warning) message(str4) +#pragma omp error at(execution) severity(warning) message(str5) { int b = 10; T c = 100; a = b + c; } -#pragma omp error at(execution) severity(fatal) message("GPU compiler required.") +#pragma omp error at(execution) severity(fatal) message("GPU compiler required.") +#pragma omp error at(execution) severity(fatal) message(str1) +#pragma omp error at(execution) severity(fatal) message(str2) +#pragma omp error at(execution) severity(fatal) message(str3) +#pragma omp error at(execution) severity(fatal) message(str4) +#pragma omp error at(execution) severity(fatal) message(str5) foo(); return N; } -// CHECK-LABEL: @main( -// SIMD-LABEL: @main( -// CHECK: call void @__kmpc_error(ptr @1, i32 2, ptr @.str) -// SIMD-NOT: call void @__kmpc_error(ptr @1, i32 2, ptr @.str) -// CHECK: call void @__kmpc_error(ptr @3, i32 1, ptr @.str.1) -// SIMD-NOT: call void @__kmpc_error(ptr @3, i32 1, ptr @.str.1) -// + + + int main (int argc, char **argv) { int b = argc, c, d, e, f, g; static int a; + const char str1[] = "msg"; + const char *str2 = "msg"; + char str3[] = "msg"; + char *str4 = str3; + char * const str5 = str3; #pragma omp error at(execution) severity(fatal) message("GPU compiler required.") +#pragma omp error at(execution) severity(fatal) message(str1) +#pragma omp error at(execution) severity(fatal) message(str2) +#pragma omp error at(execution) severity(fatal) message(str3) +#pragma omp error at(execution) severity(fatal) message(str4) +#pragma omp error at(execution) severity(fatal) message(str5) a=2; #pragma omp error at(execution) severity(warning) message("Note this is functioncall.") +#pragma omp error at(execution) severity(warning) message(str1) +#pragma omp error at(execution) severity(warning) message(str2) +#pragma omp error at(execution) severity(warning) message(str3) +#pragma omp error at(execution) severity(warning) message(str4) +#pragma omp error at(execution) severity(warning) message(str5) foo(); tmain(argc, argv); } -//CHECK-LABEL: @_Z5tmainIiLi10EEiT_PPc( -//SIMD-LABEL: @_Z5tmainIiLi10EEiT_PPc( -//CHECK: call void @__kmpc_error(ptr @5, i32 2, ptr @.str.2) -//CHECK: call void @__kmpc_error(ptr @7, i32 1, ptr @.str.3) -//CHECK: call void @__kmpc_error(ptr @9, i32 2, ptr @.str) -//SIMD-NOT: call void @__kmpc_error(ptr @5, i32 2, ptr @.str.2) -//SIMD-NOT: call void @__kmpc_error(ptr @7, i32 1, ptr @.str.3) -//SIMD-NOT: call void @__kmpc_error(ptr @9, i32 2, ptr @.str) -//CHECK: ret i32 10 +// CHECK-LABEL: define dso_local void @_Z3foov( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// CHECK-LABEL: define dso_local noundef i32 @main( +// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// CHECK-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// CHECK-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false) +// CHECK-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1) +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]]) +// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]]) +// CHECK-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2) +// CHECK-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]]) +// CHECK-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]]) +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]]) +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]]) +// CHECK-NEXT: ret i32 0 +// CHECK-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc( +// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// CHECK-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// CHECK-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B7:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C8:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false) +// CHECK-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3) +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]]) +// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4) +// CHECK-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]]) +// CHECK-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]]) +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]]) +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]]) +// CHECK-NEXT: store i32 10, ptr [[B7]], align 4 +// CHECK-NEXT: store i32 100, ptr [[C8]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1) +// CHECK-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]]) +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]]) +// CHECK-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]]) +// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]]) +// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8 +// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]]) +// CHECK-NEXT: call void @_Z3foov() +// CHECK-NEXT: ret i32 10 +// OMP51-LABEL: define dso_local void @_Z3foov( +// OMP51-SAME: ) #[[ATTR0:[0-9]+]] { +// OMP51-NEXT: [[ENTRY:.*:]] +// OMP51-NEXT: ret void +// +// +// OMP51-LABEL: define dso_local noundef i32 @main( +// OMP51-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP51-NEXT: [[ENTRY:.*:]] +// OMP51-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[B:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[C:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[D:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[E:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[F:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[G:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// OMP51-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// OMP51-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// OMP51-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// OMP51-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP51-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false) +// OMP51-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false) +// OMP51-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// OMP51-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1) +// OMP51-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// OMP51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]]) +// OMP51-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// OMP51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]]) +// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]]) +// OMP51-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2) +// OMP51-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]]) +// OMP51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]]) +// OMP51-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// OMP51-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]]) +// OMP51-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]]) +// OMP51-NEXT: call void @_Z3foov() +// OMP51-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// OMP51-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]]) +// OMP51-NEXT: ret i32 0 +// +// +// OMP51-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc( +// OMP51-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat { +// OMP51-NEXT: [[ENTRY:.*:]] +// OMP51-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[B:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[C:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[D:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[E:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[F:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[G:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// OMP51-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// OMP51-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// OMP51-NEXT: [[B7:%.*]] = alloca i32, align 4 +// OMP51-NEXT: [[C8:%.*]] = alloca i32, align 4 +// OMP51-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// OMP51-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// OMP51-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP51-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false) +// OMP51-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false) +// OMP51-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// OMP51-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3) +// OMP51-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// OMP51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]]) +// OMP51-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// OMP51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]]) +// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]]) +// OMP51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// OMP51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// OMP51-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// OMP51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// OMP51-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +// OMP51-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 +// OMP51-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP51-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP51-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// OMP51-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4) +// OMP51-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]]) +// OMP51-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]]) +// OMP51-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]]) +// OMP51-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]]) +// OMP51-NEXT: store i32 10, ptr [[B7]], align 4 +// OMP51-NEXT: store i32 100, ptr [[C8]], align 4 +// OMP51-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4 +// OMP51-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4 +// OMP51-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// OMP51-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1) +// OMP51-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]]) +// OMP51-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]]) +// OMP51-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]]) +// OMP51-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]]) +// OMP51-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]]) +// OMP51-NEXT: call void @_Z3foov() +// OMP51-NEXT: ret i32 10 +// +// +// OMP60-LABEL: define dso_local void @_Z3foov( +// OMP60-SAME: ) #[[ATTR0:[0-9]+]] { +// OMP60-NEXT: [[ENTRY:.*:]] +// OMP60-NEXT: ret void +// +// +// OMP60-LABEL: define dso_local noundef i32 @main( +// OMP60-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// OMP60-NEXT: [[ENTRY:.*:]] +// OMP60-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[B:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[C:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[D:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[E:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[F:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[G:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// OMP60-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// OMP60-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// OMP60-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// OMP60-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP60-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false) +// OMP60-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false) +// OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// OMP60-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1) +// OMP60-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]]) +// OMP60-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]]) +// OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]]) +// OMP60-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2) +// OMP60-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]]) +// OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]]) +// OMP60-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// OMP60-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]]) +// OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]]) +// OMP60-NEXT: call void @_Z3foov() +// OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP60-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// OMP60-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]]) +// OMP60-NEXT: ret i32 0 +// +// +// OMP60-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc( +// OMP60-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat { +// OMP60-NEXT: [[ENTRY:.*:]] +// OMP60-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[B:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[C:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[D:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[E:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[F:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[G:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// OMP60-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// OMP60-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// OMP60-NEXT: [[B7:%.*]] = alloca i32, align 4 +// OMP60-NEXT: [[C8:%.*]] = alloca i32, align 4 +// OMP60-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// OMP60-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// OMP60-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// OMP60-NEXT: store i32 [[TMP0]], ptr [[B]], align 4 +// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false) +// OMP60-NEXT: store ptr @.str, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false) +// OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8 +// OMP60-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3) +// OMP60-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]]) +// OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]]) +// OMP60-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]]) +// OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]]) +// OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]]) +// OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// OMP60-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// OMP60-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// OMP60-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +// OMP60-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 +// OMP60-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP60-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// OMP60-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4) +// OMP60-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]]) +// OMP60-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]]) +// OMP60-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]]) +// OMP60-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]]) +// OMP60-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]]) +// OMP60-NEXT: store i32 10, ptr [[B7]], align 4 +// OMP60-NEXT: store i32 100, ptr [[C8]], align 4 +// OMP60-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4 +// OMP60-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4 +// OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// OMP60-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1) +// OMP60-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]]) +// OMP60-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]]) +// OMP60-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]]) +// OMP60-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]]) +// OMP60-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8 +// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]]) +// OMP60-NEXT: call void @_Z3foov() +// OMP60-NEXT: ret i32 10 +// +// +// SIMD-LABEL: define dso_local void @_Z3foov( +// SIMD-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG29:![0-9]+]] { +// SIMD-NEXT: [[ENTRY:.*:]] +// SIMD-NEXT: ret void, !dbg [[DBG32:![0-9]+]] +// +// +// SIMD-LABEL: define dso_local noundef i32 @main( +// SIMD-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG2:![0-9]+]] { +// SIMD-NEXT: [[ENTRY:.*:]] +// SIMD-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[C:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[D:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[E:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[F:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[G:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// SIMD-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// SIMD-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// SIMD-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META33:![0-9]+]], !DIExpression(), [[META34:![0-9]+]]) +// SIMD-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// SIMD-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META35:![0-9]+]], !DIExpression(), [[META36:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[B]], [[META37:![0-9]+]], !DIExpression(), [[META38:![0-9]+]]) +// SIMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG39:![0-9]+]] +// SIMD-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !dbg [[META38]] +// SIMD-NEXT: #dbg_declare(ptr [[C]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[D]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[E]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[F]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[G]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[STR1]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) +// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false), !dbg [[META51]] +// SIMD-NEXT: #dbg_declare(ptr [[STR2]], [[META52:![0-9]+]], !DIExpression(), [[META54:![0-9]+]]) +// SIMD-NEXT: store ptr @.str, ptr [[STR2]], align 8, !dbg [[META54]] +// SIMD-NEXT: #dbg_declare(ptr [[STR3]], [[META55:![0-9]+]], !DIExpression(), [[META57:![0-9]+]]) +// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false), !dbg [[META57]] +// SIMD-NEXT: #dbg_declare(ptr [[STR4]], [[META58:![0-9]+]], !DIExpression(), [[META59:![0-9]+]]) +// SIMD-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG60:![0-9]+]] +// SIMD-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8, !dbg [[META59]] +// SIMD-NEXT: #dbg_declare(ptr [[STR5]], [[META61:![0-9]+]], !DIExpression(), [[META63:![0-9]+]]) +// SIMD-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG64:![0-9]+]] +// SIMD-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8, !dbg [[META63]] +// SIMD-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4, !dbg [[DBG65:![0-9]+]] +// SIMD-NEXT: call void @_Z3foov(), !dbg [[DBG66:![0-9]+]] +// SIMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] +// SIMD-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG68:![0-9]+]] +// SIMD-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP1]], ptr noundef [[TMP2]]), !dbg [[DBG69:![0-9]+]] +// SIMD-NEXT: ret i32 0, !dbg [[DBG70:![0-9]+]] +// +// +// SIMD-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc( +// SIMD-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat !dbg [[DBG21:![0-9]+]] { +// SIMD-NEXT: [[ENTRY:.*:]] +// SIMD-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[C:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[D:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[E:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[F:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[G:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1 +// SIMD-NEXT: [[STR2:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1 +// SIMD-NEXT: [[STR4:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[STR5:%.*]] = alloca ptr, align 8 +// SIMD-NEXT: [[B3:%.*]] = alloca i32, align 4 +// SIMD-NEXT: [[C4:%.*]] = alloca i32, align 4 +// SIMD-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// SIMD-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) +// SIMD-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// SIMD-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[B]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) +// SIMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG77:![0-9]+]] +// SIMD-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !dbg [[META76]] +// SIMD-NEXT: #dbg_declare(ptr [[C]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[D]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[E]], [[META82:![0-9]+]], !DIExpression(), [[META83:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[F]], [[META84:![0-9]+]], !DIExpression(), [[META85:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[G]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]]) +// SIMD-NEXT: #dbg_declare(ptr [[STR1]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) +// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false), !dbg [[META89]] +// SIMD-NEXT: #dbg_declare(ptr [[STR2]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// SIMD-NEXT: store ptr @.str, ptr [[STR2]], align 8, !dbg [[META91]] +// SIMD-NEXT: #dbg_declare(ptr [[STR3]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) +// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false), !dbg [[META93]] +// SIMD-NEXT: #dbg_declare(ptr [[STR4]], [[META94:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) +// SIMD-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]] +// SIMD-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8, !dbg [[META95]] +// SIMD-NEXT: #dbg_declare(ptr [[STR5]], [[META97:![0-9]+]], !DIExpression(), [[META98:![0-9]+]]) +// SIMD-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG99:![0-9]+]] +// SIMD-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8, !dbg [[META98]] +// SIMD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] +// SIMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0, !dbg [[DBG100]] +// SIMD-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG100]] +// SIMD-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0, !dbg [[DBG100]] +// SIMD-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1, !dbg [[DBG100]] +// SIMD-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32, !dbg [[DBG100]] +// SIMD-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG101:![0-9]+]] +// SIMD-NEXT: [[TMP4:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG102:![0-9]+]] +// SIMD-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG102]] +// SIMD-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG102]] +// SIMD-NEXT: #dbg_declare(ptr [[B3]], [[META103:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) +// SIMD-NEXT: store i32 10, ptr [[B3]], align 4, !dbg [[META105]] +// SIMD-NEXT: #dbg_declare(ptr [[C4]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) +// SIMD-NEXT: store i32 100, ptr [[C4]], align 4, !dbg [[META107]] +// SIMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[B3]], align 4, !dbg [[DBG108:![0-9]+]] +// SIMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[C4]], align 4, !dbg [[DBG109:![0-9]+]] +// SIMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]], !dbg [[DBG110:![0-9]+]] +// SIMD-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG111:![0-9]+]] +// SIMD-NEXT: call void @_Z3foov(), !dbg [[DBG112:![0-9]+]] +// SIMD-NEXT: ret i32 10, !dbg [[DBG113:![0-9]+]] +// +//. +// SIMD: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression()) +// SIMD: [[META1]] = distinct !DIGlobalVariable(name: "a", scope: [[DBG2]], file: [[META3:![0-9]+]], line: 61, type: [[META6:![0-9]+]], isLocal: true, isDefinition: true) +// SIMD: [[DBG2]] = distinct !DISubprogram(name: "main", scope: [[META3]], file: [[META3]], line: 59, type: [[META4:![0-9]+]], scopeLine: 59, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10:![0-9]+]], retainedNodes: [[META22:![0-9]+]]) +// SIMD: [[META3]] = !DIFile(filename: "{{.*}}error_codegen.cpp", directory: {{.*}}) +// SIMD: [[META4]] = !DISubroutineType(types: [[META5:![0-9]+]]) +// SIMD: [[META5]] = !{[[META6]], [[META6]], [[META7:![0-9]+]]} +// SIMD: [[META6]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +// SIMD: [[META7]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META8:![0-9]+]], size: 64) +// SIMD: [[META8]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META9:![0-9]+]], size: 64) +// SIMD: [[META9]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +// SIMD: [[META10]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: [[META11:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META12:![0-9]+]], splitDebugInlining: false, nameTableKind: None) +// SIMD: [[META11]] = !DIFile(filename: "{{.*}}", directory: {{.*}}) +// SIMD: [[META12]] = !{[[META0]], [[META13:![0-9]+]], [[META19:![0-9]+]]} +// SIMD: [[META13]] = !DIGlobalVariableExpression(var: [[META14:![0-9]+]], expr: !DIExpression()) +// SIMD: [[META14]] = distinct !DIGlobalVariable(scope: null, file: [[META3]], line: 63, type: [[META15:![0-9]+]], isLocal: true, isDefinition: true) +// SIMD: [[META15]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META16:![0-9]+]], size: 32, elements: [[META17:![0-9]+]]) +// SIMD: [[META16]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: [[META9]]) +// SIMD: [[META17]] = !{[[META18:![0-9]+]]} +// SIMD: [[META18]] = !DISubrange(count: 4) +// SIMD: [[META19]] = !DIGlobalVariableExpression(var: [[META20:![0-9]+]], expr: !DIExpression()) +// SIMD: [[META20]] = distinct !DIGlobalVariable(name: "a", scope: [[DBG21]], file: [[META3]], line: 22, type: [[META6]], isLocal: false, isDefinition: true) +// SIMD: [[DBG21]] = distinct !DISubprogram(name: "tmain", linkageName: "_Z5tmainIiLi10EEiT_PPc", scope: [[META3]], file: [[META3]], line: 20, type: [[META4]], scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10]], templateParams: [[META23:![0-9]+]], retainedNodes: [[META22]]) +// SIMD: [[META22]] = !{} +// SIMD: [[META23]] = !{[[META24:![0-9]+]], [[META25:![0-9]+]]} +// SIMD: [[META24]] = !DITemplateTypeParameter(name: "T", type: [[META6]]) +// SIMD: [[META25]] = !DITemplateValueParameter(name: "N", type: [[META6]], value: i32 10) +// SIMD: [[DBG29]] = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: [[META3]], file: [[META3]], line: 17, type: [[META30:![0-9]+]], scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10]]) +// SIMD: [[META30]] = !DISubroutineType(types: [[META31:![0-9]+]]) +// SIMD: [[META31]] = !{null} +// SIMD: [[DBG32]] = !DILocation(line: 17, column: 13, scope: [[DBG29]]) +// SIMD: [[META33]] = !DILocalVariable(name: "argc", arg: 1, scope: [[DBG2]], file: [[META3]], line: 59, type: [[META6]]) +// SIMD: [[META34]] = !DILocation(line: 59, column: 15, scope: [[DBG2]]) +// SIMD: [[META35]] = !DILocalVariable(name: "argv", arg: 2, scope: [[DBG2]], file: [[META3]], line: 59, type: [[META7]]) +// SIMD: [[META36]] = !DILocation(line: 59, column: 28, scope: [[DBG2]]) +// SIMD: [[META37]] = !DILocalVariable(name: "b", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META38]] = !DILocation(line: 60, column: 7, scope: [[DBG2]]) +// SIMD: [[DBG39]] = !DILocation(line: 60, column: 11, scope: [[DBG2]]) +// SIMD: [[META40]] = !DILocalVariable(name: "c", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META41]] = !DILocation(line: 60, column: 17, scope: [[DBG2]]) +// SIMD: [[META42]] = !DILocalVariable(name: "d", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META43]] = !DILocation(line: 60, column: 20, scope: [[DBG2]]) +// SIMD: [[META44]] = !DILocalVariable(name: "e", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META45]] = !DILocation(line: 60, column: 23, scope: [[DBG2]]) +// SIMD: [[META46]] = !DILocalVariable(name: "f", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META47]] = !DILocation(line: 60, column: 26, scope: [[DBG2]]) +// SIMD: [[META48]] = !DILocalVariable(name: "g", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]]) +// SIMD: [[META49]] = !DILocation(line: 60, column: 29, scope: [[DBG2]]) +// SIMD: [[META50]] = !DILocalVariable(name: "str1", scope: [[DBG2]], file: [[META3]], line: 62, type: [[META15]]) +// SIMD: [[META51]] = !DILocation(line: 62, column: 14, scope: [[DBG2]]) +// SIMD: [[META52]] = !DILocalVariable(name: "str2", scope: [[DBG2]], file: [[META3]], line: 63, type: [[META53:![0-9]+]]) +// SIMD: [[META53]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META16]], size: 64) +// SIMD: [[META54]] = !DILocation(line: 63, column: 15, scope: [[DBG2]]) +// SIMD: [[META55]] = !DILocalVariable(name: "str3", scope: [[DBG2]], file: [[META3]], line: 64, type: [[META56:![0-9]+]]) +// SIMD: [[META56]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META9]], size: 32, elements: [[META17]]) +// SIMD: [[META57]] = !DILocation(line: 64, column: 8, scope: [[DBG2]]) +// SIMD: [[META58]] = !DILocalVariable(name: "str4", scope: [[DBG2]], file: [[META3]], line: 65, type: [[META8]]) +// SIMD: [[META59]] = !DILocation(line: 65, column: 9, scope: [[DBG2]]) +// SIMD: [[DBG60]] = !DILocation(line: 65, column: 16, scope: [[DBG2]]) +// SIMD: [[META61]] = !DILocalVariable(name: "str5", scope: [[DBG2]], file: [[META3]], line: 66, type: [[META62:![0-9]+]]) +// SIMD: [[META62]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: [[META8]]) +// SIMD: [[META63]] = !DILocation(line: 66, column: 16, scope: [[DBG2]]) +// SIMD: [[DBG64]] = !DILocation(line: 66, column: 23, scope: [[DBG2]]) +// SIMD: [[DBG65]] = !DILocation(line: 73, column: 5, scope: [[DBG2]]) +// SIMD: [[DBG66]] = !DILocation(line: 80, column: 3, scope: [[DBG2]]) +// SIMD: [[DBG67]] = !DILocation(line: 81, column: 18, scope: [[DBG2]]) +// SIMD: [[DBG68]] = !DILocation(line: 81, column: 24, scope: [[DBG2]]) +// SIMD: [[DBG69]] = !DILocation(line: 81, column: 3, scope: [[DBG2]]) +// SIMD: [[DBG70]] = !DILocation(line: 82, column: 1, scope: [[DBG2]]) +// SIMD: [[META71]] = !DILocalVariable(name: "argc", arg: 1, scope: [[DBG21]], file: [[META3]], line: 20, type: [[META6]]) +// SIMD: [[META72]] = !DILocation(line: 20, column: 13, scope: [[DBG21]]) +// SIMD: [[META73]] = !DILocalVariable(name: "argv", arg: 2, scope: [[DBG21]], file: [[META3]], line: 20, type: [[META7]]) +// SIMD: [[META74]] = !DILocation(line: 20, column: 26, scope: [[DBG21]]) +// SIMD: [[META75]] = !DILocalVariable(name: "b", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META76]] = !DILocation(line: 21, column: 5, scope: [[DBG21]]) +// SIMD: [[DBG77]] = !DILocation(line: 21, column: 9, scope: [[DBG21]]) +// SIMD: [[META78]] = !DILocalVariable(name: "c", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META79]] = !DILocation(line: 21, column: 15, scope: [[DBG21]]) +// SIMD: [[META80]] = !DILocalVariable(name: "d", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META81]] = !DILocation(line: 21, column: 18, scope: [[DBG21]]) +// SIMD: [[META82]] = !DILocalVariable(name: "e", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META83]] = !DILocation(line: 21, column: 21, scope: [[DBG21]]) +// SIMD: [[META84]] = !DILocalVariable(name: "f", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META85]] = !DILocation(line: 21, column: 24, scope: [[DBG21]]) +// SIMD: [[META86]] = !DILocalVariable(name: "g", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]]) +// SIMD: [[META87]] = !DILocation(line: 21, column: 27, scope: [[DBG21]]) +// SIMD: [[META88]] = !DILocalVariable(name: "str1", scope: [[DBG21]], file: [[META3]], line: 23, type: [[META15]]) +// SIMD: [[META89]] = !DILocation(line: 23, column: 14, scope: [[DBG21]]) +// SIMD: [[META90]] = !DILocalVariable(name: "str2", scope: [[DBG21]], file: [[META3]], line: 24, type: [[META53]]) +// SIMD: [[META91]] = !DILocation(line: 24, column: 15, scope: [[DBG21]]) +// SIMD: [[META92]] = !DILocalVariable(name: "str3", scope: [[DBG21]], file: [[META3]], line: 25, type: [[META56]]) +// SIMD: [[META93]] = !DILocation(line: 25, column: 8, scope: [[DBG21]]) +// SIMD: [[META94]] = !DILocalVariable(name: "str4", scope: [[DBG21]], file: [[META3]], line: 26, type: [[META8]]) +// SIMD: [[META95]] = !DILocation(line: 26, column: 9, scope: [[DBG21]]) +// SIMD: [[DBG96]] = !DILocation(line: 26, column: 16, scope: [[DBG21]]) +// SIMD: [[META97]] = !DILocalVariable(name: "str5", scope: [[DBG21]], file: [[META3]], line: 27, type: [[META62]]) +// SIMD: [[META98]] = !DILocation(line: 27, column: 16, scope: [[DBG21]]) +// SIMD: [[DBG99]] = !DILocation(line: 27, column: 23, scope: [[DBG21]]) +// SIMD: [[DBG100]] = !DILocation(line: 34, column: 7, scope: [[DBG21]]) +// SIMD: [[DBG101]] = !DILocation(line: 34, column: 5, scope: [[DBG21]]) +// SIMD: [[DBG102]] = !DILocation(line: 35, column: 3, scope: [[DBG21]]) +// SIMD: [[META103]] = !DILocalVariable(name: "b", scope: [[META104:![0-9]+]], file: [[META3]], line: 43, type: [[META6]]) +// SIMD: [[META104]] = distinct !DILexicalBlock(scope: [[DBG21]], file: [[META3]], line: 42, column: 3) +// SIMD: [[META105]] = !DILocation(line: 43, column: 9, scope: [[META104]]) +// SIMD: [[META106]] = !DILocalVariable(name: "c", scope: [[META104]], file: [[META3]], line: 44, type: [[META6]]) +// SIMD: [[META107]] = !DILocation(line: 44, column: 7, scope: [[META104]]) +// SIMD: [[DBG108]] = !DILocation(line: 45, column: 9, scope: [[META104]]) +// SIMD: [[DBG109]] = !DILocation(line: 45, column: 13, scope: [[META104]]) +// SIMD: [[DBG110]] = !DILocation(line: 45, column: 11, scope: [[META104]]) +// SIMD: [[DBG111]] = !DILocation(line: 45, column: 7, scope: [[META104]]) +// SIMD: [[DBG112]] = !DILocation(line: 53, column: 3, scope: [[DBG21]]) +// SIMD: [[DBG113]] = !DILocation(line: 54, column: 1, scope: [[DBG21]]) +//. diff --git a/clang/test/OpenMP/error_message.cpp b/clang/test/OpenMP/error_message.cpp index aed2df99ea8f6..6e64ee2ad4a67 100644 --- a/clang/test/OpenMP/error_message.cpp +++ b/clang/test/OpenMP/error_message.cpp @@ -112,8 +112,12 @@ if (1) // expected-error@+1 {{GPU compiler is needed.}} #pragma omp error message("GPU compiler is needed.") message("GPU compiler is needed.") // expected-error {{directive '#pragma omp error' cannot contain more than one 'message' clause}} int a; -// expected-warning@+1 {{expected string literal in 'clause message' - ignoring}} +// expected-warning@+1 {{expected string in 'clause message' - ignoring}} #pragma omp error message(a) // expected-error {{ERROR}} + char str[] = "msg"; +// expected-warning@+1 {{expected string literal in 'clause message' - ignoring}} +#pragma omp error message(str) // expected-error {{ERROR}} +#pragma omp error at(execution) message(str) // no error // expected-error@+1 {{ERROR}} #pragma omp error message() // expected-error {{expected expression}} return T(); diff --git a/clang/test/OpenMP/parallel_message_messages.cpp b/clang/test/OpenMP/parallel_message_messages.cpp index 470fadc032280..ea8fa23ef5e53 100644 --- a/clang/test/OpenMP/parallel_message_messages.cpp +++ b/clang/test/OpenMP/parallel_message_messages.cpp @@ -8,6 +8,9 @@ T tmain(T argc, S **argv) { // Correct usage #pragma omp parallel message("correct message") + // Template parameter is not yet instantiated. + #pragma omp parallel message(argv[0]) // expected-warning {{expected string in 'clause message' - ignoring}} + // Missing parentheses #pragma omp parallel message // expected-error {{expected '(' after 'message'}} @@ -15,9 +18,8 @@ T tmain(T argc, S **argv) { #pragma omp parallel message() // expected-error {{expected expression}} // Non-string literal - #pragma omp parallel message(123) // expected-warning {{expected string literal in 'clause message' - ignoring}} - #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}} - #pragma omp parallel message(argv[0]) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(123) // expected-warning {{expected string in 'clause message' - ignoring}} + #pragma omp parallel message(argc) // expected-warning {{expected string in 'clause message' - ignoring}} // Multiple arguments #pragma omp parallel message("msg1", "msg2") // expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -47,10 +49,10 @@ T tmain(T argc, S **argv) { // Message clause with macro that is not a string #define NOT_A_STRING 123 - #pragma omp parallel message(NOT_A_STRING) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(NOT_A_STRING) // expected-warning {{expected string in 'clause message' - ignoring}} // Message clause with template parameter that is not a string - #pragma omp parallel message(N) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(N) // expected-warning {{expected string in 'clause message' - ignoring}} // Message clause with macro that is a string #define A_STRING "macro string" @@ -73,15 +75,29 @@ T tmain(T argc, S **argv) { return argc; } +template int tmain(int argc, char **argv); + int main(int argc, char **argv) { + const char str1[] = "msg"; + const char *str2 = "msg"; + char str3[] = "msg"; + char *str4 = str3; + char * const str5 = str3; + // Correct usage #pragma omp parallel message("main correct") + #pragma omp parallel message(argv[0]) + #pragma omp parallel message(str1) + #pragma omp parallel message(str2) + #pragma omp parallel message(str3) + #pragma omp parallel message(str4) + #pragma omp parallel message(str5) // Invalid: missing string #pragma omp parallel message() // expected-error {{expression}} // Invalid: non-string - #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(argc) // expected-warning {{expected string in 'clause message' - ignoring}} foo(); diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index 2fb9589ab1ab6..9b5d02cd41678 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -42,9 +42,11 @@ int tmain() { #pragma omp parallel num_threads(T(23)) foo(); #ifdef OMP60 -#pragma omp parallel num_threads(strict: C) + char str[] = "msg"; + const char *str1 = "msg1"; +#pragma omp parallel num_threads(strict: C) severity(fatal) message(str) foo(); -#pragma omp parallel num_threads(strict: T(23)) +#pragma omp parallel num_threads(strict: T(23)) severity(warning) message(str1) foo(); #endif return 0; @@ -58,9 +60,11 @@ int main() { #pragma omp parallel num_threads(a) foo(); #ifdef OMP60 -#pragma omp parallel num_threads(strict: 2) + char str[] = "msg"; + const char *str1 = "msg1"; +#pragma omp parallel num_threads(strict: 2) severity(fatal) message(str) foo(); -#pragma omp parallel num_threads(strict: a) +#pragma omp parallel num_threads(strict: a) severity(warning) message(str1) foo(); #endif return a + tmain() + tmain(); @@ -79,6 +83,13 @@ int main() { // CHECK: [[RES:%.+]] = sext i8 [[A_VAL]] to i32 // CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]]) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0 +// OMP60: call void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 2, i32 2, ptr [[ARRDECAY]]) +// OMP60: call void (ptr, i32, ptr, ...) @__kmpc_fork_call( +// OMP60: [[A_VAL1:%.+]] = load i8, ptr [[A_ADDR]] +// OMP60: [[RES1:%.+]] = sext i8 [[A_VAL1]] to i32 +// OMP60: call void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 1, ptr [[STR2:%.+]]) +// OMP60: call void (ptr, i32, ptr, ...) @__kmpc_fork_call( // CHECK: invoke{{.*}} [[INT_TY:i[0-9]+]] [[TMAIN_CHAR_5:@.+]]() // CHECK: invoke{{.*}} [[INT_TY]] [[TMAIN_S_1:@.+]]() // CHECK: call {{.*}} [[S_TY_DESTR:@.+]](ptr {{[^,]*}} [[S_ADDR]]) @@ -91,9 +102,10 @@ int main() { // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr null) +// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0 +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr [[ARRDECAY]]) // OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( -// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 2, ptr null) +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 1, ptr [[STR1:%.+]]) // OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: ret [[INT_TY]] 0 // CHECK-NEXT: } @@ -108,12 +120,13 @@ int main() { // CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]]) // CHECK: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]]) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr null) +// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0 +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr [[ARRDECAY]]) // OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( // OMP60: {{(invoke|call)}} {{.*}} [[S_TY_CONSTR]](ptr {{[^,]*}} [[S_TEMP:%.+]], [[INTPTR_T_TY]] noundef [[INTPTR_T_TY_ATTR]]23) // OMP60: [[S_CHAR_OP1:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]](ptr {{[^,]*}} [[S_TEMP]]) // OMP60: [[RES1:%.+]] = sext {{.*}}i8 [[S_CHAR_OP1]] to i32 -// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 2, ptr null) +// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 1, ptr [[STR1:%.+]]) // OMP60: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]]) // OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: ret [[INT_TY]] 0 From ee91aeb6d22b744789612c863add25b0ee337e40 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Thu, 31 Jul 2025 10:12:08 -0500 Subject: [PATCH 5/6] implement feedback --- clang/include/clang/AST/OpenMPClause.h | 21 +++++++------------ clang/lib/AST/OpenMPClause.cpp | 2 +- clang/lib/Sema/SemaOpenMP.cpp | 29 ++++++++++---------------- 3 files changed, 20 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index b5dabf283964e..cad1b70892838 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1874,10 +1874,6 @@ class OMPMessageClause final : public OMPClause { // Expression of the 'message' clause. Stmt *MessageString = nullptr; - // The message as a StringLiteral in case it is as string literal. This might - // be needed during compile time. - StringLiteral *MessageStringLiteral = nullptr; - /// Set message string of the clause. void setMessageString(Expr *MS) { MessageString = MS; } @@ -1891,14 +1887,10 @@ class OMPMessageClause final : public OMPClause { /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - /// \param MessageStringLiteral The message as a StringLiteral in case it is - /// as string literal. This might be needed during compile time. OMPMessageClause(Expr *MS, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc, - StringLiteral *MessageStringLiteral = nullptr) + SourceLocation EndLoc) : OMPClause(llvm::omp::OMPC_message, StartLoc, EndLoc), - LParenLoc(LParenLoc), MessageString(MS), - MessageStringLiteral(MessageStringLiteral) {} + LParenLoc(LParenLoc), MessageString(MS) {} /// Build an empty clause. OMPMessageClause() @@ -1911,9 +1903,12 @@ class OMPMessageClause final : public OMPClause { /// Returns message string of the clause. Expr *getMessageString() const { return cast_or_null(MessageString); } - // Returns the source message as a string literal in case it has been a string - // literal. Otherwise, return null. - StringLiteral *getAsStringLiteral() const { return MessageStringLiteral; } + /// Try to evaluate the message string at compile time. + std::optional tryEvaluateString(ASTContext &Ctx) const { + if (Expr *MessageExpr = getMessageString()) + return MessageExpr->tryEvaluateString(Ctx); + return std::nullopt; + } child_range children() { return child_range(&MessageString, &MessageString + 1); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index b561156cd2d20..bc4480bf30d37 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1964,7 +1964,7 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) { void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) { OS << "message("; - if (StringLiteral *SL = Node->getAsStringLiteral()) + if (StringLiteral *SL = dyn_cast(Node->getMessageString())) OS << "\"" << SL->getString() << "\""; else if (Expr *E = Node->getMessageString()) E->printPretty(OS, nullptr, Policy); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 70e43379b64d2..157a6eb3162dc 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -11078,26 +11078,27 @@ StmtResult SemaOpenMP::ActOnOpenMPErrorDirective(ArrayRef Clauses, return StmtError(); } - const OMPSeverityClause *SeverityC = - OMPExecutableDirective::getSingleClause(Clauses); - const OMPMessageClause *MessageC = - OMPExecutableDirective::getSingleClause(Clauses); - if (!AtC || AtC->getAtKind() == OMPC_AT_compilation) { - StringLiteral *SL = MessageC ? MessageC->getAsStringLiteral() : nullptr; + const OMPSeverityClause *SeverityC = + OMPExecutableDirective::getSingleClause(Clauses); + const OMPMessageClause *MessageC = + OMPExecutableDirective::getSingleClause(Clauses); + std::optional SL = + MessageC ? MessageC->tryEvaluateString(getASTContext()) : std::nullopt; + if (MessageC && !SL) Diag(MessageC->getMessageString()->getBeginLoc(), diag::warn_clause_expected_string_literal) << getOpenMPClauseNameForDiag(OMPC_message); if (SeverityC && SeverityC->getSeverityKind() == OMPC_SEVERITY_warning) Diag(SeverityC->getSeverityKindKwLoc(), diag::warn_diagnose_if_succeeded) - << (SL ? SL->getString() : "WARNING"); + << SL.value_or("WARNING"); else - Diag(StartLoc, diag::err_diagnose_if_succeeded) - << (SL ? SL->getString() : "ERROR"); + Diag(StartLoc, diag::err_diagnose_if_succeeded) << SL.value_or("ERROR"); if (!SeverityC || SeverityC->getSeverityKind() != OMPC_SEVERITY_warning) return StmtError(); } + return OMPErrorDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses); } @@ -16457,20 +16458,12 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME, return nullptr; } - // If the string is a string literal, save it in case it is later needed - // during compile time. Also consider a const char pointer to a string - // literal. This is necessary for template instantiations where the string - // literal is not passed directly and we only get a const char pointer to it. - StringLiteral *SL = dyn_cast( - isa(ME) ? cast(ME)->getSubExpr() - : ME); - // Convert array type to pointer type if needed. if (Type->isArrayType()) ME = SemaRef.DefaultFunctionArrayConversion(ME).get(); return new (getASTContext()) - OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc, SL); + OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc); } OMPClause *SemaOpenMP::ActOnOpenMPOrderClause( From 9ec7fea9108752fc04b2b8b2fcd8317cc6f05588 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Fri, 1 Aug 2025 09:07:16 -0500 Subject: [PATCH 6/6] adapt OMPClausePrinter::VisitOMPMessageClause; add lvalue conversion to message clause --- clang/lib/AST/OpenMPClause.cpp | 4 +--- clang/lib/Sema/SemaOpenMP.cpp | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index bc4480bf30d37..a9aa17a19d1ea 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1964,9 +1964,7 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) { void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) { OS << "message("; - if (StringLiteral *SL = dyn_cast(Node->getMessageString())) - OS << "\"" << SL->getString() << "\""; - else if (Expr *E = Node->getMessageString()) + if (Expr *E = Node->getMessageString()) E->printPretty(OS, nullptr, Policy); OS << ")"; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 157a6eb3162dc..229190a641080 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16459,8 +16459,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME, } // Convert array type to pointer type if needed. - if (Type->isArrayType()) - ME = SemaRef.DefaultFunctionArrayConversion(ME).get(); + ME = SemaRef.DefaultFunctionArrayLvalueConversion(ME).get(); return new (getASTContext()) OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc);