-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[OpenMP][OMPIRBuilder] Add support to omp target parallel #67000
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP][OMPIRBuilder] Add support to omp target parallel #67000
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-openmp ChangesAdded support for LLVM IR code generation which is used for handling omp target parallel code. The call for __kmpc_parallel_51 is generated and the parallel region is outlined to separate function. The proper setup of kmpc_target_init mode is not included in the commit. It is assumed that the SPMD mode for target initialization is properly set by other codegen functions. This PR depends on: #66998 Patch is 20.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67000.diff 3 Files Affected:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 1ace7d5b97ffc96..25886bee1f51353 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -602,6 +602,8 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
Function *OuterFn = OI.getFunction();
CodeExtractorAnalysisCache CEAC(*OuterFn);
+ unsigned AllocaAddressSpace =
+ Config.isTargetDevice() ? 0 : M.getDataLayout().getAllocaAddrSpace();
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ true,
/* BlockFrequencyInfo */ nullptr,
@@ -610,7 +612,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocaBlock*/ OI.OuterAllocaBB,
- /* Suffix */ ".omp_par");
+ /* Suffix */ ".omp_par", AllocaAddressSpace);
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
@@ -1137,8 +1139,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Change the location to the outer alloca insertion point to create and
// initialize the allocas we pass into the parallel region.
Builder.restoreIP(OuterAllocaIP);
- AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
- AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+ unsigned AllocaAddressSpace =
+ Config.isTargetDevice() ? 0 : M.getDataLayout().getAllocaAddrSpace();
+ AllocaInst *TIDAddr =
+ Builder.CreateAlloca(Int32, AllocaAddressSpace, nullptr, "tid.addr");
+ AllocaInst *ZeroAddr =
+ Builder.CreateAlloca(Int32, AllocaAddressSpace, nullptr, "zero.addr");
// We only need TIDAddr and ZeroAddr for modeling purposes to get the
// associated arguments in the outlined function, so we delete them later.
@@ -1212,11 +1218,14 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
FunctionCallee RTLFn;
- if (IfCondition)
- RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
- else
- RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
-
+ if (Config.isTargetDevice()) {
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
+ } else {
+ if (IfCondition)
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
+ else
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
+ }
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
llvm::LLVMContext &Ctx = F->getContext();
@@ -1235,63 +1244,146 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
}
OutlineInfo OI;
- OI.PostOutlineCB = [=](Function &OutlinedFn) {
- // Add some known attributes.
- OutlinedFn.addParamAttr(0, Attribute::NoAlias);
- OutlinedFn.addParamAttr(1, Attribute::NoAlias);
- OutlinedFn.addFnAttr(Attribute::NoUnwind);
- OutlinedFn.addFnAttr(Attribute::NoRecurse);
-
- assert(OutlinedFn.arg_size() >= 2 &&
- "Expected at least tid and bounded tid as arguments");
- unsigned NumCapturedVars =
- OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
-
- CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
- CI->getParent()->setName("omp_parallel");
- Builder.SetInsertPoint(CI);
-
- // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
- Value *ForkCallArgs[] = {
- Ident, Builder.getInt32(NumCapturedVars),
- Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
-
- SmallVector<Value *, 16> RealArgs;
- RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
- if (IfCondition) {
- Value *Cond = Builder.CreateSExtOrTrunc(IfCondition,
- Type::getInt32Ty(M.getContext()));
- RealArgs.push_back(Cond);
- }
- RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
-
- // __kmpc_fork_call_if always expects a void ptr as the last argument
- // If there are no arguments, pass a null pointer.
- auto PtrTy = Type::getInt8PtrTy(M.getContext());
- if (IfCondition && NumCapturedVars == 0) {
+ if (Config.isTargetDevice()) {
+ // Generate OpenMP target specific runtime call
+ OI.PostOutlineCB = [=](Function &OutlinedFn) {
+ // Add some known attributes.
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+ OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+ OutlinedFn.addFnAttr(Attribute::NoUnwind);
+ OutlinedFn.addFnAttr(Attribute::NoRecurse);
+
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars =
+ OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+ assert(CI && "Expected call instruction to outlined function");
+ CI->getParent()->setName("omp_parallel");
+ // Replace direct call to the outlined function by the call to
+ // __kmpc_parallel_51
+ Builder.SetInsertPoint(CI);
+
+ // Build call __kmpc_parallel_51
+ auto PtrTy = Type::getInt8PtrTy(M.getContext());
llvm::Value *Void = ConstantPointerNull::get(PtrTy);
- RealArgs.push_back(Void);
- }
- if (IfCondition && RealArgs.back()->getType() != PtrTy)
- RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);
+ // Add alloca for kernel args. Put this instruction at the beginning
+ // of the function.
+ InsertPointTy CurrentIP = Builder.saveIP();
+ Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+ AllocaInst *ArgsAlloca =
+ Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+ Value *Args = Builder.CreatePointerCast(
+ ArgsAlloca, Type::getInt8PtrTy(M.getContext()));
+ Builder.restoreIP(CurrentIP);
+ // Store captured vars which are used by kmpc_parallel_51
+ if (NumCapturedVars) {
+ for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+ Value *V = *(CI->arg_begin() + 2 + Idx);
+ Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+ ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+ Builder.CreateStore(V, StoreAddress);
+ }
+ }
+ Value *Cond = IfCondition
+ ? Builder.CreateSExtOrTrunc(
+ IfCondition, Type::getInt32Ty(M.getContext()))
+ : Builder.getInt32(1);
+ Value *Parallel51CallArgs[] = {
+ /* identifier*/ Ident,
+ /* global thread num*/ ThreadID,
+ /* if expression */ Cond,
+ NumThreads ? NumThreads : Builder.getInt32(-1),
+ /* Proc bind */ Builder.getInt32(-1),
+ /* outlined function */
+ Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr), Void, Args,
+ Builder.getInt64(NumCapturedVars)};
+
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(Parallel51CallArgs),
+ std::end(Parallel51CallArgs));
+
+ Builder.CreateCall(RTLFn, RealArgs);
+
+ LLVM_DEBUG(dbgs() << "With kmpc_parallel_51 placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+
+ InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
+
+ CI->eraseFromParent();
+
+ for (Instruction *I : ToBeDeleted)
+ I->eraseFromParent();
+ };
+ } else {
+ // Generate OpenMP host runtime call
+ OI.PostOutlineCB = [=](Function &OutlinedFn) {
+ // Add some known attributes.
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn.addFnAttr(Attribute::NoUnwind);
+ OutlinedFn.addFnAttr(Attribute::NoRecurse);
+
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars =
+ OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+ CI->getParent()->setName("omp_parallel");
+ Builder.SetInsertPoint(CI);
+
+ // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
+ Value *ForkCallArgs[] = {
+ Ident, Builder.getInt32(NumCapturedVars),
+ Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
+
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+ if (IfCondition) {
+ Value *Cond = Builder.CreateSExtOrTrunc(
+ IfCondition, Type::getInt32Ty(M.getContext()));
+ RealArgs.push_back(Cond);
+ }
+ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
+
+ // __kmpc_fork_call_if always expects a void ptr as the last argument
+ // If there are no arguments, pass a null pointer.
+ auto PtrTy = Type::getInt8PtrTy(M.getContext());
+ if (IfCondition && NumCapturedVars == 0) {
+ llvm::Value *Void = ConstantPointerNull::get(PtrTy);
+ RealArgs.push_back(Void);
+ }
+ if (IfCondition && RealArgs.back()->getType() != PtrTy)
+ RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);
- Builder.CreateCall(RTLFn, RealArgs);
+ Builder.CreateCall(RTLFn, RealArgs);
- LLVM_DEBUG(dbgs() << "With fork_call placed: "
- << *Builder.GetInsertBlock()->getParent() << "\n");
+ LLVM_DEBUG(dbgs() << "With fork_call placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
- InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+ InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
- // Initialize the local TID stack location with the argument value.
- Builder.SetInsertPoint(PrivTID);
- Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
- Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
- CI->eraseFromParent();
+ CI->eraseFromParent();
- for (Instruction *I : ToBeDeleted)
- I->eraseFromParent();
- };
+ for (Instruction *I : ToBeDeleted)
+ I->eraseFromParent();
+ };
+ }
// Adjust the finalization stack, verify the adjustment, and call the
// finalize function a last time to finalize values between the pre-fini
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index f945de52920ccfe..a92343a4028226e 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -520,6 +520,7 @@ struct OMPInformationCache : public InformationCache {
void recollectUses() {
for (int Idx = 0; Idx < RFIs.size(); ++Idx)
recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
+ OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M);
}
// Helper function to inherit the calling convention of the function callee.
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 2026824416f3e3c..06243f90deda98b 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -591,9 +591,114 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
}
+TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = true;
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+ BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+ Builder.CreateBr(EnterBB);
+ Builder.SetInsertPoint(EnterBB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ Loc = OMPBuilder.createTargetInit(Loc, true);
+ OMPBuilder.M.print(errs(), nullptr);
+
+ AllocaInst *PrivAI = nullptr;
+
+ unsigned NumBodiesGenerated = 0;
+ unsigned NumPrivatizedVars = 0;
+ unsigned NumFinalizationPoints = 0;
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ ++NumBodiesGenerated;
+
+ Builder.restoreIP(AllocaIP);
+ PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+ Builder.CreateStore(F->arg_begin(), PrivAI);
+
+ Builder.restoreIP(CodeGenIP);
+ Value *PrivLoad =
+ Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+ Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+ Instruction *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
+ &ThenTerm, &ElseTerm);
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ Value &Orig, Value &Inner,
+ Value *&ReplacementValue) -> InsertPointTy {
+ ++NumPrivatizedVars;
+
+ if (!isa<AllocaInst>(Orig)) {
+ EXPECT_EQ(&Orig, F->arg_begin());
+ ReplacementValue = &Inner;
+ return CodeGenIP;
+ }
+
+ // Since the original value is an allocation, it has a pointer type and
+ // therefore no additional wrapping should happen.
+ EXPECT_EQ(&Orig, &Inner);
+
+ // Trivial copy (=firstprivate).
+ Builder.restoreIP(AllocaIP);
+ Type *VTy = ReplacementValue->getType();
+ Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
+ Builder.restoreIP(CodeGenIP);
+ Builder.CreateStore(V, ReplacementValue);
+ return CodeGenIP;
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+ EXPECT_EQ(NumBodiesGenerated, 1U);
+ EXPECT_EQ(NumPrivatizedVars, 1U);
+ EXPECT_EQ(NumFinalizationPoints, 1U);
+
+ Builder.restoreIP(AfterIP);
+ OMPBuilder.createTargetDeinit(Builder);
+ Builder.CreateRetVoid();
+
+ OMPBuilder.finalize();
+ OMPBuilder.M.print(llvm::errs(), nullptr);
+ Function *OutlinedFn = PrivAI->getFunction();
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+ EXPECT_NE(OutlinedFn, F);
+ EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
+ EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
+ EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
+ EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
+
+ EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
+ EXPECT_EQ(OutlinedFn->arg_size(), 3U);
+
+ EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
+ EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
+ User *Usr = OutlinedFn->user_back();
+ ASSERT_TRUE(isa<CallInst>(Usr));
+ CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
+ ASSERT_NE(Parallel51CI, nullptr);
+
+ EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
+ EXPECT_EQ(Parallel51CI->arg_size(), 9U);
+ EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
+ EXPECT_TRUE(isa<GlobalVariable>(Parallel51CI->getArgOperand(0)));
+ EXPECT_EQ(Parallel51CI, Usr);
+}
+
TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -699,6 +804,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
TEST_F(OpenMPIRBuilderTest, ParallelNested) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -793,6 +899,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -902,6 +1009,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -1006,6 +1114,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -1119,6 +1228,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -4131,6 +4241,7 @@ xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
TEST_F(OpenMPIRBuilderTest, CreateReductions) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -4363,6 +4474,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -5324,6 +5436,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
TEST_F(OpenMPIRBuilderTest, CreateTask) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -5449,6 +5562,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -5471,6 +5585,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -5500,6 +5615,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
@@ -5573,6 +5689,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
using InsertPointTy = O...
[truncated]
|
@@ -602,6 +602,8 @@ void OpenMPIRBuilder::finalize(Function *Fn) { | |||
|
|||
Function *OuterFn = OI.getFunction(); | |||
CodeExtractorAnalysisCache CEAC(*OuterFn); | |||
unsigned AllocaAddressSpace = | |||
Config.isTargetDevice() ? 0 : M.getDataLayout().getAllocaAddrSpace(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Always use the DL?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment: #66998 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. I always use DL, and I perform address space casting if needed.
AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); | ||
AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); | ||
unsigned AllocaAddressSpace = | ||
Config.isTargetDevice() ? 0 : M.getDataLayout().getAllocaAddrSpace(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without this change the outlined function will have the following signature:
define internal void @__omp_offloading_fd00_2b03043__QQmain_l30..omp_par(ptr addrspace(5) noalias noundef %tid.addr, ptr addrspace(5) noalias noundef %zero.addr, ptr addrspace(5) %0)
(Tested on AMD GPU target)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
OutlinedFn.addParamAttr(0, Attribute::NoAlias); | ||
OutlinedFn.addParamAttr(1, Attribute::NoAlias); | ||
OutlinedFn.addFnAttr(Attribute::NoUnwind); | ||
OutlinedFn.addFnAttr(Attribute::NoRecurse); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the no recourse was wrong (already before). I don’t see why it could not reach the same parallel region from a parallel region.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed no recursion attribute
Builder.SetInsertPoint(CI); | ||
|
||
// Build call __kmpc_parallel_51 | ||
auto PtrTy = Type::getInt8PtrTy(M.getContext()); | ||
llvm::Value *Void = ConstantPointerNull::get(PtrTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No llvm::
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
13edd33
to
43430cb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better, still a few remarks.
@@ -1090,6 +1097,174 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, | |||
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); | |||
} | |||
|
|||
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add some brief documentation.
Use references when possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
I->eraseFromParent(); | ||
} | ||
} | ||
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same as above, also a new line is free.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI), | ||
PrivTIDAddr); | ||
|
||
ToBeDeleted.insert(ToBeDeleted.begin(), CI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we insert it in the front? Just erase CI here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Value *Ident, Value *IfCondition, | ||
Value *NumThreads, Instruction *PrivTID, | ||
AllocaInst *PrivTIDAddr, Value *ThreadID, | ||
SmallVector<Instruction *, 4> ToBeDeleted) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the vector passed by copy. Either by value, if needed, or just make a new one here. I believe some of the pointers are also not actually initialized by the caller but just used in here. No need to pass them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
ToBeDeleted.push_back(ZeroAddr); | ||
} else { | ||
TIDAddr = TIDAddrAlloca; | ||
ZeroAddr = ZeroAddrAlloca; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Initialize them directly in line 1324, so you can avoid the else. If you have an else case, put the simpler/shorter one first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
43430cb
to
f25d714
Compare
f25d714
to
bab8d33
Compare
Can @shraiysh review this patch? |
To be sure I understand this correctly, in the testcase, |
bab8d33
to
3ab0ad4
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
7fbe950
to
e801022
Compare
@shraiysh |
clang/test/OpenMP/cancel_codegen.cpp
Outdated
// CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] | ||
// CHECK3: .omp_outlined..exit: | ||
// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 | ||
// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Run the update script on this in a pre-commit to remove some distracting changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. These changes are moved to separate commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Commit that right away as NFC. If you leave it with the PR and you use the web interface it'll smash them right back together.
OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); | ||
} | ||
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { | ||
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still, llvm::
. Can you please check your commits for these things (even if you just copied/moved the code). I will never point out all the locations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done (scope of changes: this PR). I did not add any new llvm::
inside OMPIRBuilder file.
// of the function. | ||
OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP(); | ||
Builder.SetInsertPoint(&OuterFn->front(), | ||
OuterFn->front().getFirstInsertionPt()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You cannot just assume the first block is the right one. That is why we have AllocaIP.
AllocaInst *ArgsAlloca = | ||
Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars)); | ||
Value *Args = | ||
Builder.CreatePointerCast(ArgsAlloca, Type::getInt8PtrTy(M.getContext())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Type::getInt8PtrTy(M.getContext())
+ PtrTy
Check your patches for things like this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Builder.CreatePointerCast(ArgsAlloca, Type::getInt8PtrTy(M.getContext())); | ||
Builder.restoreIP(CurrentIP); | ||
// Store captured vars which are used by kmpc_parallel_51 | ||
if (NumCapturedVars) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for the if
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
Value *Parallel51CallArgs[] = { | ||
/* identifier*/ Ident, | ||
/* global thread num*/ ThreadID, | ||
/* if expression */ Cond, NumThreads ? NumThreads : Builder.getInt32(-1), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Either provide comments for all arguments or none. this is confusing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done -> added comments.
|
||
// Build call __kmpc_parallel_51 | ||
auto PtrTy = Type::getInt8PtrTy(M.getContext()); | ||
Value *Void = ConstantPointerNull::get(PtrTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Void is a type, not a value. This is a nullptr
, or similar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. New name: Value *NullPtrValue
Args, Builder.getInt64(NumCapturedVars)}; | ||
|
||
SmallVector<Value *, 16> RealArgs; | ||
RealArgs.append(std::begin(Parallel51CallArgs), std::end(Parallel51CallArgs)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure why you package them into a vector here. Should work fine w/o.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Removed vector.
RealArgs.push_back(Void); | ||
} | ||
if (IfCondition && RealArgs.back()->getType() != PtrTy) | ||
RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does not look sound, but you just moved it, it's ok.
Okay, I misunderstood the testcase a bit earlier. So, the function This patch LGTM @kiranchandramohan (with Johannes' comments addressed). |
e801022
to
2bb329e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, commit the two commits separately though
clang/test/OpenMP/cancel_codegen.cpp
Outdated
// CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] | ||
// CHECK3: .omp_outlined..exit: | ||
// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 | ||
// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Commit that right away as NFC. If you leave it with the PR and you use the web interface it'll smash them right back together.
Added support for LLVM IR code generation which is used for handling omp target parallel code. The call for __kmpc_parallel_51 is generated and the parallel region is outlined to separate function. The proper setup of kmpc_target_init mode is not included in the commit. It is assumed that the SPMD mode for target init is properly set by other codegen functions.
2bb329e
to
65a5f9d
Compare
This property of the `OpenMPIRBuilderConfig` is already initialized in the constructor for `OMPInformationCache`. This looks to be a leftover after a merge from main (comment [here](llvm#67000 (comment))).
…ice (#140) This property of the `OpenMPIRBuilderConfig` is already initialized in the constructor for `OMPInformationCache`. This looks to be a leftover after a merge from main (comment [here](llvm#67000 (comment))).
Added support for LLVM IR code generation which is used for handling omp target parallel code. The call for __kmpc_parallel_51 is generated and the parallel region is outlined to separate function.
The proper setup of kmpc_target_init mode is not included in the commit. It is assumed that the SPMD mode for target initialization is properly set by other codegen functions.
This PR depends on: #66998