Skip to content

Commit fb0cf01

Browse files
committed
Revert "[OpenMP] Codegen aggregate for outlined function captures"
This reverts commit e9c7291. Fix failing tests
1 parent dd5aa65 commit fb0cf01

File tree

200 files changed

+288183
-322951
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

200 files changed

+288183
-322951
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1284,7 +1284,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
12841284
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
12851285
HasCancel, OutlinedHelperName);
12861286
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287-
return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(*CS, D.getBeginLoc());
1287+
return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
12881288
}
12891289

12901290
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 23 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,56 +1513,21 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
15131513
// TODO: Is that needed?
15141514
CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
15151515

1516-
// Store addresses of global arguments to pass to the parallel call.
15171516
Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca(
15181517
llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),
15191518
"captured_vars_addrs");
1520-
1521-
// Store globalized values to push, pop through the global stack.
1522-
SmallVector<llvm::Value *, 4> GlobalValues;
1519+
// There's something to share.
15231520
if (!CapturedVars.empty()) {
1521+
// Prepare for parallel region. Indicate the outlined function.
15241522
ASTContext &Ctx = CGF.getContext();
15251523
unsigned Idx = 0;
15261524
for (llvm::Value *V : CapturedVars) {
15271525
Address Dst = Bld.CreateConstArrayGEP(CapturedVarsAddrs, Idx);
15281526
llvm::Value *PtrV;
15291527
if (V->getType()->isIntegerTy())
15301528
PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
1531-
else {
1532-
assert(V->getType()->isPointerTy() &&
1533-
"Expected Pointer Type to globalize.");
1534-
// Globalize and store pointer.
1535-
llvm::Type *PtrElemTy = V->getType()->getPointerElementType();
1536-
auto &DL = CGM.getDataLayout();
1537-
unsigned GlobalSize = DL.getTypeAllocSize(PtrElemTy);
1538-
1539-
/*
1540-
llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};
1541-
llvm::Instruction *VoidPtr =
1542-
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1543-
CGM.getModule(),
1544-
OMPRTL___kmpc_alloc_shared), AllocArgs, VD->getName());
1545-
*/
1546-
// Use shared memory to store globalized pointer values, for now this
1547-
// should be the outlined args aggregate struct.
1548-
llvm::Value *GlobalSizeArg[] = {
1549-
llvm::ConstantInt::get(CGM.SizeTy, GlobalSize)};
1550-
llvm::Value *GlobalValue = CGF.EmitRuntimeCall(
1551-
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1552-
OMPRTL___kmpc_alloc_shared),
1553-
GlobalSizeArg);
1554-
GlobalValues.push_back(GlobalValue);
1555-
1556-
llvm::Value *CapturedVarVal = Bld.CreateAlignedLoad(
1557-
PtrElemTy, V, DL.getABITypeAlign(PtrElemTy));
1558-
llvm::Value *GlobalValueCast =
1559-
Bld.CreatePointerBitCastOrAddrSpaceCast(
1560-
GlobalValue, PtrElemTy->getPointerTo());
1561-
Bld.CreateDefaultAlignedStore(CapturedVarVal, GlobalValueCast);
1562-
1563-
PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(GlobalValue,
1564-
CGF.VoidPtrTy);
1565-
}
1529+
else
1530+
PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
15661531
CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
15671532
Ctx.getPointerType(Ctx.VoidPtrTy));
15681533
++Idx;
@@ -1575,9 +1540,8 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
15751540
/* isSigned */ false);
15761541
else
15771542
IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);
1578-
assert(IfCondVal && "Expected a value");
15791543

1580-
// Create the parallel call.
1544+
assert(IfCondVal && "Expected a value");
15811545
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
15821546
llvm::Value *Args[] = {
15831547
RTLoc,
@@ -1593,13 +1557,6 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
15931557
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
15941558
CGM.getModule(), OMPRTL___kmpc_parallel_51),
15951559
Args);
1596-
1597-
// Pop any globalized values from the global stack.
1598-
for (auto *V : GlobalValues) {
1599-
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1600-
CGM.getModule(), OMPRTL___kmpc_free_shared),
1601-
V);
1602-
}
16031560
};
16041561

16051562
RegionCodeGenTy RCG(ParallelGen);
@@ -3510,6 +3467,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
35103467
D.getBeginLoc(), D.getBeginLoc());
35113468

35123469
const auto *RD = CS.getCapturedRecordDecl();
3470+
auto CurField = RD->field_begin();
35133471

35143472
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
35153473
/*Name=*/".zero.addr");
@@ -3521,6 +3479,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
35213479
Args.emplace_back(ZeroAddr.getPointer());
35223480

35233481
CGBuilderTy &Bld = CGF.Builder;
3482+
auto CI = CS.capture_begin();
35243483

35253484
// Use global memory for data sharing.
35263485
// Handle passing of global args to workers.
@@ -3567,27 +3526,23 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
35673526
++Idx;
35683527
}
35693528
if (CS.capture_size() > 0) {
3570-
auto CI = CS.capture_begin();
3571-
// Load the outlined arg aggregate struct.
35723529
ASTContext &CGFContext = CGF.getContext();
3573-
QualType RecordPointerTy =
3574-
CGFContext.getPointerType(CGFContext.getRecordType(RD));
3575-
Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
3576-
Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
3577-
Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(RecordPointerTy)));
3578-
llvm::Value *Arg = CGF.EmitLoadOfScalar(
3579-
TypedAddress,
3580-
/*Volatile=*/false, CGFContext.getPointerType(RecordPointerTy),
3581-
CI->getLocation());
3582-
Args.emplace_back(Arg);
3583-
} else {
3584-
// If there are no captured arguments, use nullptr.
3585-
ASTContext &CGFContext = CGF.getContext();
3586-
QualType RecordPointerTy =
3587-
CGFContext.getPointerType(CGFContext.getRecordType(RD));
3588-
llvm::Value *Arg =
3589-
llvm::Constant::getNullValue(CGF.ConvertTypeForMem(RecordPointerTy));
3590-
Args.emplace_back(Arg);
3530+
for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
3531+
QualType ElemTy = CurField->getType();
3532+
Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);
3533+
Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
3534+
Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)));
3535+
llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
3536+
/*Volatile=*/false,
3537+
CGFContext.getPointerType(ElemTy),
3538+
CI->getLocation());
3539+
if (CI->capturesVariableByCopy() &&
3540+
!CI->getCapturedVar()->getType()->isAnyPointerType()) {
3541+
Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
3542+
CI->getLocation());
3543+
}
3544+
Args.emplace_back(Arg);
3545+
}
35913546
}
35923547

35933548
emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 2 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -320,32 +320,6 @@ llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
320320
return CGM.getSize(SizeInChars);
321321
}
322322

323-
void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate(
324-
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
325-
const RecordDecl *RD = S.getCapturedRecordDecl();
326-
QualType RecordTy = getContext().getRecordType(RD);
327-
// Create the aggregate argument struct for the outlined function.
328-
LValue AggLV = MakeAddrLValue(
329-
CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy);
330-
331-
// Initialize the aggregate with captured values.
332-
auto CurField = RD->field_begin();
333-
for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
334-
E = S.capture_init_end();
335-
I != E; ++I, ++CurField) {
336-
LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField);
337-
// Initialize for VLA.
338-
if (CurField->hasCapturedVLAType()) {
339-
EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
340-
} else
341-
// Initialize for capturesThis, capturesVariableByCopy,
342-
// capturesVariable
343-
EmitInitializerForField(*CurField, LV, *I);
344-
}
345-
346-
CapturedVars.push_back(AggLV.getPointer(*this));
347-
}
348-
349323
void CodeGenFunction::GenerateOpenMPCapturedVars(
350324
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
351325
const RecordDecl *RD = S.getCapturedRecordDecl();
@@ -446,101 +420,6 @@ struct FunctionOptions {
446420
};
447421
} // namespace
448422

449-
static llvm::Function *emitOutlinedFunctionPrologueAggregate(
450-
CodeGenFunction &CGF, FunctionArgList &Args,
451-
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
452-
&LocalAddrs,
453-
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
454-
&VLASizes,
455-
llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
456-
StringRef FunctionName) {
457-
const CapturedDecl *CD = CS.getCapturedDecl();
458-
const RecordDecl *RD = CS.getCapturedRecordDecl();
459-
assert(CD->hasBody() && "missing CapturedDecl body");
460-
461-
CXXThisValue = nullptr;
462-
// Build the argument list.
463-
CodeGenModule &CGM = CGF.CGM;
464-
ASTContext &Ctx = CGM.getContext();
465-
Args.append(CD->param_begin(), CD->param_end());
466-
467-
// Create the function declaration.
468-
const CGFunctionInfo &FuncInfo =
469-
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
470-
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
471-
472-
auto *F =
473-
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
474-
FunctionName, &CGM.getModule());
475-
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
476-
if (CD->isNothrow())
477-
F->setDoesNotThrow();
478-
F->setDoesNotRecurse();
479-
480-
// Generate the function.
481-
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
482-
Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
483-
llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
484-
LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
485-
ContextV, CGM.getContext().getTagDeclType(RD));
486-
auto I = CS.captures().begin();
487-
for (const FieldDecl *FD : RD->fields()) {
488-
LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
489-
// Do not map arguments if we emit function with non-original types.
490-
Address LocalAddr = FieldLV.getAddress(CGF);
491-
// If we are capturing a pointer by copy we don't need to do anything, just
492-
// use the value that we get from the arguments.
493-
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
494-
const VarDecl *CurVD = I->getCapturedVar();
495-
LocalAddrs.insert({FD, {CurVD, LocalAddr}});
496-
++I;
497-
continue;
498-
}
499-
500-
LValue ArgLVal =
501-
CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
502-
if (FD->hasCapturedVLAType()) {
503-
llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
504-
const VariableArrayType *VAT = FD->getCapturedVLAType();
505-
VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
506-
} else if (I->capturesVariable()) {
507-
const VarDecl *Var = I->getCapturedVar();
508-
QualType VarTy = Var->getType();
509-
Address ArgAddr = ArgLVal.getAddress(CGF);
510-
if (ArgLVal.getType()->isLValueReferenceType()) {
511-
ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
512-
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
513-
assert(ArgLVal.getType()->isPointerType());
514-
ArgAddr = CGF.EmitLoadOfPointer(
515-
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
516-
}
517-
LocalAddrs.insert(
518-
{FD, {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
519-
} else if (I->capturesVariableByCopy()) {
520-
assert(!FD->getType()->isAnyPointerType() &&
521-
"Not expecting a captured pointer.");
522-
const VarDecl *Var = I->getCapturedVar();
523-
Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
524-
Var->getName());
525-
LValue CopyLVal =
526-
CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);
527-
528-
RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
529-
CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
530-
531-
LocalAddrs.insert({FD, {Var, CopyAddr}});
532-
} else {
533-
// If 'this' is captured, load it into CXXThisValue.
534-
assert(I->capturesThis());
535-
CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
536-
LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
537-
}
538-
++I;
539-
}
540-
541-
return F;
542-
}
543-
544423
static llvm::Function *emitOutlinedFunctionPrologue(
545424
CodeGenFunction &CGF, FunctionArgList &Args,
546425
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
@@ -716,37 +595,6 @@ static llvm::Function *emitOutlinedFunctionPrologue(
716595
return F;
717596
}
718597

719-
llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
720-
const CapturedStmt &S, SourceLocation Loc) {
721-
assert(
722-
CapturedStmtInfo &&
723-
"CapturedStmtInfo should be set when generating the captured function");
724-
const CapturedDecl *CD = S.getCapturedDecl();
725-
// Build the argument list.
726-
FunctionArgList Args;
727-
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
728-
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
729-
StringRef FunctionName = CapturedStmtInfo->getHelperName();
730-
llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
731-
*this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
732-
CodeGenFunction::OMPPrivateScope LocalScope(*this);
733-
for (const auto &LocalAddrPair : LocalAddrs) {
734-
if (LocalAddrPair.second.first) {
735-
LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
736-
return LocalAddrPair.second.second;
737-
});
738-
}
739-
}
740-
(void)LocalScope.Privatize();
741-
for (const auto &VLASizePair : VLASizes)
742-
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
743-
PGO.assignRegionCounters(GlobalDecl(CD), F);
744-
CapturedStmtInfo->EmitBody(*this, CD->getBody());
745-
(void)LocalScope.ForceCleanup();
746-
FinishFunction(CD->getBodyRBrace());
747-
return F;
748-
}
749-
750598
llvm::Function *
751599
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
752600
SourceLocation Loc) {
@@ -1737,7 +1585,7 @@ static void emitCommonOMPParallelDirective(
17371585
// The following lambda takes care of appending the lower and upper bound
17381586
// parameters when necessary
17391587
CodeGenBoundParameters(CGF, S, CapturedVars);
1740-
CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars);
1588+
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
17411589
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
17421590
CapturedVars, IfCond);
17431591
}
@@ -6205,7 +6053,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
62056053

62066054
OMPTeamsScope Scope(CGF, S);
62076055
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6208-
CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars);
6056+
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
62096057
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
62106058
CapturedVars);
62116059
}

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3283,13 +3283,8 @@ class CodeGenFunction : public CodeGenTypeCache {
32833283
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
32843284
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
32853285
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
3286-
llvm::Function *
3287-
GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S,
3288-
SourceLocation Loc);
32893286
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
32903287
SourceLocation Loc);
3291-
void GenerateOpenMPCapturedVarsAggregate(
3292-
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars);
32933288
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
32943289
SmallVectorImpl<llvm::Value *> &CapturedVars);
32953290
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,

clang/test/CodeGenCXX/observe-noexcept.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ void ffcomplex (int a) {
99

1010
// CHECK: call { double, double } @__muldc3(double %{{.+}}, double %{{.+}}, double %{{.+}}, double %{{.+}})
1111
dc *= dc;
12-
// CHECK: call {{.+}} @__kmpc_fork_call({{.+}} [[REGNAME1:@.*]] to void (i32*, i32*, ...)*), %struct.anon* %{{.+}})
12+
// CHECK: call {{.+}} @__kmpc_fork_call({{.+}} [[REGNAME1:@.*]] to void (i32*, i32*, ...)*), { double, double }* %{{.+}})
1313
#pragma omp parallel
1414
{
1515
dc *= dc;
@@ -32,7 +32,7 @@ void foo(int a, int b) {
3232

3333
void (*fptr)(void) noexcept = fnoexcp;
3434

35-
// CHECK: call {{.+}} @__kmpc_fork_call({{.+}} [[REGNAME2:@.*]] to void (i32*, i32*, ...)*), %struct.anon.0* %{{.+}})
35+
// CHECK: call {{.+}} @__kmpc_fork_call({{.+}} [[REGNAME2:@.*]] to void (i32*, i32*, ...)*), void ()** %{{.+}})
3636
#pragma omp parallel
3737
{
3838
fptr();

0 commit comments

Comments
 (0)