Skip to content

Commit 61f415d

Browse files
SC llvm teamSC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:d6254e1b2e6d into amd-gfx:a1fa6830c554
Local branch amd-gfx a1fa683 Merged main:dc129d6f715c into amd-gfx:bedba19995b8 Remote branch main d6254e1 Introduce the initial support for OpenMP kernel language (llvm#66844)
2 parents a1fa683 + d6254e1 commit 61f415d

32 files changed

+1324
-711
lines changed

clang/include/clang/AST/OpenMPClause.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9220,6 +9220,27 @@ class OMPXAttributeClause
92209220
}
92219221
};
92229222

9223+
/// This represents 'ompx_bare' clause in the '#pragma omp target teams ...'
9224+
/// directive.
9225+
///
9226+
/// \code
9227+
/// #pragma omp target teams ompx_bare
9228+
/// \endcode
9229+
/// In this example directive '#pragma omp target teams' has a 'ompx_bare'
9230+
/// clause.
9231+
class OMPXBareClause : public OMPNoChildClause<llvm::omp::OMPC_ompx_bare> {
9232+
public:
9233+
/// Build 'ompx_bare' clause.
9234+
///
9235+
/// \param StartLoc Starting location of the clause.
9236+
/// \param EndLoc Ending location of the clause.
9237+
OMPXBareClause(SourceLocation StartLoc, SourceLocation EndLoc)
9238+
: OMPNoChildClause(StartLoc, EndLoc) {}
9239+
9240+
/// Build an empty clause.
9241+
OMPXBareClause() = default;
9242+
};
9243+
92239244
} // namespace clang
92249245

92259246
#endif // LLVM_CLANG_AST_OPENMPCLAUSE_H

clang/include/clang/AST/RecursiveASTVisitor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3890,6 +3890,11 @@ bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
38903890
return true;
38913891
}
38923892

3893+
template <typename Derived>
3894+
bool RecursiveASTVisitor<Derived>::VisitOMPXBareClause(OMPXBareClause *C) {
3895+
return true;
3896+
}
3897+
38933898
// FIXME: look at the following tricky-seeming exprs to see if we
38943899
// need to recurse on anything. These are ones that have methods
38953900
// returning decls or qualtypes or nestednamespecifier -- though I'm

clang/include/clang/Basic/DiagnosticParseKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,8 @@ def warn_clause_expected_string : Warning<
13601360
"expected string literal in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
13611361
def err_omp_unexpected_clause : Error<
13621362
"unexpected OpenMP clause '%0' in directive '#pragma omp %1'">;
1363+
def err_omp_unexpected_clause_extension_only : Error<
1364+
"OpenMP clause '%0' is only available as extension, use '-fopenmp-extensions'">;
13631365
def err_omp_immediate_directive : Error<
13641366
"'#pragma omp %0' %select{|with '%2' clause }1cannot be an immediate substatement">;
13651367
def err_omp_expected_identifier_for_critical : Error<
@@ -1452,6 +1454,8 @@ def warn_unknown_declare_variant_isa_trait
14521454
"spelling or consider restricting the context selector with the "
14531455
"'arch' selector further">,
14541456
InGroup<SourceUsesOpenMP>;
1457+
def note_ompx_bare_clause : Note<
1458+
"OpenMP extension clause '%0' only allowed with '#pragma omp %1'">;
14551459
def note_omp_declare_variant_ctx_options
14561460
: Note<"context %select{set|selector|property}0 options are: %1">;
14571461
def warn_omp_declare_variant_expected

clang/include/clang/Sema/Sema.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12486,6 +12486,10 @@ class Sema final {
1248612486
SourceLocation LParenLoc,
1248712487
SourceLocation EndLoc);
1248812488

12489+
/// Called on a well-formed 'ompx_bare' clause.
12490+
OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc,
12491+
SourceLocation EndLoc);
12492+
1248912493
/// The kind of conversion being performed.
1249012494
enum CheckedConversionKind {
1249112495
/// An implicit conversion.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,7 @@ class ASTWriter : public ASTDeserializationListener,
613613
/// the module but currently is merely a random 32-bit number.
614614
ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
615615
Module *WritingModule, StringRef isysroot,
616+
bool hasErrors = false,
616617
bool ShouldCacheASTInMemory = false);
617618

618619
/// Emit a token.

clang/lib/AST/OpenMPClause.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
170170
case OMPC_affinity:
171171
case OMPC_when:
172172
case OMPC_bind:
173+
case OMPC_ompx_bare:
173174
break;
174175
default:
175176
break;
@@ -2546,6 +2547,10 @@ void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
25462547
OS << ")";
25472548
}
25482549

2550+
void OMPClausePrinter::VisitOMPXBareClause(OMPXBareClause *Node) {
2551+
OS << "ompx_bare";
2552+
}
2553+
25492554
void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
25502555
VariantMatchInfo &VMI) const {
25512556
for (const OMPTraitSet &Set : Sets) {

clang/lib/AST/StmtProfile.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,7 @@ void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
930930
}
931931
void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
932932
}
933+
void OMPClauseProfiler::VisitOMPXBareClause(const OMPXBareClause *C) {}
933934
} // namespace
934935

935936
void

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
551551
return CurrentExecutionMode;
552552
}
553553

554-
static CGOpenMPRuntimeGPU::DataSharingMode
555-
getDataSharingMode(CodeGenModule &CGM) {
556-
return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
557-
: CGOpenMPRuntimeGPU::Generic;
554+
CGOpenMPRuntimeGPU::DataSharingMode
555+
CGOpenMPRuntimeGPU::getDataSharingMode() const {
556+
return CurrentDataSharingMode;
558557
}
559558

560559
/// Check for inner (nested) SPMD construct, if any
@@ -752,6 +751,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
752751
EntryFunctionState EST;
753752
WrapperFunctionsMap.clear();
754753

754+
[[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
755+
assert(!IsBareKernel && "bare kernel should not be at generic mode");
756+
755757
// Emit target region as a standalone region.
756758
class NVPTXPrePostActionTy : public PrePostActionTy {
757759
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
@@ -760,15 +762,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
760762
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
761763
: EST(EST) {}
762764
void Enter(CodeGenFunction &CGF) override {
763-
auto &RT =
764-
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
765+
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
765766
RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
766767
// Skip target region initialization.
767768
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
768769
}
769770
void Exit(CodeGenFunction &CGF) override {
770-
auto &RT =
771-
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
771+
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
772772
RT.clearLocThreadIdInsertPt(CGF);
773773
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
774774
}
@@ -807,25 +807,39 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
807807
ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
808808
EntryFunctionState EST;
809809

810+
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
811+
810812
// Emit target region as a standalone region.
811813
class NVPTXPrePostActionTy : public PrePostActionTy {
812814
CGOpenMPRuntimeGPU &RT;
813815
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
816+
bool IsBareKernel;
817+
DataSharingMode Mode;
814818

815819
public:
816820
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
817-
CGOpenMPRuntimeGPU::EntryFunctionState &EST)
818-
: RT(RT), EST(EST) {}
821+
CGOpenMPRuntimeGPU::EntryFunctionState &EST,
822+
bool IsBareKernel)
823+
: RT(RT), EST(EST), IsBareKernel(IsBareKernel),
824+
Mode(RT.CurrentDataSharingMode) {}
819825
void Enter(CodeGenFunction &CGF) override {
826+
if (IsBareKernel) {
827+
RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
828+
return;
829+
}
820830
RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
821831
// Skip target region initialization.
822832
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
823833
}
824834
void Exit(CodeGenFunction &CGF) override {
835+
if (IsBareKernel) {
836+
RT.CurrentDataSharingMode = Mode;
837+
return;
838+
}
825839
RT.clearLocThreadIdInsertPt(CGF);
826840
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
827841
}
828-
} Action(*this, EST);
842+
} Action(*this, EST, IsBareKernel);
829843
CodeGen.setAction(Action);
830844
IsInTTDRegion = true;
831845
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -843,7 +857,8 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
843857
assert(!ParentName.empty() && "Invalid target region parent name!");
844858

845859
bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
846-
if (Mode)
860+
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
861+
if (Mode || IsBareKernel)
847862
emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
848863
CodeGen);
849864
else
@@ -863,6 +878,9 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
863878
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
864879
llvm_unreachable("OpenMP can only handle device code.");
865880

881+
if (CGM.getLangOpts().OpenMPCUDAMode)
882+
CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;
883+
866884
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
867885
if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
868886
return;
@@ -1030,7 +1048,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
10301048
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
10311049
SourceLocation Loc,
10321050
bool WithSPMDCheck) {
1033-
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
1051+
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
10341052
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
10351053
return;
10361054

@@ -1142,7 +1160,7 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(
11421160

11431161
void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
11441162
bool WithSPMDCheck) {
1145-
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
1163+
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
11461164
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
11471165
return;
11481166

@@ -1178,11 +1196,18 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
11781196
if (!CGF.HaveInsertPoint())
11791197
return;
11801198

1199+
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
1200+
11811201
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
11821202
/*Name=*/".zero.addr");
11831203
CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
11841204
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1185-
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
1205+
// We don't emit any thread id function call in bare kernel, but because the
1206+
// outlined function has a pointer argument, we emit a nullptr here.
1207+
if (IsBareKernel)
1208+
OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
1209+
else
1210+
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
11861211
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
11871212
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
11881213
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -3273,7 +3298,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
32733298

32743299
void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
32753300
const Decl *D) {
3276-
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
3301+
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
32773302
return;
32783303

32793304
assert(D && "Expected function or captured|block decl.");
@@ -3382,7 +3407,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
33823407
VarTy, Align);
33833408
}
33843409

3385-
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
3410+
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
33863411
return Address::invalid();
33873412

33883413
VD = VD->getCanonicalDecl();

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
3232
/// Unknown execution mode (orphaned directive).
3333
EM_Unknown,
3434
};
35+
36+
/// Target codegen is specialized based on two data-sharing modes: CUDA, in
37+
/// which the local variables are actually global threadlocal, and Generic, in
38+
/// which the local variables are placed in global memory if they may escape
39+
/// their declaration context.
40+
enum DataSharingMode {
41+
/// CUDA data sharing mode.
42+
DS_CUDA,
43+
/// Generic data-sharing mode.
44+
DS_Generic,
45+
};
46+
3547
private:
3648
/// Parallel outlined function work for workers to execute.
3749
llvm::SmallVector<llvm::Function *, 16> Work;
@@ -42,6 +54,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
4254

4355
ExecutionMode getExecutionMode() const;
4456

57+
DataSharingMode getDataSharingMode() const;
58+
4559
/// Get barrier to synchronize all threads in a block.
4660
void syncCTAThreads(CodeGenFunction &CGF);
4761

@@ -297,17 +311,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
297311
Address getAddressOfLocalVariable(CodeGenFunction &CGF,
298312
const VarDecl *VD) override;
299313

300-
/// Target codegen is specialized based on two data-sharing modes: CUDA, in
301-
/// which the local variables are actually global threadlocal, and Generic, in
302-
/// which the local variables are placed in global memory if they may escape
303-
/// their declaration context.
304-
enum DataSharingMode {
305-
/// CUDA data sharing mode.
306-
CUDA,
307-
/// Generic data-sharing mode.
308-
Generic,
309-
};
310-
311314
/// Cleans up references to the objects in finished function.
312315
///
313316
void functionFinished(CodeGenFunction &CGF) override;
@@ -343,6 +346,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
343346
/// to emit optimized code.
344347
ExecutionMode CurrentExecutionMode = EM_Unknown;
345348

349+
/// Track the data sharing mode when codegening directives within a target
350+
/// region.
351+
DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;
352+
346353
/// true if currently emitting code for target/teams/distribute region, false
347354
/// - otherwise.
348355
bool IsInTTDRegion = false;

clang/lib/Frontend/ASTUnit.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2341,9 +2341,12 @@ bool ASTUnit::Save(StringRef File) {
23412341
return false;
23422342
}
23432343

2344-
static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
2345-
Sema &S, raw_ostream &OS) {
2346-
Writer.WriteAST(S, std::string(), nullptr, "");
2344+
static bool serializeUnit(ASTWriter &Writer,
2345+
SmallVectorImpl<char> &Buffer,
2346+
Sema &S,
2347+
bool hasErrors,
2348+
raw_ostream &OS) {
2349+
Writer.WriteAST(S, std::string(), nullptr, "", hasErrors);
23472350

23482351
// Write the generated bitstream to "Out".
23492352
if (!Buffer.empty())
@@ -2353,14 +2356,18 @@ static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
23532356
}
23542357

23552358
bool ASTUnit::serialize(raw_ostream &OS) {
2359+
// For serialization we are lenient if the errors were only warn-as-error kind.
2360+
bool hasErrors = getDiagnostics().hasUncompilableErrorOccurred();
2361+
23562362
if (WriterData)
2357-
return serializeUnit(WriterData->Writer, WriterData->Buffer, getSema(), OS);
2363+
return serializeUnit(WriterData->Writer, WriterData->Buffer,
2364+
getSema(), hasErrors, OS);
23582365

23592366
SmallString<128> Buffer;
23602367
llvm::BitstreamWriter Stream(Buffer);
23612368
InMemoryModuleCache ModuleCache;
23622369
ASTWriter Writer(Stream, Buffer, ModuleCache, {});
2363-
return serializeUnit(Writer, Buffer, getSema(), OS);
2370+
return serializeUnit(Writer, Buffer, getSema(), hasErrors, OS);
23642371
}
23652372

23662373
using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;

clang/lib/Parse/ParseOpenMP.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3416,6 +3416,17 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
34163416
case OMPC_ompx_attribute:
34173417
Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
34183418
break;
3419+
case OMPC_ompx_bare:
3420+
if (WrongDirective)
3421+
Diag(Tok, diag::note_ompx_bare_clause)
3422+
<< getOpenMPClauseName(CKind) << "target teams";
3423+
if (!ErrorFound && !getLangOpts().OpenMPExtensions) {
3424+
Diag(Tok, diag::err_omp_unexpected_clause_extension_only)
3425+
<< getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
3426+
ErrorFound = true;
3427+
}
3428+
Clause = ParseOpenMPClause(CKind, WrongDirective);
3429+
break;
34193430
default:
34203431
break;
34213432
}

clang/lib/Sema/SemaOpenMP.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17553,6 +17553,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
1755317553
case OMPC_partial:
1755417554
Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
1755517555
break;
17556+
case OMPC_ompx_bare:
17557+
Res = ActOnOpenMPXBareClause(StartLoc, EndLoc);
17558+
break;
1755617559
case OMPC_if:
1755717560
case OMPC_final:
1755817561
case OMPC_num_threads:
@@ -24279,3 +24282,8 @@ OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
2427924282
SourceLocation EndLoc) {
2428024283
return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
2428124284
}
24285+
24286+
OMPClause *Sema::ActOnOpenMPXBareClause(SourceLocation StartLoc,
24287+
SourceLocation EndLoc) {
24288+
return new (Context) OMPXBareClause(StartLoc, EndLoc);
24289+
}

0 commit comments

Comments
 (0)