Skip to content

Commit 347e41c

Browse files
author
Erich Keane
authored
[SYCL] Implemented SYCL 2020 sub-group size functionality. (#3444)
As specified here: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/SubGroup/SYCL_INTEL_sub_group.asciidoc#attributes This patch implements the named_sub_group_size attribute as well as the command line parameter, and creates a new spelling of reqd_sub_group_size (sub_group_size) to work like the SYCL 2020 version.
1 parent 2045052 commit 347e41c

19 files changed

+581
-19
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,7 @@ def SYCLSimd : InheritableAttr {
12131213
let Subjects = SubjectList<[Function, GlobalVar]>;
12141214
let Documentation = [SYCLSimdDocs];
12151215
let SupportsNonconformingLambdaSyntax = 1;
1216+
let SimpleHandler = 1;
12161217
}
12171218

12181219
// Available in SYCL explicit SIMD extension. Binds a file scope private
@@ -1431,15 +1432,32 @@ def LoopUnrollHint : StmtAttr {
14311432
}
14321433

14331434
def IntelReqdSubGroupSize: InheritableAttr {
1434-
let Spellings = [GNU<"intel_reqd_sub_group_size">,
1435-
CXX11<"intel", "reqd_sub_group_size">];
1435+
let Spellings = [
1436+
GNU<"intel_reqd_sub_group_size">, CXX11<"intel", "reqd_sub_group_size">,
1437+
CXX11<"intel", "sub_group_size"> // SYCL2020 spelling.
1438+
];
14361439
let Args = [ExprArgument<"Value">];
14371440
let Subjects = SubjectList<[Function], ErrorDiag>;
14381441
let Documentation = [IntelReqdSubGroupSizeDocs];
14391442
let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
14401443
let SupportsNonconformingLambdaSyntax = 1;
1444+
let Accessors =
1445+
[Accessor<"isSYCL2020Spelling", [CXX11<"intel", "sub_group_size">]>];
14411446
}
14421447

1448+
def IntelNamedSubGroupSize : InheritableAttr {
1449+
let Spellings = [CXX11<"intel", "named_sub_group_size">];
1450+
let Args = [EnumArgument<"Type", "SubGroupSizeType", ["automatic", "primary"],
1451+
["Automatic", "Primary"]>];
1452+
let Subjects = SubjectList<[Function], ErrorDiag>;
1453+
let Documentation = [IntelNamedSubGroupSizeDocs];
1454+
let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
1455+
let SupportsNonconformingLambdaSyntax = 1;
1456+
}
1457+
1458+
def :
1459+
MutualExclusions<[IntelReqdSubGroupSize, IntelNamedSubGroupSize, SYCLSimd]>;
1460+
14431461
// This attribute is both a type attribute, and a declaration attribute (for
14441462
// parameter variables).
14451463
def OpenCLAccess : Attr {

clang/include/clang/Basic/AttrDocs.td

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4354,9 +4354,9 @@ This attribute can be used in both OpenCL and SYCL.
43544354

43554355
OpenCL documentation:
43564356
The optional attribute intel_reqd_sub_group_size can be used to indicate that
4357-
the kernel must be compiled and executed with the specified subgroup size. When
4357+
the kernel must be compiled and executed with the specified sub group size. When
43584358
this attribute is present, get_max_sub_group_size() is guaranteed to return the
4359-
specified integer value. This is important for the correctness of many subgroup
4359+
specified integer value. This is important for the correctness of many sub group
43604360
algorithms, and in some cases may be used by the compiler to generate more optimal
43614361
code. See `cl_intel_required_subgroup_size
43624362
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`
@@ -4367,6 +4367,13 @@ The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must
43674367
be compiled and executed with a sub-group of size n. The value of n must be set
43684368
to a sub-group size supported by the device, or device compilation will fail.
43694369

4370+
The ``[[intel::sub_group_size(n)]]`` attribute has the same effect as the other
4371+
attribute spellings, except that it follows the SYCL 2020 Attribute Rules. See
4372+
the ``[[intel::named_sub_group_size(NAME)]]`` documentation for clarification.
4373+
4374+
This attribute is mutually exclusive with ``[[intel::named_sub_group_size(NAME)]]``
4375+
and ``[[intel::sycl_explicit_simd]]``.
4376+
43704377
In addition to device functions, the required sub-group size attribute may also
43714378
be specified in the definition of a named functor object and lambda functions,
43724379
as in the examples below:
@@ -4390,6 +4397,47 @@ See Sub-groups for NDRange Parallelism proposal in sycl/doc/extensions/sub_group
43904397
}];
43914398
}
43924399

4400+
def IntelNamedSubGroupSizeDocs : Documentation {
4401+
let Category = DocCatFunction;
4402+
let Content = [{
4403+
The ``[[intel::named_sub_group_size(NAME)]]`` attribute works similarly to the
4404+
``[[intel::sub_group_size(N)]]`` attribute in that it defines the specific
4405+
sub group size for the kernel. The ``[[intel::named_sub_group_size(NAME)]]``
4406+
form accepts a required parameter of either ``automatic`` or ``primary``.
4407+
4408+
``automatic`` specifies that the implementation is free to select any of the
4409+
valid sub-group sizes associated with the device to which the kernel is
4410+
submitted. ``primary`` specifies that the implementation should select the
4411+
device's primary sub-group size as reported by
4412+
``info::device::primary_sub_group_size``.
4413+
4414+
This attribute may not be combined with ``[[intel::sub_group_size(N)]]``, as
4415+
the two attributes have different meanings. It is also mutually exclusive with
4416+
``[[intel::sycl_explicit_simd]]``.
4417+
4418+
In addition to the attributes, a default sub-group size strategy may be
4419+
specified by the ``-fsycl-default-sub-group-size`` command line option, which
4420+
accepts either ``automatic``, ``primary``, or a default size as an integer.
4421+
These values match and have the same behavior as the ``automatic``, ``primary``,
4422+
and ``[[intel::sub_group_size(N)]]`` values respectively.
4423+
4424+
SYCL 2020 Attribute Rules:
4425+
SYCL 2020 specifies that kernel-type attributes should only be specified on the
4426+
kernel or a ``SYCL_EXTERNAL`` function. This implementation permits these
4427+
attributes to appear on all function declarations for the purposes of
4428+
self-documenting declarations. However, these attributes must match the kernel's
4429+
sub-group size as configured by the command line, or via an attribute
4430+
specifically.
4431+
4432+
In addition to the SYCL 2020 Attribute Rules, this attribute and the
4433+
``[[intel::sub_group_size(N)]]`` attribute also require that any
4434+
``SYCL_EXTERNAL`` functions defined in a different translation unit must have a
4435+
matching sub-group size specification, so ``SYCL_EXTERNAL`` functions not
4436+
defined in this translation unit must also have a matching sub-group
4437+
specification to the kernel function that calls it.
4438+
}];
4439+
}
4440+
43934441
def OpenCLAccessDocs : Documentation {
43944442
let Category = DocCatStmt;
43954443
let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11317,6 +11317,11 @@ def note_spelling_suggestion : Note<
1131711317
"did you mean to use %0 instead?">;
1131811318
def warn_attribute_requires_non_negative_integer_argument :
1131911319
Warning<warn_impcast_integer_sign.Text>, InGroup<AcceptedAttributes>;
11320+
def err_sycl_mismatch_group_size
11321+
: Error<"%select{kernel-called|undefined 'SYCL_EXTERNAL'}0 function must "
11322+
"have a sub group size that matches the size specified for the "
11323+
"kernel">;
11324+
def note_sycl_kernel_declared_here : Note<"kernel declared here">;
1132011325

1132111326
// errors of expect.with.probability
1132211327
def err_probability_not_constant_float : Error<

clang/include/clang/Basic/LangOptions.def

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,14 @@ LANGOPT(RelativeCXXABIVTables, 1, 0,
423423

424424
LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits")
425425

426+
ENUM_LANGOPT(DefaultSubGroupSizeType, SubGroupSizeType, 2,
427+
SubGroupSizeType::None,
428+
"Strategy via which sub group is assigned for SYCL kernel "
429+
"types if not overridden via attributes")
430+
431+
VALUE_LANGOPT(DefaultSubGroupSize, 32, 0,
432+
"If DefaultSubGroupSizeType is Integer contains the value")
433+
426434
#undef LANGOPT
427435
#undef COMPATIBLE_LANGOPT
428436
#undef BENIGN_LANGOPT

clang/include/clang/Basic/LangOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ class LangOptions : public LangOptionsBase {
264264
Single
265265
};
266266

267+
enum class SubGroupSizeType { None, Auto, Primary, Integer };
268+
267269
public:
268270
/// The used language standard.
269271
LangStandard::Kind LangStd;

clang/include/clang/Driver/Options.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5728,6 +5728,14 @@ def sycl_std_EQ : Joined<["-"], "sycl-std=">, Group<sycl_Group>,
57285728
MarshallingInfoEnum<LangOpts<"SYCLVersion">, "SYCL_None">,
57295729
ShouldParseIf<!strconcat(fsycl_is_device.KeyPath, "||", fsycl_is_host.KeyPath)>;
57305730

5731+
def fsycl_default_sub_group_size
5732+
: Separate<["-"], "fsycl-default-sub-group-size">,
5733+
HelpText<"Set the default sub group size for SYCL kernels">,
5734+
Flags<[CC1Option]>;
5735+
def fsycl_default_sub_group_size_EQ
5736+
: Joined<["-"], "fsycl-default-sub-group-size=">,
5737+
Alias<fsycl_default_sub_group_size>, Flags<[CC1Option]>;
5738+
57315739
defm cuda_approx_transcendentals : BoolFOption<"cuda-approx-transcendentals",
57325740
LangOpts<"CUDADeviceApproxTranscendentals">, DefaultFalse,
57335741
PosFlag<SetTrue, [CC1Option], "Use">, NegFlag<SetFalse, [], "Don't use">,

clang/include/clang/Sema/Sema.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10275,6 +10275,8 @@ class Sema final {
1027510275
Expr *E);
1027610276
IntelReqdSubGroupSizeAttr *
1027710277
MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A);
10278+
IntelNamedSubGroupSizeAttr *
10279+
MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A);
1027810280
void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI,
1027910281
Expr *E);
1028010282
SYCLIntelNumSimdWorkItemsAttr *

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F,
592592
void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
593593
llvm::Function *Fn)
594594
{
595-
if (!FD->hasAttr<OpenCLKernelAttr>())
595+
if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<SYCLDeviceAttr>())
596596
return;
597597

598598
// TODO Module identifier is not reliable for this purpose since two modules
@@ -602,7 +602,8 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
602602

603603
llvm::LLVMContext &Context = getLLVMContext();
604604

605-
CGM.GenOpenCLArgMetadata(Fn, FD, this);
605+
if (FD->hasAttr<OpenCLKernelAttr>())
606+
CGM.GenOpenCLArgMetadata(Fn, FD, this);
606607

607608
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
608609
QualType HintQTy = A->getTypeHint();
@@ -648,15 +649,52 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
648649
llvm::MDNode::get(Context, AttrMDArgs));
649650
}
650651

651-
if (const IntelReqdSubGroupSizeAttr *A =
652-
FD->getAttr<IntelReqdSubGroupSizeAttr>()) {
653-
const auto *CE = dyn_cast<ConstantExpr>(A->getValue());
652+
bool IsKernelOrDevice =
653+
FD->hasAttr<SYCLKernelAttr>() || FD->hasAttr<SYCLDeviceAttr>();
654+
const IntelReqdSubGroupSizeAttr *ReqSubGroup =
655+
FD->getAttr<IntelReqdSubGroupSizeAttr>();
656+
657+
// To support the SYCL 2020 spelling with no propagation, only emit for
658+
// kernel-or-device when that spelling is used; else use the old behavior.
659+
if (ReqSubGroup && (IsKernelOrDevice || !ReqSubGroup->isSYCL2020Spelling())) {
660+
const auto *CE = dyn_cast<ConstantExpr>(ReqSubGroup->getValue());
654661
assert(CE && "Not an integer constant expression");
655662
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
656663
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
657664
Builder.getInt32(ArgVal->getSExtValue()))};
658665
Fn->setMetadata("intel_reqd_sub_group_size",
659666
llvm::MDNode::get(Context, AttrMDArgs));
667+
} else if (IsKernelOrDevice &&
668+
CGM.getLangOpts().getDefaultSubGroupSizeType() ==
669+
LangOptions::SubGroupSizeType::Integer) {
670+
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
671+
Builder.getInt32(CGM.getLangOpts().DefaultSubGroupSize))};
672+
Fn->setMetadata("intel_reqd_sub_group_size",
673+
llvm::MDNode::get(Context, AttrMDArgs));
674+
}
675+
676+
// SYCL 2020 doesn't propagate attributes, so don't put it in an intermediate
677+
// location.
678+
if (IsKernelOrDevice) {
679+
if (const auto *A = FD->getAttr<IntelNamedSubGroupSizeAttr>()) {
680+
llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get(
681+
Context, A->getType() == IntelNamedSubGroupSizeAttr::Primary
682+
? "primary"
683+
: "automatic")};
684+
Fn->setMetadata("intel_reqd_sub_group_size",
685+
llvm::MDNode::get(Context, AttrMDArgs));
686+
} else if (CGM.getLangOpts().getDefaultSubGroupSizeType() ==
687+
LangOptions::SubGroupSizeType::Auto) {
688+
llvm::Metadata *AttrMDArgs[] = {
689+
llvm::MDString::get(Context, "automatic")};
690+
Fn->setMetadata("intel_reqd_sub_group_size",
691+
llvm::MDNode::get(Context, AttrMDArgs));
692+
} else if (CGM.getLangOpts().getDefaultSubGroupSizeType() ==
693+
LangOptions::SubGroupSizeType::Primary) {
694+
llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get(Context, "primary")};
695+
Fn->setMetadata("intel_reqd_sub_group_size",
696+
llvm::MDNode::get(Context, AttrMDArgs));
697+
}
660698
}
661699

662700
if (FD->hasAttr<SYCLSimdAttr>()) {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4394,6 +4394,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
43944394
HeaderOpt.append(Header);
43954395
CmdArgs.push_back(Args.MakeArgString(HeaderOpt));
43964396
}
4397+
4398+
// Forward -fsycl-default-sub-group-size if in SYCL mode.
4399+
Args.AddLastArg(CmdArgs, options::OPT_fsycl_default_sub_group_size);
43974400
}
43984401

43994402
if (IsSYCL) {

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3502,6 +3502,20 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
35023502
LangOptions::SignReturnAddressKeyKind::BKey)
35033503
GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA);
35043504

3505+
switch (Opts.getDefaultSubGroupSizeType()) {
3506+
case LangOptions::SubGroupSizeType::Auto:
3507+
GenerateArg(Args, OPT_fsycl_default_sub_group_size, "automatic", SA);
3508+
break;
3509+
case LangOptions::SubGroupSizeType::Primary:
3510+
GenerateArg(Args, OPT_fsycl_default_sub_group_size, "primary", SA);
3511+
break;
3512+
case LangOptions::SubGroupSizeType::Integer:
3513+
GenerateArg(Args, OPT_fsycl_default_sub_group_size,
3514+
Twine(Opts.DefaultSubGroupSize), SA);
3515+
break;
3516+
case LangOptions::SubGroupSizeType::None:
3517+
break;
3518+
}
35053519
}
35063520

35073521
bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
@@ -3592,6 +3606,28 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
35923606
}
35933607
}
35943608

3609+
// Parse SYCL Default Sub group size.
3610+
if (const Arg *A = Args.getLastArg(OPT_fsycl_default_sub_group_size)) {
3611+
StringRef Value = A->getValue();
3612+
Opts.setDefaultSubGroupSizeType(
3613+
llvm::StringSwitch<LangOptions::SubGroupSizeType>(Value)
3614+
.Case("automatic", LangOptions::SubGroupSizeType::Auto)
3615+
.Case("primary", LangOptions::SubGroupSizeType::Primary)
3616+
.Default(LangOptions::SubGroupSizeType::Integer));
3617+
3618+
if (Opts.getDefaultSubGroupSizeType() ==
3619+
LangOptions::SubGroupSizeType::Integer) {
3620+
int64_t IntResult;
3621+
if (!Value.getAsInteger(10, IntResult)) {
3622+
Opts.DefaultSubGroupSize = IntResult;
3623+
} else {
3624+
Diags.Report(diag::err_drv_invalid_value)
3625+
<< A->getAsString(Args) << A->getValue();
3626+
Opts.setDefaultSubGroupSizeType(LangOptions::SubGroupSizeType::None);
3627+
}
3628+
}
3629+
}
3630+
35953631
// These need to be parsed now. They are used to set OpenCL defaults.
35963632
Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
35973633
Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins);

clang/lib/Sema/SemaDecl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2623,6 +2623,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
26232623
NewAttr = S.mergeEnforceTCBLeafAttr(D, *TCBLA);
26242624
else if (const auto *A = dyn_cast<IntelReqdSubGroupSizeAttr>(Attr))
26252625
NewAttr = S.MergeIntelReqdSubGroupSizeAttr(D, *A);
2626+
else if (const auto *A = dyn_cast<IntelNamedSubGroupSizeAttr>(Attr))
2627+
NewAttr = S.MergeIntelNamedSubGroupSizeAttr(D, *A);
26262628
else if (const auto *A = dyn_cast<SYCLIntelNumSimdWorkItemsAttr>(Attr))
26272629
NewAttr = S.MergeSYCLIntelNumSimdWorkItemsAttr(D, *A);
26282630
else if (const auto *A = dyn_cast<SYCLIntelSchedulerTargetFmaxMhzAttr>(Attr))

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3206,6 +3206,42 @@ static void handleIntelReqdSubGroupSize(Sema &S, Decl *D,
32063206
S.AddIntelReqdSubGroupSize(D, AL, E);
32073207
}
32083208

3209+
/// Merges the named sub-group size attribute \p A into declaration \p D.
///
/// Returns a newly created attribute when \p D carries no
/// IntelNamedSubGroupSizeAttr yet. Returns nullptr when one is already
/// present; if the existing attribute names a different type than \p A, a
/// duplicate-attribute warning and a previous-attribute note are emitted
/// first.
IntelNamedSubGroupSizeAttr *
Sema::MergeIntelNamedSubGroupSizeAttr(Decl *D,
                                      const IntelNamedSubGroupSizeAttr &A) {
  const auto *Existing = D->getAttr<IntelNamedSubGroupSizeAttr>();

  // Nothing on the declaration yet: build the attribute from the incoming one.
  if (!Existing)
    return IntelNamedSubGroupSizeAttr::Create(Context, A.getType(), A);

  // An attribute is already applied; only complain when the values disagree.
  const bool SameType = Existing->getType() == A.getType();
  if (!SameType) {
    Diag(Existing->getLoc(), diag::warn_duplicate_attribute) << &A;
    Diag(A.getLoc(), diag::note_previous_attribute);
  }

  return nullptr;
}
3224+
3225+
/// Handles the named sub-group size attribute described by \p AL on \p D.
///
/// The single argument may be spelled either as an identifier or as a string
/// literal. If the spelling cannot be converted to a SubGroupSizeType, a
/// warn_attribute_type_not_supported diagnostic is emitted and no attribute
/// is attached to \p D.
static void handleIntelNamedSubGroupSize(Sema &S, Decl *D,
                                         const ParsedAttr &AL) {
  StringRef SizeStr;
  SourceLocation Loc;
  if (AL.isArgIdent(0)) {
    // Identifier form: take the name and location from the identifier itself.
    IdentifierLoc *IL = AL.getArgAsIdent(0);
    SizeStr = IL->Ident->getName();
    Loc = IL->Loc;
  } else if (!S.checkStringLiteralArgumentAttr(AL, 0, SizeStr, &Loc)) {
    // Neither an identifier nor a usable string literal.
    return;
  }

  IntelNamedSubGroupSizeAttr::SubGroupSizeType SizeType;
  if (!IntelNamedSubGroupSizeAttr::ConvertStrToSubGroupSizeType(SizeStr,
                                                                SizeType)) {
    S.Diag(Loc, diag::warn_attribute_type_not_supported) << AL << SizeStr;
    // SizeType is only meaningful when the conversion succeeds; do not create
    // an attribute from an uninitialized enumerator.
    return;
  }
  D->addAttr(IntelNamedSubGroupSizeAttr::Create(S.Context, SizeType, AL));
}
3244+
32093245
void Sema::AddSYCLIntelNumSimdWorkItemsAttr(Decl *D,
32103246
const AttributeCommonInfo &CI,
32113247
Expr *E) {
@@ -9214,6 +9250,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
92149250
case ParsedAttr::AT_IntelReqdSubGroupSize:
92159251
handleIntelReqdSubGroupSize(S, D, AL);
92169252
break;
9253+
case ParsedAttr::AT_IntelNamedSubGroupSize:
9254+
handleIntelNamedSubGroupSize(S, D, AL);
9255+
break;
92179256
case ParsedAttr::AT_SYCLIntelNumSimdWorkItems:
92189257
handleSYCLIntelNumSimdWorkItemsAttr(S, D, AL);
92199258
break;

0 commit comments

Comments
 (0)