Skip to content

[SYCL] Implemented SYCL 2020 sub-group size functionality. #3444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0bf5ea8
Implemented SYCL 2020 sub-group size functionality.
Mar 29, 2021
25a28b3
Apply clang-format changes
Mar 30, 2021
646a227
Another clang-format change?
Mar 30, 2021
5c03d1a
Fix all of Aaron's comments, plus change auto to automatic
Mar 30, 2021
149a832
Fix attr.td param spelling + nullptr placement based on review comments
Mar 30, 2021
2c35a95
Add test for semadeclattr checks, semasycl checks to come
Mar 30, 2021
6be3ccc
A few more comments from Aaron, plus getting the SemaSYCL diagnostics…
Mar 30, 2021
5260caa
More progress on tests, found issue with defined SYCL_KERNEL functions
Mar 30, 2021
10f2ec1
Fix a few more semasycl diagnostics, fix command line arg
Mar 30, 2021
22671a1
Get to a reasonable position so that we can pick this up later.
Mar 31, 2021
cad6ca1
Merge branch 'sycl' into sub_group_size
Apr 9, 2021
5036d3a
Allow string parameters, finish up the sema tests.
Apr 9, 2021
0318b40
Clang-format fixes
Apr 9, 2021
612ea78
more clang-format fixes
Apr 9, 2021
0b2e128
Merge remote-tracking branch 'SYCL_public/sycl' into sub_group_size
Apr 12, 2021
2812d80
Merge remote-tracking branch 'SYCL_public/sycl' into sub_group_size
Apr 12, 2021
b692be1
Aaron's review comments & Merge build failure fixes
Apr 12, 2021
f028166
Revert some inadvertent changes to AttrDocs.td
Apr 12, 2021
1cd4cc2
Clang-format fixes + getting codegen to work right
Apr 12, 2021
c1ab6a0
Add sufficient code-gen tests, fix codegen.
Apr 13, 2021
9e186cf
Fix Aaron's nits
Apr 13, 2021
59b7b5b
Add driver test, fix it the way Mike suggested
Apr 13, 2021
3129b5f
Remove newline to satisfy clang-format
Apr 13, 2021
4fc91bd
Fix check-sycl tests by excluding old-spelling from the test
Apr 13, 2021
ebd9087
Merge remote-tracking branch 'SYCL_public/sycl' into sub_group_size
Apr 14, 2021
51f7617
Add comments to each test
Apr 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,7 @@ def SYCLSimd : InheritableAttr {
let Subjects = SubjectList<[Function, GlobalVar]>;
let Documentation = [SYCLSimdDocs];
let SupportsNonconformingLambdaSyntax = 1;
let SimpleHandler = 1;
}

// Available in SYCL explicit SIMD extension. Binds a file scope private
Expand Down Expand Up @@ -1431,15 +1432,32 @@ def LoopUnrollHint : StmtAttr {
}

def IntelReqdSubGroupSize: InheritableAttr {
let Spellings = [GNU<"intel_reqd_sub_group_size">,
CXX11<"intel", "reqd_sub_group_size">];
let Spellings = [
GNU<"intel_reqd_sub_group_size">, CXX11<"intel", "reqd_sub_group_size">,
CXX11<"intel", "sub_group_size"> // SYCL2020 spelling.
];
let Args = [ExprArgument<"Value">];
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [IntelReqdSubGroupSizeDocs];
let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let SupportsNonconformingLambdaSyntax = 1;
let Accessors =
[Accessor<"isSYCL2020Spelling", [CXX11<"intel", "sub_group_size">]>];
}

// Attribute record for ``[[intel::named_sub_group_size(NAME)]]``: selects a
// sub-group size by name rather than by integer value.
def IntelNamedSubGroupSize : InheritableAttr {
// Only the C++11 ``intel::`` spelling is provided for this attribute.
let Spellings = [CXX11<"intel", "named_sub_group_size">];
// The single argument is an enumeration: the accepted source strings
// "automatic" and "primary" map to the enumerators Automatic and Primary of
// the generated SubGroupSizeType enum.
let Args = [EnumArgument<"Type", "SubGroupSizeType", ["automatic", "primary"],
["Automatic", "Primary"]>];
// Appertains to functions only (subject list uses ErrorDiag).
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [IntelNamedSubGroupSizeDocs];
// Same language-option gating as IntelReqdSubGroupSize.
let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let SupportsNonconformingLambdaSyntax = 1;
}

// The integer sub-group size attribute, the named sub-group size attribute and
// the explicit-SIMD attribute are declared mutually exclusive with each other.
def :
MutualExclusions<[IntelReqdSubGroupSize, IntelNamedSubGroupSize, SYCLSimd]>;

// This attribute is both a type attribute, and a declaration attribute (for
// parameter variables).
def OpenCLAccess : Attr {
Expand Down
52 changes: 50 additions & 2 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -4354,9 +4354,9 @@ This attribute can be used in both OpenCL and SYCL.

OpenCL documentation:
The optional attribute intel_reqd_sub_group_size can be used to indicate that
the kernel must be compiled and executed with the specified subgroup size. When
the kernel must be compiled and executed with the specified sub group size. When
this attribute is present, get_max_sub_group_size() is guaranteed to return the
specified integer value. This is important for the correctness of many subgroup
specified integer value. This is important for the correctness of many sub group
algorithms, and in some cases may be used by the compiler to generate more optimal
code. See `cl_intel_required_subgroup_size
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`
Expand All @@ -4367,6 +4367,13 @@ The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must
be compiled and executed with a sub-group of size n. The value of n must be set
to a sub-group size supported by the device, or device compilation will fail.

The ``[[intel::sub_group_size(n)]]`` attribute has the same effect as the other
attribute spellings, except that it follows the SYCL 2020 Attribute Rules. See
the ``[[intel::named_sub_group_size(NAME)]]`` documentation for clarification.

This attribute is mutually exclusive with ``[[intel::named_sub_group_size(NAME)]]``
and ``[[intel::sycl_explicit_simd]]``.

In addition to device functions, the required sub-group size attribute may also
be specified in the definition of a named functor object and lambda functions,
as in the examples below:
Expand All @@ -4390,6 +4397,47 @@ See Sub-groups for NDRange Parallelism proposal in sycl/doc/extensions/sub_group
}];
}

def IntelNamedSubGroupSizeDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``[[intel::named_sub_group_size(NAME)]]`` attribute works similarly to the
``[[intel::sub_group_size(N)]]`` attribute in that it defines the specific
sub-group size for the kernel. The ``[[intel::named_sub_group_size(NAME)]]``
form accepts a required parameter of either ``automatic`` or ``primary``.

``automatic`` specifies that the implementation is free to select any of the
valid sub-group sizes associated with the device to which the kernel is
submitted. ``primary`` specifies that the implementation should select the
device's primary sub-group size as reported by
``info::device::primary_sub_group_size``.

This attribute may not be combined with ``[[intel::sub_group_size(N)]]``, as
the two attributes have different meanings. It is also mutually exclusive with
``[[intel::sycl_explicit_simd]]``.

In addition to the attributes, a default sub-group size strategy may be
specified by the ``-fsycl-default-sub-group-size`` command line option, which
accepts either ``automatic``, ``primary``, or a default size as an integer.
These values match and have the same behavior as the ``automatic``, ``primary``,
and ``[[intel::sub_group_size(N)]]`` values respectively.

SYCL 2020 Attribute Rules:
SYCL 2020 specifies that kernel-type attributes should only be specified on the
kernel or a ``SYCL_EXTERNAL`` function. This implementation permits these
attributes to appear on all function declarations for the purposes of
self-documenting declarations. However, these attributes must match the kernel's
sub-group size as configured by the command line, or via an attribute
specifically.

In addition to the SYCL 2020 Attribute Rules, this attribute and the
``[[intel::sub_group_size(N)]]`` attribute also require that any
``SYCL_EXTERNAL`` function that is not defined in this translation unit (that
is, one defined in a different translation unit) has a sub-group size
specification that matches the one on the kernel function that calls it.
}];
}

def OpenCLAccessDocs : Documentation {
let Category = DocCatStmt;
let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11317,6 +11317,11 @@ def note_spelling_suggestion : Note<
"did you mean to use %0 instead?">;
def warn_attribute_requires_non_negative_integer_argument :
Warning<warn_impcast_integer_sign.Text>, InGroup<AcceptedAttributes>;
// Sub-group size mismatch diagnostics. %0 of the error selects between a
// function called from a kernel (0) and an undefined 'SYCL_EXTERNAL'
// function (1).
def err_sycl_mismatch_group_size
: Error<"%select{kernel-called|undefined 'SYCL_EXTERNAL'}0 function must "
"have a sub group size that matches the size specified for the "
"kernel">;
// Companion note that points at the kernel declaration.
def note_sycl_kernel_declared_here : Note<"kernel declared here">;

// errors of expect.with.probability
def err_probability_not_constant_float : Error<
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,14 @@ LANGOPT(RelativeCXXABIVTables, 1, 0,

LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits")

// Default sub-group size strategy; stored in 2 bits and defaults to
// SubGroupSizeType::None.
ENUM_LANGOPT(DefaultSubGroupSizeType, SubGroupSizeType, 2,
SubGroupSizeType::None,
"Strategy via which sub group is assigned for SYCL kernel "
"types if not overridden via attributes")

// Integer payload (32 bits, default 0) that accompanies
// DefaultSubGroupSizeType when that option is SubGroupSizeType::Integer.
VALUE_LANGOPT(DefaultSubGroupSize, 32, 0,
"If DefaultSubGroupSizeType is Integer contains the value")

#undef LANGOPT
#undef COMPATIBLE_LANGOPT
#undef BENIGN_LANGOPT
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ class LangOptions : public LangOptionsBase {
Single
};

/// Kinds of default sub-group size that can be requested via
/// -fsycl-default-sub-group-size: no default (None), "automatic" (Auto),
/// "primary" (Primary), or an explicit integer value (Integer).
enum class SubGroupSizeType { None, Auto, Primary, Integer };

public:
/// The used language standard.
LangStandard::Kind LangStd;
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5728,6 +5728,14 @@ def sycl_std_EQ : Joined<["-"], "sycl-std=">, Group<sycl_Group>,
MarshallingInfoEnum<LangOpts<"SYCLVersion">, "SYCL_None">,
ShouldParseIf<!strconcat(fsycl_is_device.KeyPath, "||", fsycl_is_host.KeyPath)>;

// -fsycl-default-sub-group-size <value>: separate-argument form of the option,
// also exposed as a cc1 option.
def fsycl_default_sub_group_size
: Separate<["-"], "fsycl-default-sub-group-size">,
HelpText<"Set the default sub group size for SYCL kernels">,
Flags<[CC1Option]>;
// -fsycl-default-sub-group-size=<value>: joined form, declared as an alias of
// the separate form above.
def fsycl_default_sub_group_size_EQ
: Joined<["-"], "fsycl-default-sub-group-size=">,
Alias<fsycl_default_sub_group_size>, Flags<[CC1Option]>;

defm cuda_approx_transcendentals : BoolFOption<"cuda-approx-transcendentals",
LangOpts<"CUDADeviceApproxTranscendentals">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use">, NegFlag<SetFalse, [], "Don't use">,
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -10275,6 +10275,8 @@ class Sema final {
Expr *E);
IntelReqdSubGroupSizeAttr *
MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A);
IntelNamedSubGroupSizeAttr *
MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A);
void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *E);
SYCLIntelNumSimdWorkItemsAttr *
Expand Down
48 changes: 43 additions & 5 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F,
void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn)
{
if (!FD->hasAttr<OpenCLKernelAttr>())
if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<SYCLDeviceAttr>())
return;

// TODO Module identifier is not reliable for this purpose since two modules
Expand All @@ -602,7 +602,8 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,

llvm::LLVMContext &Context = getLLVMContext();

CGM.GenOpenCLArgMetadata(Fn, FD, this);
if (FD->hasAttr<OpenCLKernelAttr>())
CGM.GenOpenCLArgMetadata(Fn, FD, this);

if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
QualType HintQTy = A->getTypeHint();
Expand Down Expand Up @@ -648,15 +649,52 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::MDNode::get(Context, AttrMDArgs));
}

if (const IntelReqdSubGroupSizeAttr *A =
FD->getAttr<IntelReqdSubGroupSizeAttr>()) {
const auto *CE = dyn_cast<ConstantExpr>(A->getValue());
bool IsKernelOrDevice =
FD->hasAttr<SYCLKernelAttr>() || FD->hasAttr<SYCLDeviceAttr>();
const IntelReqdSubGroupSizeAttr *ReqSubGroup =
FD->getAttr<IntelReqdSubGroupSizeAttr>();

// To support the SYCL 2020 spelling with no propagation, only emit for
// kernel-or-device when that spelling is used; otherwise fall back to the
// old behavior.
if (ReqSubGroup && (IsKernelOrDevice || !ReqSubGroup->isSYCL2020Spelling())) {
const auto *CE = dyn_cast<ConstantExpr>(ReqSubGroup->getValue());
assert(CE && "Not an integer constant expression");
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
Builder.getInt32(ArgVal->getSExtValue()))};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
} else if (IsKernelOrDevice &&
CGM.getLangOpts().getDefaultSubGroupSizeType() ==
LangOptions::SubGroupSizeType::Integer) {
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
Builder.getInt32(CGM.getLangOpts().DefaultSubGroupSize))};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}

// SYCL 2020 doesn't propagate attributes, so don't put it in an intermediate
// location.
if (IsKernelOrDevice) {
if (const auto *A = FD->getAttr<IntelNamedSubGroupSizeAttr>()) {
llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get(
Context, A->getType() == IntelNamedSubGroupSizeAttr::Primary
? "primary"
: "automatic")};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
} else if (CGM.getLangOpts().getDefaultSubGroupSizeType() ==
LangOptions::SubGroupSizeType::Auto) {
llvm::Metadata *AttrMDArgs[] = {
llvm::MDString::get(Context, "automatic")};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
} else if (CGM.getLangOpts().getDefaultSubGroupSizeType() ==
LangOptions::SubGroupSizeType::Primary) {
llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get(Context, "primary")};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}
}

if (FD->hasAttr<SYCLSimdAttr>()) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4394,6 +4394,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
HeaderOpt.append(Header);
CmdArgs.push_back(Args.MakeArgString(HeaderOpt));
}

// Forward -fsycl-default-sub-group-size if in SYCL mode.
Args.AddLastArg(CmdArgs, options::OPT_fsycl_default_sub_group_size);
}

if (IsSYCL) {
Expand Down
36 changes: 36 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3502,6 +3502,20 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
LangOptions::SignReturnAddressKeyKind::BKey)
GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA);

switch (Opts.getDefaultSubGroupSizeType()) {
case LangOptions::SubGroupSizeType::Auto:
GenerateArg(Args, OPT_fsycl_default_sub_group_size, "automatic", SA);
break;
case LangOptions::SubGroupSizeType::Primary:
GenerateArg(Args, OPT_fsycl_default_sub_group_size, "primary", SA);
break;
case LangOptions::SubGroupSizeType::Integer:
GenerateArg(Args, OPT_fsycl_default_sub_group_size,
Twine(Opts.DefaultSubGroupSize), SA);
break;
case LangOptions::SubGroupSizeType::None:
break;
}
}

bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Expand Down Expand Up @@ -3592,6 +3606,28 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
}
}

// Parse SYCL Default Sub group size.
if (const Arg *A = Args.getLastArg(OPT_fsycl_default_sub_group_size)) {
StringRef Value = A->getValue();
Opts.setDefaultSubGroupSizeType(
llvm::StringSwitch<LangOptions::SubGroupSizeType>(Value)
.Case("automatic", LangOptions::SubGroupSizeType::Auto)
.Case("primary", LangOptions::SubGroupSizeType::Primary)
.Default(LangOptions::SubGroupSizeType::Integer));

if (Opts.getDefaultSubGroupSizeType() ==
LangOptions::SubGroupSizeType::Integer) {
int64_t IntResult;
if (!Value.getAsInteger(10, IntResult)) {
Opts.DefaultSubGroupSize = IntResult;
} else {
Diags.Report(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
Opts.setDefaultSubGroupSizeType(LangOptions::SubGroupSizeType::None);
}
}
}

// These need to be parsed now. They are used to set OpenCL defaults.
Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins);
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2623,6 +2623,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.mergeEnforceTCBLeafAttr(D, *TCBLA);
else if (const auto *A = dyn_cast<IntelReqdSubGroupSizeAttr>(Attr))
NewAttr = S.MergeIntelReqdSubGroupSizeAttr(D, *A);
else if (const auto *A = dyn_cast<IntelNamedSubGroupSizeAttr>(Attr))
NewAttr = S.MergeIntelNamedSubGroupSizeAttr(D, *A);
else if (const auto *A = dyn_cast<SYCLIntelNumSimdWorkItemsAttr>(Attr))
NewAttr = S.MergeSYCLIntelNumSimdWorkItemsAttr(D, *A);
else if (const auto *A = dyn_cast<SYCLIntelSchedulerTargetFmaxMhzAttr>(Attr))
Expand Down
39 changes: 39 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,42 @@ static void handleIntelReqdSubGroupSize(Sema &S, Decl *D,
S.AddIntelReqdSubGroupSize(D, AL, E);
}

/// Merges the named sub-group size attribute \p A onto declaration \p D.
///
/// \returns a newly created copy of \p A when \p D does not yet carry an
/// IntelNamedSubGroupSizeAttr, or nullptr when one is already present (after
/// warning if the two attributes name different types).
IntelNamedSubGroupSizeAttr *
Sema::MergeIntelNamedSubGroupSizeAttr(Decl *D,
                                      const IntelNamedSubGroupSizeAttr &A) {
  const auto *Existing = D->getAttr<IntelNamedSubGroupSizeAttr>();

  // No attribute of this kind on the declaration yet: adopt the incoming one.
  if (!Existing)
    return IntelNamedSubGroupSizeAttr::Create(Context, A.getType(), A);

  // An attribute is already present. Identical values are silently dropped;
  // differing values are diagnosed as a duplicate before being dropped.
  const bool SameType = Existing->getType() == A.getType();
  if (!SameType) {
    Diag(Existing->getLoc(), diag::warn_duplicate_attribute) << &A;
    Diag(A.getLoc(), diag::note_previous_attribute);
  }
  return nullptr;
}

/// Processes a parsed ``named_sub_group_size`` attribute \p AL and, when its
/// argument is valid, attaches an IntelNamedSubGroupSizeAttr to \p D.
///
/// The argument may be written either as a bare identifier or as a string
/// literal. If it is neither, or if it does not name a known sub-group size
/// type, a diagnostic is emitted and no attribute is added.
static void handleIntelNamedSubGroupSize(Sema &S, Decl *D,
                                         const ParsedAttr &AL) {
  StringRef SizeStr;
  SourceLocation Loc;
  if (AL.isArgIdent(0)) {
    // Identifier form: take the spelling and location from the identifier.
    IdentifierLoc *IL = AL.getArgAsIdent(0);
    SizeStr = IL->Ident->getName();
    Loc = IL->Loc;
  } else if (!S.checkStringLiteralArgumentAttr(AL, 0, SizeStr, &Loc)) {
    // Not a string literal either; nothing further to do here.
    return;
  }

  IntelNamedSubGroupSizeAttr::SubGroupSizeType SizeType;
  if (!IntelNamedSubGroupSizeAttr::ConvertStrToSubGroupSizeType(SizeStr,
                                                                SizeType)) {
    S.Diag(Loc, diag::warn_attribute_type_not_supported) << AL << SizeStr;
    // Bail out: SizeType was never assigned, so creating the attribute from it
    // would read an uninitialized value.
    return;
  }
  D->addAttr(IntelNamedSubGroupSizeAttr::Create(S.Context, SizeType, AL));
}

void Sema::AddSYCLIntelNumSimdWorkItemsAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E) {
Expand Down Expand Up @@ -9214,6 +9250,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_IntelReqdSubGroupSize:
handleIntelReqdSubGroupSize(S, D, AL);
break;
case ParsedAttr::AT_IntelNamedSubGroupSize:
handleIntelNamedSubGroupSize(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelNumSimdWorkItems:
handleSYCLIntelNumSimdWorkItemsAttr(S, D, AL);
break;
Expand Down
Loading