From db2f8c7c3bd3f5913747daeca10c6c7baff38c97 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 3 Jul 2024 23:23:31 +0100 Subject: [PATCH 1/8] [SYCL][NVPTX] Do not decompose SYCL functor unless necessary CUDA backend can support passing pointer in the generic address space. The patch prevent the decomposition of the SYCL functor if there is no special types in it. Signed-off-by: Victor Lomuller --- clang/lib/Sema/SemaSYCL.cpp | 191 +++++++++++++++++++++++++++--------- 1 file changed, 145 insertions(+), 46 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 7a0b2dede0a7f..7b4787a70427a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -312,6 +312,13 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, SYCLBuiltinBaseTypeExpr(Loc, SourceTy, Idx, BaseTy); } +/// Returns true if the target requires a new type. +/// This happens if a pointer to generic cannot be passed +static bool targetRequiresNewType(ASTContext &Context) { + llvm::Triple T = Context.getTargetInfo().getTriple(); + return !T.isNVPTX(); +} + // This information is from Section 4.13 of the SYCL spec // https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf // This function returns false if the math lib function @@ -1447,6 +1454,19 @@ class KernelObjVisitor { public: KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {} + static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) { + return !(targetRequiresNewType(KernelObj->getASTContext()) || + KernelObj->hasAttr() || + KernelObj->hasAttr()); + } + + template + void visitTopLevelRecord(const CXXRecordDecl *Owner, QualType RecordTy, + HandlerTys &...Handlers) { + (void)std::initializer_list{ + (Handlers.handleTopLevelStruct(Owner, RecordTy), 0)...}; + } + template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, HandlerTys &... Handlers) { @@ -1465,6 +1485,20 @@ class KernelObjVisitor { void visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, QualType ArrayTy, HandlerTys &...Handlers); + // A visitor for Kernel object to functions as defined in + // SyclKernelFieldHandler by iterating over fields and bases + // if they require decomposition or new type. + template + void VisitKernelRecord(const CXXRecordDecl *KernelObj, + QualType KernelFunctorTy, HandlerTys &...Handlers) { + if (!useTopLevelKernelObj(KernelObj)) { + VisitRecordBases(KernelObj, Handlers...); + VisitRecordFields(KernelObj, Handlers...); + } else { + visitTopLevelRecord(KernelObj, KernelFunctorTy, Handlers...); + } + } + // A visitor function that dispatches to functions as defined in // SyclKernelFieldHandler by iterating over a free function parameter list. template @@ -1517,6 +1551,13 @@ class SyclKernelFieldHandlerBase { virtual bool handleOtherType(FieldDecl *, QualType) { return true; } virtual bool handleOtherType(ParmVarDecl *, QualType) { return true; } + // Handle the SYCL kernel as a whole. This applies only when the target can + // support pointer to the generic address space as arguments and the functor + // doesn't have any SYCL special types. + virtual bool handleTopLevelStruct(const CXXRecordDecl *, QualType) { + return true; + } + // Handle a simple struct that doesn't need to be decomposed, only called on // handlers with VisitInsideSimpleContainers as false. Replaces // handleStructType, enterStruct, leaveStruct, and visiting of sub-elements. @@ -2078,10 +2119,9 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { static constexpr const bool VisitNthArrayElement = false; SyclKernelDecompMarker(SemaSYCL &S) : SyclKernelFieldHandler(S) { - // In order to prevent checking this over and over, just add a dummy-base - // entry. - CollectionStack.push_back(true); - PointerStack.push_back(true); + // Base entry. + CollectionStack.push_back(false); + PointerStack.push_back(false); } bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &, @@ -2111,6 +2151,26 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { return true; } + // Add Top level information to ease checks for processor. + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + assert(RD && "should not be null."); + if (CollectionStack.pop_back_val()) { + if (!RD->hasAttr()) + RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( + SemaSYCLRef.getASTContext())); + PointerStack.pop_back(); + } else if (PointerStack.pop_back_val()) { + if (!RD->hasAttr() && + targetRequiresNewType(SemaSYCLRef.getASTContext())) + RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( + SemaSYCLRef.getASTContext())); + } + assert(CollectionStack.size() == 0); + assert(PointerStack.size() == 0); + return true; + } + bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { CollectionStack.push_back(false); PointerStack.push_back(false); @@ -2138,7 +2198,8 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { PointerStack.back() = true; - if (!RD->hasAttr()) + if (!RD->hasAttr() && + targetRequiresNewType(SemaSYCLRef.getASTContext())) RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); } @@ -2175,7 +2236,8 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { PointerStack.back() = true; - if (!RD->hasAttr()) + if (!RD->hasAttr() && + targetRequiresNewType(SemaSYCLRef.getASTContext())) RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); } @@ -2209,7 +2271,8 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { CollectionStack.back() = true; PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { - if (!FD->hasAttr()) + if (!FD->hasAttr() && + targetRequiresNewType(SemaSYCLRef.getASTContext())) FD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); PointerStack.back() = true; @@ -2878,6 +2941,12 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + StringRef Name = "_arg__sycl_functor"; + addParam(Name, Ty); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { // This is a field which should not be decomposed. @@ -3110,6 +3179,11 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(Ty); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { addParam(Ty); @@ -3271,6 +3345,12 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(DC.getParamVarDeclsForCurrentField()[0]->getType(), + "SYCL Functor"); + return true; + } + using SyclKernelFieldHandler::handleNonDecompStruct; bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { @@ -3311,6 +3391,7 @@ static bool isESIMDKernelType(CXXMethodDecl *CallOperator) { } class SyclKernelBodyCreator : public SyclKernelFieldHandler { + bool UseTopLevelKernelObj; SyclKernelDeclCreator &DeclCreator; llvm::SmallVector BodyStmts; llvm::SmallVector CollectionInitExprs; @@ -3322,7 +3403,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // nextElement. llvm::SmallVector, 8> ArrayInfos; VarDecl *KernelObjClone; - InitializedEntity VarEntity; + std::optional VarEntity; llvm::SmallVector MemberExprBases; llvm::SmallVector ArrayParamBases; FunctionDecl *KernelCallerFunc; @@ -3353,10 +3434,12 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // Push the Kernel function scope to ensure the scope isn't empty SemaSYCLRef.SemaRef.PushFunctionScope(); - // Initialize kernel object local clone - assert(CollectionInitExprs.size() == 1 && - "Should have been popped down to just the first one"); - KernelObjClone->setInit(CollectionInitExprs.back()); + if (!UseTopLevelKernelObj) { + // Initialize kernel object local clone + assert(CollectionInitExprs.size() == 1 && + "Should have been popped down to just the first one"); + KernelObjClone->setInit(CollectionInitExprs.back()); + } // Replace references to the kernel object in kernel body, to use the // compiler generated local clone @@ -3480,7 +3563,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return InitializedEntity::InitializeElement(SemaSYCLRef.getASTContext(), ArrayInfos.back().second, ArrayInfos.back().first); - return InitializedEntity::InitializeMember(FD, &VarEntity); + return InitializedEntity::InitializeMember(FD, &VarEntity.value()); } void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef) { @@ -3510,7 +3593,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializationKind InitKind) { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, std::nullopt); ExprResult Init = @@ -3525,7 +3608,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializationKind InitKind, MultiExprArg Args) { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, Args); ExprResult Init = InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, Args); @@ -3541,7 +3624,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); Expr *ParamRef = createParamReferenceExpr(); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, @@ -3799,8 +3882,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { Expr *ArrayRef = createSimpleArrayParamReferenceExpr(FieldTy); InitializationKind InitKind = InitializationKind::CreateDirect({}, {}, {}); - InitializedEntity Entity = - InitializedEntity::InitializeMember(FD, &VarEntity, /*Implicit*/ true); + InitializedEntity Entity = InitializedEntity::InitializeMember( + FD, &VarEntity.value(), /*Implicit*/ true); addFieldInit(FD, FieldTy, ArrayRef, InitKind, Entity); } @@ -3865,27 +3948,33 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj, FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, CXXMethodDecl *CallOperator) - : SyclKernelFieldHandler(S), DeclCreator(DC), - KernelObjClone(createKernelObjClone(S.getASTContext(), - DC.getKernelDecl(), KernelObj)), - VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), - KernelCallerFunc(KernelCallerFunc), + : SyclKernelFieldHandler(S), + UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(KernelObj)), + DeclCreator(DC), + KernelObjClone(UseTopLevelKernelObj + ? nullptr + : createKernelObjClone(S.getASTContext(), + DC.getKernelDecl(), + KernelObj)), + VarEntity(), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()), IsESIMD(IsSIMDKernel), CallOperator(CallOperator) { - CollectionInitExprs.push_back(createInitListExpr(KernelObj)); - annotateHierarchicalParallelismAPICalls(); - - Stmt *DS = new (S.getASTContext()) DeclStmt( - DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); - BodyStmts.push_back(DS); - DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( - S.getASTContext(), NestedNameSpecifierLoc(), KernelCallerSrcLoc, - KernelObjClone, false, DeclarationNameInfo(), - QualType(KernelObj->getTypeForDecl(), 0), VK_LValue); - MemberExprBases.push_back(KernelObjCloneRef); + if (!UseTopLevelKernelObj) { + VarEntity.emplace(InitializedEntity::InitializeVariable(KernelObjClone)); + Stmt *DS = new (S.getASTContext()) DeclStmt( + DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); + BodyStmts.push_back(DS); + CollectionInitExprs.push_back(createInitListExpr(KernelObj)); + DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( + S.getASTContext(), NestedNameSpecifierLoc(), KernelCallerSrcLoc, + KernelObjClone, false, DeclarationNameInfo(), + QualType(KernelObj->getTypeForDecl(), 0), VK_LValue); + MemberExprBases.push_back(KernelObjCloneRef); + } } ~SyclKernelBodyCreator() { + annotateHierarchicalParallelismAPICalls(); CompoundStmt *KernelBody = createKernelBody(); DeclCreator.setBody(KernelBody); } @@ -3914,6 +4003,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + // As the functor is passed as a whole, use the param as the vardecl + // otherwise used as the clone. + KernelObjClone = DeclCreator.getParamVarDeclsForCurrentField()[0]; + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); @@ -4468,6 +4564,11 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(Ty, SYCLIntegrationHeader::kind_std_layout, /*Offset=*/0); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { addParam(FD, Ty, SYCLIntegrationHeader::kind_std_layout); @@ -4835,6 +4936,8 @@ void SemaSYCL::CheckSYCLKernelCall(FunctionDecl *KernelFunc, Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, DecompMarker); + Visitor.visitTopLevelRecord(KernelObj, GetSYCLKernelObjectType(KernelFunc), + FieldChecker, UnionChecker, DecompMarker); DiagnosingSYCLKernel = false; // Set the kernel function as invalid, if any of the checkers fail validation. @@ -4949,8 +5052,8 @@ void SemaSYCL::SetSYCLKernelNames() { void SemaSYCL::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC) { // The first argument to the KernelCallerFunc is the lambda object. - const CXXRecordDecl *KernelObj = - GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); + QualType KernelObjTy = GetSYCLKernelObjectType(KernelCallerFunc); + const CXXRecordDecl *KernelObj = KernelObjTy->getAsCXXRecordDecl(); assert(KernelObj && "invalid kernel caller"); // Do not visit invalid kernel object. @@ -5005,17 +5108,13 @@ void SemaSYCL::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer, - opt_report); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer, - opt_report); + Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, + esimdKernel, kernel_decl, kernel_body, int_header, + int_footer, opt_report); } else { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer); + Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, + esimdKernel, kernel_decl, kernel_body, int_header, + int_footer); } if (ParmVarDecl *KernelHandlerArg = From 1aecb8b1e3f94cd7c4dafef524e706cb056d4eed Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Mon, 8 Jul 2024 13:01:22 +0100 Subject: [PATCH 2/8] "decompose" empty structs to avoid uneeded arguments Signed-off-by: Victor Lomuller --- clang/lib/Sema/SemaSYCL.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 7b4787a70427a..ddd752331b72c 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1455,9 +1455,17 @@ class KernelObjVisitor { KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {} static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) { - return !(targetRequiresNewType(KernelObj->getASTContext()) || - KernelObj->hasAttr() || - KernelObj->hasAttr()); + // If the kernel is empty, "decompose" it so we don't generate arguments. + if (KernelObj->isEmpty()) + return false; + // FIXME: Workaround to not change large number of tests + // this is covered by the test below. + if (targetRequiresNewType(KernelObj->getASTContext())) + return false; + if (KernelObj->hasAttr() || + KernelObj->hasAttr()) + return false; + return true; } template From 02e07bad05e5d0a65c422ce80a581eebe92f59ce Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Mon, 8 Jul 2024 16:28:19 +0100 Subject: [PATCH 3/8] Update clang tests --- clang/test/CodeGenSYCL/kernel-handler.cpp | 3 +- clang/test/SemaSYCL/kernel-arg-opt-report.cpp | 6 ++-- clang/test/SemaSYCL/kernel-handler.cpp | 32 ++++++------------- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index efa27788fd57c..e8f3bbf041ef9 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -23,7 +23,8 @@ void test(int val) { } // ALL: define dso_local{{ spir_kernel | }}void @{{.*}}test_kernel_handler{{[^(]*}} -// ALL-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) +// NONATIVESUPPORT-SAME: (ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) +// NATIVESUPPORT-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) // ALL: %kh = alloca %"class.sycl::_V1::kernel_handler", align 1 // NONATIVESUPPORT: %[[KH:[0-9]+]] = load ptr addrspace(1), ptr %_arg__specialization_constants_buffer.addr, align 8 diff --git a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp index 95f2106d72655..92f19374818ed 100644 --- a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp +++ b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp @@ -348,10 +348,10 @@ int main() { // SPIR-NEXT: String: 'Arg ' // SPIR-NEXT: Argument: '13' // SPIR-NEXT: String: ':' -// SPIR-NEXT: String: '' -// SPIR-NEXT: String: A +// SPIR-NEXT: String: 'Compiler generated argument for decomposed struct/class,' +// SPIR-NEXT: String: KernelFunctor // SPIR-NEXT: String: ' (' -// SPIR-NEXT: String: '' +// SPIR-NEXT: String: 'Field:A, ' // SPIR-NEXT: String: 'Type:' // SPIR-NEXT: String: int // SPIR-NEXT: String: ', ' diff --git a/clang/test/SemaSYCL/kernel-handler.cpp b/clang/test/SemaSYCL/kernel-handler.cpp index 4df4a8d17bc7f..e73c24f6d89e3 100644 --- a/clang/test/SemaSYCL/kernel-handler.cpp +++ b/clang/test/SemaSYCL/kernel-handler.cpp @@ -28,19 +28,12 @@ int main() { } // Check test_kernel_handler parameters -// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void (int, __global char *)' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int' +// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)' +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}' // NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' -// Check declaration and initialization of kernel object local clone -// NONATIVESUPPORT-NEXT: CompoundStmt -// NONATIVESUPPORT-NEXT: DeclStmt -// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit -// NONATIVESUPPORT-NEXT: InitListExpr -// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int' - // Check declaration and initialization of kernel handler local clone using default constructor +// NONATIVESUPPORT-NEXT: CompoundStmt // NONATIVESUPPORT-NEXT: DeclStmt // NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept' @@ -58,26 +51,19 @@ int main() { // NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'void (sycl::kernel_handler) const' lvalue CXXMethod {{.*}} 'operator()' 'void (sycl::kernel_handler) const' // Kernel body with clones // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' +// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '_arg__sycl_functor' '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}} 'sycl::kernel_handler' 'void (const kernel_handler &) noexcept' // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const kernel_handler':'const sycl::kernel_handler' lvalue // NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'kernel_handler':'sycl::kernel_handler' lvalue Var {{.*}} 'kh' 'kernel_handler':'sycl::kernel_handler' // Check test_pfwg_kernel_handler parameters -// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void (int, __global char *)' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' - -// Check declaration and initialization of kernel object local clone -// NONATIVESUPPORT-NEXT: CompoundStmt -// NONATIVESUPPORT-NEXT: DeclStmt -// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit -// NONATIVESUPPORT-NEXT: InitListExpr -// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int' +// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)' +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' // Check declaration and initialization of kernel handler local clone using default constructor +// NONATIVESUPPORT-NEXT: CompoundStmt // NONATIVESUPPORT-NEXT: DeclStmt // NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept' @@ -96,7 +82,7 @@ int main() { // Kernel body with clones // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' +// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: CXXTemporaryObjectExpr {{.*}} 'group<1>':'sycl::group<>' 'void () noexcept' zeroing // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'kernel_handler':'sycl::kernel_handler' 'void (const kernel_handler &) noexcept' // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}}'const sycl::kernel_handler' lvalue From e95ad2537ddc91dbef189bfc5f0ad3e87148bd80 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Mon, 8 Jul 2024 22:42:01 +0100 Subject: [PATCH 4/8] add decomp / no decomp flags, fix e2e test, add test Signed-off-by: Victor Lomuller --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 7 +++ clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Sema/SemaSYCL.cpp | 3 +- clang/test/SemaSYCL/no-decomp.cpp | 56 +++++++++++++++++++ ...s_ptr_multiple_nodes_different_indices.cpp | 3 +- ...pdate_with_indices_ptr_multiple_params.cpp | 3 +- .../Update/update_with_indices_scalar.cpp | 3 +- .../Update/whole_update_dynamic_param.cpp | 3 +- .../Tracing/usm/queue_single_task_nullptr.cpp | 3 +- .../queue_single_task_released_pointer.cpp | 3 +- 11 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 clang/test/SemaSYCL/no-decomp.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index ee5af40abe232..3c15696870638 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -302,6 +302,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension") LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels") +LANGOPT(SYCLDecomposeStruct, 1, 0, "Force top level decomposition of SYCL functor") LANGOPT( SYCLValueFitInMaxInt, 1, 1, "SYCL compiler assumes value fits within MAX_INT for member function of " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 12a1dd9e9dcd8..f0f3671b29a10 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3992,6 +3992,13 @@ defm sycl_instrument_device_code BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], " Instrumentation and Tracing " "Technology (ITT) instrumentation intrinsics calls " "(experimental)">>; +defm sycl_decompose_functor + : BoolFOption<"sycl-decompose-functor", + LangOpts<"SYCLDecomposeStruct">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], + " decompose SYCL functor if possible (experimental, CUDA only)">>; def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">, Group, HelpText<"Generate and use a custom linker script for huge" " device code sections">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9dada7cf351fc..9f3756c48c56c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5429,6 +5429,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-allow-func-ptr"); } + Args.AddLastArg(CmdArgs, options::OPT_fsycl_instrument_device_code, + options::OPT_fno_sycl_instrument_device_code); + // Forward -fsycl-instrument-device-code option to cc1. This option will // only be used for SPIR/SPIR-V based targets. if (Triple.isSPIROrSPIRV()) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ddd752331b72c..fd104f1c8dde9 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2163,7 +2163,8 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); assert(RD && "should not be null."); - if (CollectionStack.pop_back_val()) { + if (CollectionStack.pop_back_val() || + SemaSYCLRef.getLangOpts().SYCLDecomposeStruct) { if (!RD->hasAttr()) RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( SemaSYCLRef.getASTContext())); diff --git a/clang/test/SemaSYCL/no-decomp.cpp b/clang/test/SemaSYCL/no-decomp.cpp new file mode 100644 index 0000000000000..a5dd442cb3acb --- /dev/null +++ b/clang/test/SemaSYCL/no-decomp.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,DECOMP + +#include "Inputs/sycl.hpp" + +class with_acc { +public: + int *d; + sycl::accessor AccField; +}; + +class wraping_acc { +public: + with_acc acc; + void operator()() const { + } +}; + +class pointer_wrap { +public: + int *d; + void operator()() const { + } +}; + +class empty { +public: + void operator()() const { + } +}; + +int main() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + wraping_acc acc; + cgh.single_task(acc); + }); + // ALL: FunctionDecl {{.*}} _ZTS11wraping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)' + + q.submit([&](sycl::handler &cgh) { + pointer_wrap ptr; + cgh.single_task(ptr); + }); + // NODECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (pointer_wrap)' + // DECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (__global int *)' + + q.submit([&](sycl::handler &cgh) { + empty e; + cgh.single_task(e); + }); + // ALL: FunctionDecl {{.*}} _ZTS5empty 'void ()' + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp index 0b5d97dffcccb..57d2cca61a30b 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp @@ -1,4 +1,5 @@ -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp index 212074b5450f3..80dadf1b15fe2 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp @@ -1,4 +1,5 @@ -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp index 3c4bb8f189e7a..ab7753fc58eac 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp @@ -1,4 +1,5 @@ -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index f907006c88701..abd3b02a563cf 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -1,4 +1,5 @@ -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp index 6d19f88dd5487..bf44705371162 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp @@ -1,5 +1,6 @@ // UNSUPPORTED: windows || hip_amd -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: not env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp index f87717c8efc28..aa8d38754ffc8 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp @@ -1,5 +1,6 @@ // UNSUPPORTED: windows || hip_amd -// RUN: %{build} -o %t.out +// Force decomposition as it assumes arguments are passed individually. +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: not env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage From f41f8f9738daf9f8f9a6a0dc8e593fe89abb4087 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Tue, 9 Jul 2024 09:21:32 +0100 Subject: [PATCH 5/8] fix test --- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++-- clang/test/Driver/sycl-offload.c | 10 ++++++++++ clang/test/SemaSYCL/no-decomp.cpp | 6 +++--- .../test-e2e/KernelFusion/queue-shortcut-functions.cpp | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9f3756c48c56c..60bff0284b072 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5429,8 +5429,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-allow-func-ptr"); } - Args.AddLastArg(CmdArgs, options::OPT_fsycl_instrument_device_code, - options::OPT_fno_sycl_instrument_device_code); + Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor, + options::OPT_fno_sycl_decompose_functor); // Forward -fsycl-instrument-device-code option to cc1. This option will // only be used for SPIR/SPIR-V based targets. diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index a49b48954b4c8..6187f67c78a30 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -837,3 +837,13 @@ // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib + +/// ########################################################################### + +/// Check -fsycl-decompose-functor behaviors from source +// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DECOMP %s +// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s +// CHK-DECOMP: -fsycl-decompose-functor +// CHK-NODECOMP: -fno-sycl-decompose-functor diff --git a/clang/test/SemaSYCL/no-decomp.cpp b/clang/test/SemaSYCL/no-decomp.cpp index a5dd442cb3acb..d0ec63ac1867c 100644 --- a/clang/test/SemaSYCL/no-decomp.cpp +++ b/clang/test/SemaSYCL/no-decomp.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,NODECOMP -// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,NODECOMP -// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL,DECOMP +// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP #include "Inputs/sycl.hpp" diff --git a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp index 6adcd29ab3071..370e4dcefe9aa 100644 --- a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp +++ b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -o %t.out +// RUN: %{build} %{embed-ir} -fsycl-decompose-functor -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 \ // RUN: | FileCheck %s --implicit-check-not=ERROR From dceb600df1ee6951c5592b0059d13996e150736d Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 10 Jul 2024 15:11:01 +0100 Subject: [PATCH 6/8] fix grid const test --- clang/test/CodeGenSYCL/nvvm-annotations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/nvvm-annotations.cpp b/clang/test/CodeGenSYCL/nvvm-annotations.cpp index 858648d901fb7..1d13e22c04fdc 100644 --- a/clang/test/CodeGenSYCL/nvvm-annotations.cpp +++ b/clang/test/CodeGenSYCL/nvvm-annotations.cpp @@ -18,7 +18,7 @@ int main() { } s; q.submit([&](handler &h) { - // CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%struct.S) align 4 %_arg_s) + // CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor) h.single_task([=]() { (void) s;}); }); From a9abeb34b536591424df6e8d4d0957983a67c300 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Mon, 15 Jul 2024 20:54:00 +0100 Subject: [PATCH 7/8] apply feedbacks --- clang/lib/Sema/SemaSYCL.cpp | 14 +++++--------- clang/test/SemaSYCL/no-decomp.cpp | 6 +++--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 020291c6cec14..61fcba6346925 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2169,7 +2169,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { } bool handlePointerType(FieldDecl *, QualType) final { - PointerStack.back() = true; + PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); return true; } @@ -2190,8 +2190,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { SemaSYCLRef.getASTContext())); PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { - if (!RD->hasAttr() && - targetRequiresNewType(SemaSYCLRef.getASTContext())) + if (!RD->hasAttr()) RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); } @@ -2227,8 +2226,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { PointerStack.back() = true; - if (!RD->hasAttr() && - targetRequiresNewType(SemaSYCLRef.getASTContext())) + if (!RD->hasAttr()) RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); } @@ -2265,8 +2263,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { PointerStack.back() = true; - if (!RD->hasAttr() && - targetRequiresNewType(SemaSYCLRef.getASTContext())) + if (!RD->hasAttr()) RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); } @@ -2300,8 +2297,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { CollectionStack.back() = true; PointerStack.pop_back(); } else if (PointerStack.pop_back_val()) { - if (!FD->hasAttr() && - targetRequiresNewType(SemaSYCLRef.getASTContext())) + if (!FD->hasAttr()) FD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( SemaSYCLRef.getASTContext())); PointerStack.back() = true; diff --git a/clang/test/SemaSYCL/no-decomp.cpp b/clang/test/SemaSYCL/no-decomp.cpp index d0ec63ac1867c..65b64424d29d5 100644 --- a/clang/test/SemaSYCL/no-decomp.cpp +++ b/clang/test/SemaSYCL/no-decomp.cpp @@ -10,7 +10,7 @@ class with_acc { sycl::accessor AccField; }; -class wraping_acc { +class wrapping_acc { public: with_acc acc; void operator()() const { @@ -34,10 +34,10 @@ int main() { sycl::queue q; q.submit([&](sycl::handler &cgh) { - wraping_acc acc; + wrapping_acc acc; cgh.single_task(acc); }); - // ALL: FunctionDecl {{.*}} _ZTS11wraping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)' + // ALL: FunctionDecl {{.*}} _ZTS12wrapping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)' q.submit([&](sycl::handler &cgh) { pointer_wrap ptr; From d7f7521b6d97d46878edda70d23ae1b7b63eed42 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 17 Jul 2024 15:56:01 +0100 Subject: [PATCH 8/8] Flip make fsycl-decompose-functor the default and the no decompose opt-in --- clang/include/clang/Basic/LangOptions.def | 2 +- clang/include/clang/Driver/Options.td | 2 +- clang/test/CodeGenSYCL/kernel-handler.cpp | 2 +- clang/test/CodeGenSYCL/nvvm-annotations.cpp | 8 ++++---- clang/test/SemaSYCL/kernel-handler.cpp | 2 +- clang/test/SemaSYCL/no-decomp.cpp | 2 +- ..._with_indices_ptr_multiple_nodes_different_indices.cpp | 3 +-- .../Update/update_with_indices_ptr_multiple_params.cpp | 3 +-- sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp | 3 +-- sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp | 3 +-- sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp | 2 +- sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp | 3 +-- .../Tracing/usm/queue_single_task_released_pointer.cpp | 3 +-- 13 files changed, 16 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index eed218530a51b..84dad8c7f9473 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -304,7 +304,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension") LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels") -LANGOPT(SYCLDecomposeStruct, 1, 0, "Force top level decomposition of SYCL functor") +LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor") LANGOPT( SYCLValueFitInMaxInt, 1, 1, "SYCL compiler assumes value fits within MAX_INT for member function of " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9bc39ab5eec90..caa30cf7995f0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4030,7 +4030,7 @@ defm sycl_instrument_device_code "(experimental)">>; defm sycl_decompose_functor : BoolFOption<"sycl-decompose-functor", - LangOpts<"SYCLDecomposeStruct">, DefaultFalse, + LangOpts<"SYCLDecomposeStruct">, DefaultTrue, PosFlag, NegFlag, BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index e8f3bbf041ef9..00e799521a10f 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT // RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument diff --git a/clang/test/CodeGenSYCL/nvvm-annotations.cpp b/clang/test/CodeGenSYCL/nvvm-annotations.cpp index 1d13e22c04fdc..d5376fa26db58 100644 --- a/clang/test/CodeGenSYCL/nvvm-annotations.cpp +++ b/clang/test/CodeGenSYCL/nvvm-annotations.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST // Tests that certain SYCL kernel parameters are annotated with "grid_constant" for supported microarchitectures. diff --git a/clang/test/SemaSYCL/kernel-handler.cpp b/clang/test/SemaSYCL/kernel-handler.cpp index e73c24f6d89e3..ec9644a3bec24 100644 --- a/clang/test/SemaSYCL/kernel-handler.cpp +++ b/clang/test/SemaSYCL/kernel-handler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT // This test checks that the compiler handles kernel_handler type (for diff --git a/clang/test/SemaSYCL/no-decomp.cpp b/clang/test/SemaSYCL/no-decomp.cpp index 65b64424d29d5..5f61574935c8b 100644 --- a/clang/test/SemaSYCL/no-decomp.cpp +++ b/clang/test/SemaSYCL/no-decomp.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP // RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP // RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp index 57d2cca61a30b..0b5d97dffcccb 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp @@ -1,5 +1,4 @@ -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp index 80dadf1b15fe2..212074b5450f3 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp @@ -1,5 +1,4 @@ -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp index ab7753fc58eac..3c4bb8f189e7a 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp @@ -1,5 +1,4 @@ -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index 2d1edad0ec883..c9a4922e7fd46 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -1,5 +1,4 @@ -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp index 370e4dcefe9aa..6adcd29ab3071 100644 --- a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp +++ b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} %{embed-ir} -fsycl-decompose-functor -o %t.out +// RUN: %{build} %{embed-ir} -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 \ // RUN: | FileCheck %s --implicit-check-not=ERROR diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp index bf44705371162..6d19f88dd5487 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp @@ -1,6 +1,5 @@ // UNSUPPORTED: windows || hip_amd -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: not env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp index aa8d38754ffc8..f87717c8efc28 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp @@ -1,6 +1,5 @@ // UNSUPPORTED: windows || hip_amd -// Force decomposition as it assumes arguments are passed individually. -// RUN: %{build} -fsycl-decompose-functor -o %t.out +// RUN: %{build} -o %t.out // RUN: not env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage