From a0651db490328a972185e44ff637970b3456406b Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Wed, 10 Apr 2024 19:23:32 +0800 Subject: [PATCH 001/886] [clang][Sema] Avoid guessing unexpanded packs' size in getFullyPackExpandedSize (#87768) There has been an optimization for `SizeOfPackExprs` since c5452ed9, in which we overlooked a case where the template arguments were not yet formed into a `PackExpansionType` at the token annotation stage. This led to a problem in that a template involving such expressions may lose its nature of being dependent, causing some false-positive diagnostics. Fixes https://github.com/llvm/llvm-project/issues/84220 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaTemplateVariadic.cpp | 11 ++++++++++ clang/test/SemaTemplate/alias-templates.cpp | 23 +++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f96cebbde3d82..6bff80ed4d210 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -526,6 +526,7 @@ Bug Fixes to C++ Support - Fix crash when inheriting from a cv-qualified type. Fixes: (`#35603 `_) - Fix a crash when the using enum declaration uses an anonymous enumeration. Fixes (#GH86790). +- Handled an edge case in ``getFullyPackExpandedSize`` so that we now avoid a false-positive diagnostic. (#GH84220) - Clang now correctly tracks type dependence of by-value captures in lambdas with an explicit object parameter. Fixes (#GH70604), (#GH79754), (#GH84163), (#GH84425), (#GH86054), (#GH86398), and (#GH86399). diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 903fbfd18e779..4909414c0c78d 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -1243,6 +1243,17 @@ std::optional Sema::getFullyPackExpandedSize(TemplateArgument Arg) { // expanded this pack expansion into the enclosing pack if we could. if (Elem.isPackExpansion()) return std::nullopt; + // Don't guess the size of unexpanded packs. The pack within a template + // argument may have yet to be of a PackExpansion type before we see the + // ellipsis in the annotation stage. + // + // This doesn't mean we would invalidate the optimization: Arg can be an + // unexpanded pack regardless of Elem's dependence. For instance, + // A TemplateArgument that contains either a SubstTemplateTypeParmPackType + // or SubstNonTypeTemplateParmPackExpr is always considered Unexpanded, but + // the underlying TemplateArgument thereof may not. + if (Elem.containsUnexpandedParameterPack()) + return std::nullopt; } return Pack.pack_size(); } diff --git a/clang/test/SemaTemplate/alias-templates.cpp b/clang/test/SemaTemplate/alias-templates.cpp index 8d7cc6118610a..ab5cad72faf1b 100644 --- a/clang/test/SemaTemplate/alias-templates.cpp +++ b/clang/test/SemaTemplate/alias-templates.cpp @@ -236,6 +236,29 @@ namespace PR14858 { void test_q(int (&a)[5]) { Q().f(&a); } } +namespace PR84220 { + +template class list {}; + +template struct foo_impl { + template using f = int; +}; + +template +using foo = typename foo_impl::template f; + +// We call getFullyPackExpandedSize at the annotation stage +// before parsing the ellipsis next to the foo. This happens before +// a PackExpansionType is formed for foo. +// getFullyPackExpandedSize shouldn't determine the value here. Otherwise, +// foo_impl would lose its dependency despite the template +// arguments being unsubstituted. 
+template using test = list...>; + +test a; + +} + namespace redecl { template using A = int; template using A = int; From 8d206f51497fdf1ceebd6430b2f7d31ef735d0dc Mon Sep 17 00:00:00 2001 From: Edwin Vane Date: Wed, 10 Apr 2024 07:40:35 -0400 Subject: [PATCH 002/886] [clang-tidy] Allow renaming macro arguments (#87792) Although the identifier-naming.cpp lit test expected macro arguments not to be renamed, the code seemed to already allow it. The code was simply not being exercised because a SourceManager argument wasn't being provided. With this change, renaming of macro arguments that expand to renamable decls is permitted. --- .../utils/RenamerClangTidyCheck.cpp | 2 +- clang-tools-extra/docs/ReleaseNotes.rst | 2 +- .../readability/identifier-naming.cpp | 23 +++++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 69b7d40ef628d..ad8048e2a92b7 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -489,7 +489,7 @@ void RenamerClangTidyCheck::checkNamedDecl(const NamedDecl *Decl, } Failure.Info = std::move(Info); - addUsage(Decl, Range); + addUsage(Decl, Range, &SourceMgr); } void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a7193e90c38da..b66be44e9f8a6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -268,7 +268,7 @@ Changes in existing checks ` check in `GetConfigPerFile` mode by resolving symbolic links to header files. Fixed handling of Hungarian Prefix when configured to `LowerCase`. Added support for renaming designated - initializers. + initializers. Added support for renaming macro arguments. 
- Improved :doc:`readability-implicit-bool-conversion ` check to provide diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp index 57ef4aae5ddb7..99149fe86acee 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp @@ -108,10 +108,12 @@ USER_NS::object g_s2; // NO warnings or fixes expected as USER_NS and object are declared in a header file SYSTEM_MACRO(var1); -// NO warnings or fixes expected as var1 is from macro expansion +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: invalid case style for global variable 'var1' [readability-identifier-naming] +// CHECK-FIXES: {{^}}SYSTEM_MACRO(g_var1); USER_MACRO(var2); -// NO warnings or fixes expected as var2 is declared in a macro expansion +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: invalid case style for global variable 'var2' [readability-identifier-naming] +// CHECK-FIXES: {{^}}USER_MACRO(g_var2); #define BLA int FOO_bar BLA; @@ -602,9 +604,20 @@ static void static_Function() { // CHECK-FIXES: {{^}}#define MY_TEST_MACRO(X) X() void MY_TEST_Macro(function) {} -// CHECK-FIXES: {{^}}void MY_TEST_MACRO(function) {} -} -} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: invalid case style for global function 'function' [readability-identifier-naming] +// CHECK-FIXES: {{^}}void MY_TEST_MACRO(Function) {} + +#define MY_CAT_IMPL(l, r) l ## r +#define MY_CAT(l, r) MY_CAT_IMPL(l, r) +#define MY_MACRO2(foo) int MY_CAT(awesome_, MY_CAT(foo, __COUNTER__)) = 0 +#define MY_MACRO3(foo) int MY_CAT(awesome_, foo) = 0 +MY_MACRO2(myglob); +MY_MACRO3(myglob); +// No suggestions should occur even though the resulting decl of awesome_myglob# +// or awesome_myglob are not entirely within a macro argument. + +} // namespace InlineNamespace +} // namespace FOO_NS template struct a { // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: invalid case style for struct 'a' From f2ade91a9fe7c222ea919748d30b74397911ecc8 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Wed, 10 Apr 2024 14:11:45 +0200 Subject: [PATCH 003/886] [mlir] Optimize getting properties on concrete ops (#88259) This makes retrieving properties on concrete operations faster by removing a branch when it is known that the operation must have properties. --- mlir/include/mlir/IR/OpDefinition.h | 2 +- mlir/include/mlir/IR/Operation.h | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index c177ae3594d11..2d1dee2303e8f 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1965,7 +1965,7 @@ class Op : public OpState, public Traits... { if constexpr (!hasProperties()) return getEmptyProperties(); return *getOperation() - ->getPropertiesStorage() + ->getPropertiesStorageUnsafe() .template as *>(); } diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 3ffd3517fe5a6..c52a6fcac10c1 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -895,8 +895,7 @@ class alignas(8) Operation final /// Returns the properties storage. 
OpaqueProperties getPropertiesStorage() { if (propertiesStorageSize) - return { - reinterpret_cast(getTrailingObjects())}; + return getPropertiesStorageUnsafe(); return {nullptr}; } OpaqueProperties getPropertiesStorage() const { @@ -905,6 +904,12 @@ class alignas(8) Operation final getTrailingObjects()))}; return {nullptr}; } + /// Returns the properties storage without checking whether properties are + /// present. + OpaqueProperties getPropertiesStorageUnsafe() { + return { + reinterpret_cast(getTrailingObjects())}; + } /// Return the properties converted to an attribute. /// This is expensive, and mostly useful when dealing with unregistered From 94ed57dab64ccb248a342a91957f390209c5c7ce Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 10 Apr 2024 13:30:29 +0100 Subject: [PATCH 004/886] [PhaseOrdering] Add test for #85551. Add test for missed hoisting of checks from std::span https://github.com/llvm/llvm-project/issues/85551 --- .../AArch64/hoist-runtime-checks.ll | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll index c6c9a52167d54..a140e17a0dd15 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll @@ -91,8 +91,151 @@ for.end: ; preds = %for.cond.cleanup ret i32 %9 } +%"class.std::__1::span" = type { ptr, i64 } +%"class.std::__1::__wrap_iter" = type { ptr } + +define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1, i64 noundef %n) { +; CHECK-LABEL: define dso_local noundef i32 @sum_prefix_with_sum( +; CHECK-SAME: ptr nocapture readonly [[S_COERCE0:%.*]], i64 [[S_COERCE1:%.*]], i64 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ] +; CHECK-NEXT: ret i32 [[RET_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[COND_FALSE_I:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: cond.false.i: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: span_checked_access.exit: +; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[I_07]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[RET_06]] +; CHECK-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; +entry: + %s = alloca %"class.std::__1::span", align 8 + %n.addr = alloca i64, align 8 + %ret = alloca i32, align 4 + %i = alloca i64, align 8 + %0 = getelementptr inbounds { ptr, i64 }, ptr %s, i32 0, i32 0 + store 
ptr %s.coerce0, ptr %0, align 8 + %1 = getelementptr inbounds { ptr, i64 }, ptr %s, i32 0, i32 1 + store i64 %s.coerce1, ptr %1, align 8 + store i64 %n, ptr %n.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %ret) #7 + store i32 0, ptr %ret, align 4 + call void @llvm.lifetime.start.p0(i64 8, ptr %i) #7 + store i64 0, ptr %i, align 8 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i64, ptr %i, align 8 + %3 = load i64, ptr %n.addr, align 8 + %cmp = icmp ult i64 %2, %3 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 8, ptr %i) #7 + br label %for.end + +for.body: ; preds = %for.cond + %4 = load i64, ptr %i, align 8 + %call = call noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access(ptr noundef nonnull align 8 dereferenceable(16) %s, i64 noundef %4) #7 + %5 = load i32, ptr %call, align 4 + %6 = load i32, ptr %ret, align 4 + %add = add nsw i32 %6, %5 + store i32 %add, ptr %ret, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i64, ptr %i, align 8 + %inc = add i64 %7, 1 + store i64 %inc, ptr %i, align 8 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + %8 = load i32, ptr %ret, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr %ret) + ret i32 %8 +} + +define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access(ptr noundef nonnull align 8 dereferenceable(16) %this, i64 noundef %__idx) { +; CHECK-LABEL: define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access( +; CHECK-SAME: ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]], i64 noundef [[__IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8 +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], [[__IDX]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF0]] +; CHECK: cond.false: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: cond.end: +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[__IDX]] +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %this.addr = alloca ptr, align 8 + %__idx.addr = alloca i64, align 8 + store ptr %this, ptr %this.addr, align 8 + store i64 %__idx, ptr %__idx.addr, align 8 + %this1 = load ptr, ptr %this.addr, align 8 + %0 = load i64, ptr %__idx.addr, align 8 + %call = call noundef i64 @span_access(ptr noundef nonnull align 8 dereferenceable(16) %this1) + %cmp = icmp ult i64 %0, %call + %conv = zext i1 %cmp to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + %tobool = icmp ne i64 %expval, 0 + br i1 %tobool, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + call void @llvm.trap() + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %__data_ = getelementptr inbounds %"class.std::__1::span", ptr %this1, i32 0, i32 0 + %1 = load ptr, ptr %__data_, align 8 + %2 = load i64, ptr %__idx.addr, align 8 + %arrayidx = getelementptr inbounds i32, ptr %1, i64 %2 + ret ptr %arrayidx +} + +define hidden noundef i64 @span_access(ptr noundef nonnull align 8 dereferenceable(16) %this) { +; CHECK-LABEL: define hidden noundef i64 @span_access( +; CHECK-SAME: ptr nocapture 
noundef nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SIZE_:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[__SIZE_]], align 8
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %__size_ = getelementptr inbounds %"class.std::__1::span", ptr %this1, i32 0, i32 1
+  %0 = load i64, ptr %__size_, align 8
+  ret i64 %0
+}
+
 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 declare void @llvm.trap()
 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 2000, i32 1}
+;.

From 8d206f51497fdf1ceebd6430b2f7d31ef735d0dc Mon Sep 17 00:00:00 2001
From: Jacek Caban
Date: Wed, 10 Apr 2024 14:37:18 +0200
Subject: [PATCH 005/886] [LLD][COFF] Check machine types in ICF::equalsConstant. (#88140)

Avoid replacing a chunk with one of a different machine type. It's
mostly a concern for ARM64X, where we don't want to merge aarch64 and
arm64ec chunks, but it may also, in theory, happen between arm64ec and
x86_64 chunks.
---
 lld/COFF/ICF.cpp           |  2 +-
 lld/test/COFF/arm64x-icf.s | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/COFF/arm64x-icf.s

diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 013ffcfb3d5d1..b899a25324239 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -178,7 +178,7 @@ bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
          a->getSectionName() == b->getSectionName() &&
          a->header->SizeOfRawData == b->header->SizeOfRawData &&
          a->checksum == b->checksum && a->getContents() == b->getContents() &&
-         assocEquals(a, b);
+         a->getMachine() == b->getMachine() && assocEquals(a, b);
 }
 
 // Compare "moving" part of two sections, namely relocation targets.
diff --git a/lld/test/COFF/arm64x-icf.s b/lld/test/COFF/arm64x-icf.s
new file mode 100644
index 0000000000000..c8df21d3e4969
--- /dev/null
+++ b/lld/test/COFF/arm64x-icf.s
@@ -0,0 +1,37 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-arm64ec.s -o func-arm64ec.obj
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-arm64.s -o func-arm64.obj
+// RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll func-arm64ec.obj func-arm64.obj
+// RUN: llvm-objdump -d out.dll | FileCheck %s
+
+// CHECK:      0000000180001000 <.text>:
+// CHECK-NEXT: 180001000: 52800020     mov     w0, #0x1                // =1
+// CHECK-NEXT: 180001004: d65f03c0     ret
+// CHECK-NEXT:                 ...
+// CHECK-NEXT: 180002000: 52800020 mov w0, #0x1 // =1 +// CHECK-NEXT: 180002004: d65f03c0 ret + + +#--- func-arm64.s + .section .text,"xr",discard,func + .globl func + .p2align 2 +func: + mov w0, #1 + ret + + .data + .rva func + +#--- func-arm64ec.s + .section .text,"xr",discard,"#func" + .globl "#func" + .p2align 2 +"#func": + mov w0, #1 + ret + + .data + .rva "#func" From b47e439559ad03a1b32614f573aad66f145a634d Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 10 Apr 2024 08:38:49 -0400 Subject: [PATCH 006/886] Revert "[Clang][Sema] Fix crash when 'this' is used in a dependent class scope function template specialization that instantiates to a static member function" (#88264) Reverts llvm/llvm-project#87541 --- clang/docs/ReleaseNotes.rst | 2 - clang/include/clang/Sema/Sema.h | 8 +--- clang/lib/Sema/SemaExpr.cpp | 5 +-- clang/lib/Sema/SemaExprCXX.cpp | 44 ++++++------------- clang/lib/Sema/SemaExprMember.cpp | 42 +++++------------- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 8 ---- clang/lib/Sema/TreeTransform.h | 7 ++- ...ms-function-specialization-class-scope.cpp | 44 ++----------------- 8 files changed, 36 insertions(+), 124 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6bff80ed4d210..f5359afe1f099 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -520,8 +520,6 @@ Bug Fixes to C++ Support - Fix an issue caused by not handling invalid cases when substituting into the parameter mapping of a constraint. Fixes (#GH86757). - Fixed a bug that prevented member function templates of class templates declared with a deduced return type from being explicitly specialized for a given implicit instantiation of the class template. -- Fixed a crash when ``this`` is used in a dependent class scope function template specialization - that instantiates to a static member function. - Fix crash when inheriting from a cv-qualified type. Fixes: (`#35603 `_) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f311f9f374345..9769d36900664 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5439,8 +5439,7 @@ class Sema final : public SemaBase { ExprResult BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, - bool AcceptInvalidDecl = false, - bool NeedUnresolved = false); + bool AcceptInvalidDecl = false); ExprResult BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD = nullptr, @@ -6592,10 +6591,7 @@ class Sema final : public SemaBase { SourceLocation RParenLoc); //// ActOnCXXThis - Parse 'this' pointer. - ExprResult ActOnCXXThis(SourceLocation Loc); - - /// Check whether the type of 'this' is valid in the current context. - bool CheckCXXThisType(SourceLocation Loc, QualType Type); + ExprResult ActOnCXXThis(SourceLocation loc); /// Build a CXXThisExpr and mark it referenced in the current context. 
Expr *BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 45acbf197ea6b..594c11788f4e7 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3442,11 +3442,10 @@ static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) { ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, - bool AcceptInvalidDecl, - bool NeedUnresolved) { + bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. - if (!NeedUnresolved && !NeedsADL && R.isSingleResult() && + if (!NeedsADL && R.isSingleResult() && !R.getAsSingle() && !ShouldLookupResultBeMultiVersionOverload(R)) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 9822477260e59..7b9b8f149d9ed 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1415,42 +1415,26 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, } ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { - // C++20 [expr.prim.this]p1: - // The keyword this names a pointer to the object for which an - // implicit object member function is invoked or a non-static - // data member's initializer is evaluated. + /// C++ 9.3.2: In the body of a non-static member function, the keyword this + /// is a non-lvalue expression whose value is the address of the object for + /// which the function is called. QualType ThisTy = getCurrentThisType(); - if (CheckCXXThisType(Loc, ThisTy)) - return ExprError(); + if (ThisTy.isNull()) { + DeclContext *DC = getFunctionLevelDeclContext(); - return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); -} + if (const auto *Method = dyn_cast(DC); + Method && Method->isExplicitObjectMemberFunction()) { + return Diag(Loc, diag::err_invalid_this_use) << 1; + } -bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { - if (!Type.isNull()) - return false; + if (isLambdaCallWithExplicitObjectParameter(CurContext)) + return Diag(Loc, diag::err_invalid_this_use) << 1; - // C++20 [expr.prim.this]p3: - // If a declaration declares a member function or member function template - // of a class X, the expression this is a prvalue of type - // "pointer to cv-qualifier-seq X" wherever X is the current class between - // the optional cv-qualifier-seq and the end of the function-definition, - // member-declarator, or declarator. It shall not appear within the - // declaration of either a static member function or an explicit object - // member function of the current class (although its type and value - // category are defined within such member functions as they are within - // an implicit object member function). 
- DeclContext *DC = getFunctionLevelDeclContext(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { - Diag(Loc, diag::err_invalid_this_use) << 1; - } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { - Diag(Loc, diag::err_invalid_this_use) << 1; - } else { - Diag(Loc, diag::err_invalid_this_use) << 0; + return Diag(Loc, diag::err_invalid_this_use) << 0; } - return true; + + return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); } Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 8cd2288d279cc..32998ae60eafe 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -61,10 +61,6 @@ enum IMAKind { /// The reference is a contextually-permitted abstract member reference. IMA_Abstract, - /// Whether the context is static is dependent on the enclosing template (i.e. - /// in a dependent class scope explicit specialization). - IMA_Dependent, - /// The reference may be to an unresolved using declaration and the /// context is not an instance method. IMA_Unresolved_StaticOrExplicitContext, @@ -95,18 +91,10 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, DeclContext *DC = SemaRef.getFunctionLevelDeclContext(); - bool couldInstantiateToStatic = false; - bool isStaticOrExplicitContext = SemaRef.CXXThisTypeOverride.isNull(); - - if (auto *MD = dyn_cast(DC)) { - if (MD->isImplicitObjectMemberFunction()) { - isStaticOrExplicitContext = false; - // A dependent class scope function template explicit specialization - // that is neither declared 'static' nor with an explicit object - // parameter could instantiate to a static or non-static member function. - couldInstantiateToStatic = MD->getDependentSpecializationInfo(); - } - } + bool isStaticOrExplicitContext = + SemaRef.CXXThisTypeOverride.isNull() && + (!isa(DC) || cast(DC)->isStatic() || + cast(DC)->isExplicitObjectMemberFunction()); if (R.isUnresolvableResult()) return isStaticOrExplicitContext ? 
IMA_Unresolved_StaticOrExplicitContext @@ -135,9 +123,6 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, if (Classes.empty()) return IMA_Static; - if (couldInstantiateToStatic) - return IMA_Dependent; - // C++11 [expr.prim.general]p12: // An id-expression that denotes a non-static data member or non-static // member function of a class can only be used: @@ -283,30 +268,27 @@ ExprResult Sema::BuildPossibleImplicitMemberExpr( const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, LookupResult &R, const TemplateArgumentListInfo *TemplateArgs, const Scope *S, UnresolvedLookupExpr *AsULE) { - switch (IMAKind Classification = ClassifyImplicitMemberAccess(*this, R)) { + switch (ClassifyImplicitMemberAccess(*this, R)) { case IMA_Instance: + return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, true, S); + case IMA_Mixed: case IMA_Mixed_Unrelated: case IMA_Unresolved: - return BuildImplicitMemberExpr( - SS, TemplateKWLoc, R, TemplateArgs, - /*IsKnownInstance=*/Classification == IMA_Instance, S); + return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, false, + S); + case IMA_Field_Uneval_Context: Diag(R.getNameLoc(), diag::warn_cxx98_compat_non_static_member_use) << R.getLookupNameInfo().getName(); [[fallthrough]]; case IMA_Static: case IMA_Abstract: - case IMA_Dependent: case IMA_Mixed_StaticOrExplicitContext: case IMA_Unresolved_StaticOrExplicitContext: if (TemplateArgs || TemplateKWLoc.isValid()) - return BuildTemplateIdExpr(SS, TemplateKWLoc, R, /*RequiresADL=*/false, - TemplateArgs); - return AsULE ? AsULE - : BuildDeclarationNameExpr( - SS, R, /*NeedsADL=*/false, /*AcceptInvalidDecl=*/false, - /*NeedUnresolved=*/Classification == IMA_Dependent); + return BuildTemplateIdExpr(SS, TemplateKWLoc, R, false, TemplateArgs); + return AsULE ? AsULE : BuildDeclarationNameExpr(SS, R, false); case IMA_Error_StaticOrExplicitContext: case IMA_Error_Unrelated: diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 8248b10814fea..127a432367b95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5093,14 +5093,6 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); - Qualifiers ThisTypeQuals; - CXXRecordDecl *ThisContext = nullptr; - if (CXXMethodDecl *Method = dyn_cast(Function)) { - ThisContext = Method->getParent(); - ThisTypeQuals = Method->getMethodQualifiers(); - } - CXXThisScopeRAII ThisScope(*this, ThisContext, ThisTypeQuals); - // Introduce a new scope where local variable instantiations will be // recorded, unless we're actually a member function within a local // class, in which case we need to merge our results with the parent diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 13d7b00430d52..d4d2fa61d65ea 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3307,13 +3307,12 @@ class TreeTransform { /// Build a new C++ "this" expression. /// - /// By default, performs semantic analysis to build a new "this" expression. - /// Subclasses may override this routine to provide different behavior. + /// By default, builds a new "this" expression without performing any + /// semantic analysis. Subclasses may override this routine to provide + /// different behavior. 
ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc, QualType ThisType, bool isImplicit) { - if (getSema().CheckCXXThisType(ThisLoc, ThisType)) - return ExprError(); return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit); } diff --git a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp index 6977623a0816e..dcab9bfaeabcb 100644 --- a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp +++ b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp @@ -1,6 +1,7 @@ -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -Wno-unused-value -verify %s -// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify %s +// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -verify %s +// expected-no-diagnostics class A { public: template A(U p) {} @@ -75,42 +76,3 @@ struct S { int f<0>(int); }; } - -namespace UsesThis { - template - struct A { - int x; - - template - static void f(); - - template<> - void f() { - this->x; // expected-error {{invalid use of 'this' outside of a non-static member function}} - x; // expected-error {{invalid use of member 'x' in static member function}} - A::x; // expected-error {{invalid use of member 'x' in static member function}} - +x; // expected-error {{invalid use of member 'x' in static member function}} - +A::x; // expected-error {{invalid use of member 'x' in static member function}} - } - - template - void g(); - - template<> - void g() { - this->x; - x; - A::x; - +x; - +A::x; - } - - template - static auto h() -> A*; - - template<> - auto h() -> decltype(this); // expected-error {{'this' cannot be used in a static member function declaration}} - }; - - template struct A; // expected-note 2{{in instantiation of}} -} From 1ca01958310f2956abd72ece1652c3218bcf27e1 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 10 Apr 2024 08:41:22 -0400 Subject: [PATCH 007/886] [Clang][AST][NFC] Fix printing of dependent PackIndexTypes (#88146) Dependent `PackIndexType`s currently print the memory address of the index `Expr*` rather than pretty printing the expression. This patch fixes that. --- clang/lib/AST/TypePrinter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 075c8aba11fcb..9602f448e9427 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1213,10 +1213,13 @@ void TypePrinter::printDecltypeBefore(const DecltypeType *T, raw_ostream &OS) { void TypePrinter::printPackIndexingBefore(const PackIndexingType *T, raw_ostream &OS) { - if (T->hasSelectedType()) + if (T->hasSelectedType()) { OS << T->getSelectedType(); - else - OS << T->getPattern() << "...[" << T->getIndexExpr() << "]"; + } else { + OS << T->getPattern() << "...["; + T->getIndexExpr()->printPretty(OS, nullptr, Policy); + OS << "]"; + } spaceBeforePlaceHolder(OS); } From 49ef12a08c4c7d7ae4765929e72fe2320a12b08c Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Wed, 10 Apr 2024 14:55:56 +0200 Subject: [PATCH 008/886] Fix complex log1p accuracy with large abs values. (#88260) This ports https://github.com/openxla/xla/pull/10503 by @pearu. The new implementation matches mpmath's results for most inputs, see caveats in the linked pull request. 
In addition to the FileCheck test here, the accuracy was tested with
XLA's complex_unary_op_test and its MLIR emitters.
---
 .../ComplexToStandard/ComplexToStandard.cpp   | 50 ++++++++++---------
 .../convert-to-standard.mlir                  | 48 +++++++++++-------
 2 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
index 9c3c4d96a301e..0aa1de5fa5d9a 100644
--- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
+++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -570,37 +570,39 @@ struct Log1pOpConversion : public OpConversionPattern<complex::Log1pOp> {
                   ConversionPatternRewriter &rewriter) const override {
     auto type = cast<ComplexType>(adaptor.getComplex().getType());
     auto elementType = cast<FloatType>(type.getElementType());
-    arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
+    arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
     mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
 
-    Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
-    Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
+    Value real = b.create<complex::ReOp>(adaptor.getComplex());
+    Value imag = b.create<complex::ImOp>(adaptor.getComplex());
     Value half = b.create<arith::ConstantOp>(elementType,
                                              b.getFloatAttr(elementType, 0.5));
     Value one = b.create<arith::ConstantOp>(elementType,
                                             b.getFloatAttr(elementType, 1));
-    Value two = b.create<arith::ConstantOp>(elementType,
-                                            b.getFloatAttr(elementType, 2));
-
-    // log1p(a+bi) = .5*log((a+1)^2+b^2) + i*atan2(b, a + 1)
-    // log((a+1)+bi) = .5*log(a*a + 2*a + 1 + b*b) + i*atan2(b, a+1)
-    // log((a+1)+bi) = .5*log1p(a*a + 2*a + b*b) + i*atan2(b, a+1)
-    Value sumSq = b.create<arith::MulFOp>(real, real, fmf.getValue());
-    sumSq = b.create<arith::AddFOp>(
-        sumSq, b.create<arith::MulFOp>(real, two, fmf.getValue()),
-        fmf.getValue());
-    sumSq = b.create<arith::AddFOp>(
-        sumSq, b.create<arith::MulFOp>(imag, imag, fmf.getValue()),
-        fmf.getValue());
-    Value logSumSq =
-        b.create<math::Log1pOp>(elementType, sumSq, fmf.getValue());
-    Value resultReal = b.create<arith::MulFOp>(logSumSq, half, fmf.getValue());
-
-    Value realPlusOne = b.create<arith::AddFOp>(real, one, fmf.getValue());
-
-    Value resultImag =
-        b.create<math::Atan2Op>(elementType, imag, realPlusOne, fmf.getValue());
+    Value realPlusOne = b.create<arith::AddFOp>(real, one, fmf);
+    Value absRealPlusOne = b.create<math::AbsFOp>(realPlusOne, fmf);
+    Value absImag = b.create<math::AbsFOp>(imag, fmf);
+
+    Value maxAbs = b.create<arith::MaximumFOp>(absRealPlusOne, absImag, fmf);
+    Value minAbs = b.create<arith::MinimumFOp>(absRealPlusOne, absImag, fmf);
+
+    Value maxAbsOfRealPlusOneAndImagMinusOne = b.create<arith::SelectOp>(
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, realPlusOne, absImag,
+                                fmf),
+        real, b.create<arith::SubFOp>(maxAbs, one, fmf));
+    Value minMaxRatio = b.create<arith::DivFOp>(minAbs, maxAbs, fmf);
+    Value logOfMaxAbsOfRealPlusOneAndImag =
+        b.create<math::Log1pOp>(maxAbsOfRealPlusOneAndImagMinusOne, fmf);
+    Value logOfSqrtPart = b.create<math::Log1pOp>(
+        b.create<arith::MulFOp>(minMaxRatio, minMaxRatio, fmf), fmf);
+    Value r = b.create<arith::AddFOp>(
+        b.create<arith::MulFOp>(half, logOfSqrtPart, fmf),
+        logOfMaxAbsOfRealPlusOneAndImag, fmf);
+    Value resultReal = b.create<arith::SelectOp>(
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, r, r, fmf), minAbs,
+        r);
+    Value resultImag = b.create<math::Atan2Op>(imag, realPlusOne, fmf);
     rewriter.replaceOpWithNewOp<complex::CreateOp>(op, type, resultReal,
                                                    resultImag);
     return success();
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index f5d9499eadda4..43918904a09f4 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -300,15 +300,22 @@ func.func @complex_log1p(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
 // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32
 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
-// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32
-// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32
-// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32
-// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32
-// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32
-// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32
-// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32
 // CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32
+// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] : f32
+// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32
+// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32
+// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32
+// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32
+// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %cst_0 : f32
+// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32
+// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32
+// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] : f32
+// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] : f32
+// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] : f32
+// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] : f32
+// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32
 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
 // CHECK: return %[[RESULT]] : complex<f32>

@@ -963,15 +970,22 @@ func.func @complex_log1p_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
 // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32
 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
-// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] fastmath<nnan,contract> : f32
-// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] fastmath<nnan,contract> : f32
-// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] fastmath<nnan,contract> : f32
-// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] fastmath<nnan,contract> : f32
-// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] fastmath<nnan,contract> : f32
-// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] fastmath<nnan,contract> : f32
-// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] fastmath<nnan,contract> : f32
-// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %cst_0 fastmath<nnan,contract> : f32
+// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32
+// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath<nnan,contract> : f32
+// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] fastmath<nnan,contract> : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath<nnan,contract> : f32
+// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath<nnan,contract> : f32
+// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath<nnan,contract> : f32
+// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32
 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] fastmath<nnan,contract> : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
 // CHECK: return %[[RESULT]] : complex<f32>

From 54a9f0007cb4f19d2e9df30405c5027229f5def0 Mon Sep 17 00:00:00 2001
From: annamthomas
Date: Wed, 10 Apr 2024 09:02:23 -0400
Subject: [PATCH 009/886] [SCEV] Fix BinomialCoefficient Iteration to fit in W bits (#88010)

BinomialCoefficient computes the value of a W-bit IV at iteration It of
a loop. When W is 1, we can end up calling the multiplicative inverse on
0, which triggers an assert since 1b76120.

Since the arithmetic is supposed to wrap if It or K does not fit in W
bits, do the truncation into W bits after we do the shift.

Fixes #87798
---
 llvm/lib/Analysis/ScalarEvolution.cpp         |  6 +-
 llvm/test/Analysis/ScalarEvolution/pr87798.ll | 68 +++++++++++++++++++
 2 files changed, 70 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/pr87798.ll

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e030b9fc7dac4..9fcce797f5597 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -928,11 +928,9 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
   APInt OddFactorial(W, 1);
   unsigned T = 1;
   for (unsigned i = 3; i <= K; ++i) {
-    APInt Mult(W, i);
-    unsigned TwoFactors = Mult.countr_zero();
+    unsigned TwoFactors = countr_zero(i);
     T += TwoFactors;
-    Mult.lshrInPlace(TwoFactors);
-    OddFactorial *= Mult;
+    OddFactorial *= (i >> TwoFactors);
   }
 
   // We need at least W + T bits for the multiplication step
diff --git a/llvm/test/Analysis/ScalarEvolution/pr87798.ll b/llvm/test/Analysis/ScalarEvolution/pr87798.ll
new file mode 100644
index 0000000000000..acd445993e47b
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/pr87798.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -disable-output -passes='print<scalar-evolution>' -verify-scev < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+; print<scalar-evolution> is used to compute SCEVs for all values in the
+; function.
+; We should not crash on multiplicative inverse called within SCEV's binomial
+; coefficient function.
+
+define i32 @pr87798() {
+; CHECK-LABEL: 'pr87798'
+; CHECK-NEXT:  Classifying expressions for: @pr87798
+; CHECK-NEXT:    %phi = phi i32 [ 0, %bb ], [ %add4, %bb1 ]
+; CHECK-NEXT:    --> {0,+,0,+,0,+,2,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi2 = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+; CHECK-NEXT:    --> {0,+,0,+,1}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi3 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+; CHECK-NEXT:    --> {0,+,1}<%bb1> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add = add i32 %phi2, %phi3
+; CHECK-NEXT:    --> {0,+,1,+,1}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %mul = mul i32 %phi2, %phi3
+; CHECK-NEXT:    --> {0,+,0,+,2,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add4 = add i32 %mul, %phi
+; CHECK-NEXT:    --> {0,+,0,+,2,+,5,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %and = and i32 %phi, 1
+; CHECK-NEXT:    --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add5 = add i32 %phi3, 1
+; CHECK-NEXT:    --> {1,+,1}<%bb1> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi9 = phi i32 [ %and, %bb1 ]
+; CHECK-NEXT:    --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) --> 0 U: [0,1) S: [0,1)
+; CHECK-NEXT:    %zext = zext i32 %phi9 to i64
+; CHECK-NEXT:    --> poison U: full-set S: full-set
+; CHECK-NEXT:  Determining loop execution counts for: @pr87798
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %bb1: backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: constant max backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: symbolic max backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: Trip multiple is 1
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %phi = phi i32 [ 0, %bb ], [ %add4, %bb1 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+  %add = add i32 %phi2, %phi3
+  %mul = mul i32 %phi2, %phi3
+  %add4 = add i32 %mul, %phi
+  %and = and i32 %phi, 1
+  %add5 = add i32 %phi3, 1
+  br i1 true, label %preheader, label %bb1
+
+preheader:                                        ; preds = %bb1
+  %phi9 = phi i32 [ %and, %bb1 ]
+  br label %loop
+
+loop:                                             ; preds = %preheader, %loop
+  br label %loop
+
+bb7:                                              ; No predecessors!
+  %zext = zext i32 %phi9 to i64
+  ret i32 0
+}

From 938a73422e0b964eba16f272acdfae1d0281772c Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Thu, 4 Apr 2024 13:30:57 -0700
Subject: [PATCH 010/886] [SLP][NFC] Walk over entries, not single values.

It is better to walk over SLP nodes than over single values. Matching a
value to a node is not a 1-to-1 relation: one value may be part of
several nodes, and the compiler may get the wrong node when trying to
map it. Currently no such issues have been detected, but they may appear
in the future.
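A self-contained sketch of the ambiguity (simplified stand-in types;
Value and TreeEntry here are minimal structs, not the real SLP data
structures):

  #include <initializer_list>
  #include <map>
  #include <vector>

  struct Value { int Id; };
  struct TreeEntry { std::vector<Value *> Scalars; };

  int main() {
    Value A{0}, B{1};
    TreeEntry E0{{&A, &B}}, E1{{&B}};       // B is a scalar of both nodes
    std::map<Value *, TreeEntry *> EntryOf; // value -> node; last write wins
    for (TreeEntry *E : {&E0, &E1})
      for (Value *V : E->Scalars)
        EntryOf[V] = E;
    // Value-based walk: while analyzing E0, looking B up yields E1.
    // Entry-based walk: each node is visited directly with its own
    // Scalars, so no reverse lookup is needed.
    return EntryOf[&B] == &E0 ? 0 : 1; // returns 1: the lookup went to E1
  }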
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 339 +++++++++---------
 1 file changed, 167 insertions(+), 172 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c3dcf73b0b762..22ef9b5fb994e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2325,19 +2325,17 @@ class BoUpSLP {
   ~BoUpSLP();
 
 private:
-  /// Determine if a vectorized value \p V in can be demoted to
-  /// a smaller type with a truncation. We collect the values that will be
-  /// demoted in ToDemote and additional roots that require investigating in
-  /// Roots.
-  /// \param DemotedConsts list of Instruction/OperandIndex pairs that are
-  /// constant and to be demoted. Required to correctly identify constant nodes
-  /// to be demoted.
-  bool collectValuesToDemote(
-      Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
-      SmallVectorImpl<Value *> &ToDemote,
-      DenseMap<Instruction *, SmallVector<unsigned>> &DemotedConsts,
-      DenseSet<Value *> &Visited, unsigned &MaxDepthLevel,
-      bool &IsProfitableToDemote, bool IsTruncRoot) const;
+  /// Determine if a node \p E can be demoted to a smaller type with a
+  /// truncation. We collect the entries that will be demoted in ToDemote.
+  /// \param E Node for analysis
+  /// \param ToDemote indices of the nodes to be demoted.
+  bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
+                             unsigned &BitWidth,
+                             SmallVectorImpl<unsigned> &ToDemote,
+                             DenseSet<const TreeEntry *> &Visited,
+                             unsigned &MaxDepthLevel,
+                             bool &IsProfitableToDemote,
+                             bool IsTruncRoot) const;
 
   /// Check if the operands on the edges \p Edges of the \p UserTE allows
   /// reordering (i.e. the operands can be reordered because they have only one
@@ -14126,20 +14124,17 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
   return Width;
 }
 
-// Determine if a value V in a vectorizable expression Expr can be demoted to a
-// smaller type with a truncation. We collect the values that will be demoted
-// in ToDemote and additional roots that require investigating in Roots.
 bool BoUpSLP::collectValuesToDemote(
-    Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
-    SmallVectorImpl<Value *> &ToDemote,
-    DenseMap<Instruction *, SmallVector<unsigned>> &DemotedConsts,
-    DenseSet<Value *> &Visited, unsigned &MaxDepthLevel,
-    bool &IsProfitableToDemote, bool IsTruncRoot) const {
+    const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
+    SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
+    unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
+    bool IsTruncRoot) const {
   // We can always demote constants.
- if (isa(V)) + if (all_of(E.Scalars, IsaPred)) return true; - if (DL->getTypeSizeInBits(V->getType()) == BitWidth) { + unsigned OrigBitWidth = DL->getTypeSizeInBits(E.Scalars.front()->getType()); + if (OrigBitWidth == BitWidth) { MaxDepthLevel = 1; return true; } @@ -14150,7 +14145,6 @@ bool BoUpSLP::collectValuesToDemote( auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { if (MultiNodeScalars.contains(V)) return false; - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); if (OrigBitWidth > BitWidth) { APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) @@ -14168,47 +14162,50 @@ bool BoUpSLP::collectValuesToDemote( BitWidth = std::max(BitWidth, BitWidth1); return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); }; - auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) { + using namespace std::placeholders; + auto FinalAnalysis = [&]() { if (!IsProfitableToDemote) return false; - return (ITE && ITE->UserTreeIndices.size() > 1) || - IsPotentiallyTruncated(V, BitWidth); + bool Res = all_of( + E.Scalars, std::bind(IsPotentiallyTruncated, _1, std::ref(BitWidth))); + // Gather demoted constant operands. + if (Res && E.State == TreeEntry::NeedToGather && + all_of(E.Scalars, IsaPred)) + ToDemote.push_back(E.Idx); + return Res; }; // TODO: improve handling of gathered values and others. - auto *I = dyn_cast(V); - const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr; - if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) || - all_of(I->users(), [&](User *U) { - return isa(U) && !getTreeEntry(U); + if (E.State == TreeEntry::NeedToGather || !Visited.insert(&E).second || + any_of(E.Scalars, [&](Value *V) { + return all_of(V->users(), [&](User *U) { + return isa(U) && !getTreeEntry(U); + }); })) return FinalAnalysis(); - if (!all_of(I->users(), - [=](User *U) { - return getTreeEntry(U) || - (UserIgnoreList && UserIgnoreList->contains(U)) || - (U->getType()->isSized() && - !U->getType()->isScalableTy() && - DL->getTypeSizeInBits(U->getType()) <= BitWidth); - }) && - !IsPotentiallyTruncated(I, BitWidth)) + if (any_of(E.Scalars, [&](Value *V) { + return !all_of(V->users(), [=](User *U) { + return getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || + (U->getType()->isSized() && !U->getType()->isScalableTy() && + DL->getTypeSizeInBits(U->getType()) <= BitWidth); + }) && !IsPotentiallyTruncated(V, BitWidth); + })) return false; - unsigned Start = 0; - unsigned End = I->getNumOperands(); - - auto ProcessOperands = [&](ArrayRef Operands, bool &NeedToExit) { + auto ProcessOperands = [&](ArrayRef Operands, + bool &NeedToExit) { NeedToExit = false; unsigned InitLevel = MaxDepthLevel; - for (Value *IncValue : Operands) { + for (const TreeEntry *Op : Operands) { unsigned Level = InitLevel; - if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth, - ToDemote, DemotedConsts, Visited, Level, - IsProfitableToDemote, IsTruncRoot)) { + if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth, + ToDemote, Visited, Level, IsProfitableToDemote, + IsTruncRoot)) { if (!IsProfitableToDemote) return false; NeedToExit = true; - if (!FinalAnalysis(ITE)) + if (!FinalAnalysis()) return false; continue; } @@ -14220,7 +14217,6 @@ bool BoUpSLP::collectValuesToDemote( [&](function_ref Checker, bool &NeedToExit) { // Try all bitwidth < OrigBitWidth. 
NeedToExit = false; - uint32_t OrigBitWidth = DL->getTypeSizeInBits(I->getType()); unsigned BestFailBitwidth = 0; for (; BitWidth < OrigBitWidth; BitWidth *= 2) { if (Checker(BitWidth, OrigBitWidth)) @@ -14241,18 +14237,20 @@ bool BoUpSLP::collectValuesToDemote( return false; }; auto TryProcessInstruction = - [&](Instruction *I, const TreeEntry &ITE, unsigned &BitWidth, - ArrayRef Operands = std::nullopt, + [&](unsigned &BitWidth, + ArrayRef Operands = std::nullopt, function_ref Checker = {}) { if (Operands.empty()) { if (!IsTruncRoot) MaxDepthLevel = 1; - (void)IsPotentiallyTruncated(V, BitWidth); + (void)for_each(E.Scalars, std::bind(IsPotentiallyTruncated, _1, + std::ref(BitWidth))); } else { // Several vectorized uses? Check if we can truncate it, otherwise - // exit. - if (ITE.UserTreeIndices.size() > 1 && - !IsPotentiallyTruncated(I, BitWidth)) + if (E.UserTreeIndices.size() > 1 && + !all_of(E.Scalars, std::bind(IsPotentiallyTruncated, _1, + std::ref(BitWidth)))) return false; bool NeedToExit = false; if (Checker && !AttemptCheckBitwidth(Checker, NeedToExit)) @@ -14266,26 +14264,22 @@ bool BoUpSLP::collectValuesToDemote( } ++MaxDepthLevel; - // Gather demoted constant operands. - for (unsigned Idx : seq(Start, End)) - if (isa(I->getOperand(Idx))) - DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); - // Record the value that we can demote. - ToDemote.push_back(V); + // Record the entry that we can demote. + ToDemote.push_back(E.Idx); return IsProfitableToDemote; }; - switch (I->getOpcode()) { + switch (E.getOpcode()) { // We can always demote truncations and extensions. Since truncations can // seed additional demotion, we save the truncated value. case Instruction::Trunc: if (IsProfitableToDemoteRoot) IsProfitableToDemote = true; - return TryProcessInstruction(I, *ITE, BitWidth); + return TryProcessInstruction(BitWidth); case Instruction::ZExt: case Instruction::SExt: IsProfitableToDemote = true; - return TryProcessInstruction(I, *ITE, BitWidth); + return TryProcessInstruction(BitWidth); // We can demote certain binary operations if we can demote both of their // operands. @@ -14295,112 +14289,128 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return TryProcessInstruction(I, *ITE, BitWidth, - {I->getOperand(0), I->getOperand(1)}); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}); } case Instruction::Shl: { // If we are truncating the result of this SHL, and if it's a shift of an // inrange amount, we can always perform a SHL in a smaller type. auto ShlChecker = [&](unsigned BitWidth, unsigned) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - return AmtKnownBits.getMaxValue().ult(BitWidth); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + return AmtKnownBits.getMaxValue().ult(BitWidth); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, ShlChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, ShlChecker); } case Instruction::LShr: { // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. 
auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return AmtKnownBits.getMaxValue().ult(BitWidth) && - MaskedValueIsZero(I->getOperand(0), ShiftedBits, - SimplifyQuery(*DL)); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return AmtKnownBits.getMaxValue().ult(BitWidth) && + MaskedValueIsZero(I->getOperand(0), ShiftedBits, + SimplifyQuery(*DL)); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, LShrChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, + LShrChecker); } case Instruction::AShr: { // If this is a truncate of an arithmetic shr, we can truncate it to a // smaller ashr iff we know that all the bits from the sign bit of the // original type and the sign bit of the truncate type are similar. auto AShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - unsigned ShiftedBits = OrigBitWidth - BitWidth; - return AmtKnownBits.getMaxValue().ult(BitWidth) && - ShiftedBits < - ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + unsigned ShiftedBits = OrigBitWidth - BitWidth; + return AmtKnownBits.getMaxValue().ult(BitWidth) && + ShiftedBits < ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, + nullptr, DT); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, AShrChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, + AShrChecker); } case Instruction::UDiv: case Instruction::URem: { // UDiv and URem can be truncated if all the truncated bits are zero. auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) { assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && - MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + }); }; - return TryProcessInstruction(I, *ITE, BitWidth, - {I->getOperand(0), I->getOperand(1)}, Checker); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, Checker); } // We can demote selects if we can demote their true and false values. case Instruction::Select: { - Start = 1; - auto *SI = cast(I); - return TryProcessInstruction(I, *ITE, BitWidth, - {SI->getTrueValue(), SI->getFalseValue()}); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 1), getOperandEntry(&E, 2)}); } // We can demote phis if we can demote all their incoming operands. Note that // we don't need to worry about cycles since we ensure single use above. 
case Instruction::PHI: { - PHINode *PN = cast(I); - SmallVector Ops(PN->incoming_values().begin(), - PN->incoming_values().end()); - return TryProcessInstruction(I, *ITE, BitWidth, Ops); + const unsigned NumOps = E.getNumOperands(); + SmallVector Ops(NumOps); + transform(seq(0, NumOps), Ops.begin(), + std::bind(&BoUpSLP::getOperandEntry, this, &E, _1)); + + return TryProcessInstruction(BitWidth, Ops); } case Instruction::Call: { - auto *IC = dyn_cast(I); + auto *IC = dyn_cast(E.getMainOp()); if (!IC) break; Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI); if (ID != Intrinsic::abs && ID != Intrinsic::smin && ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax) break; - SmallVector Operands(1, I->getOperand(0)); + SmallVector Operands(1, getOperandEntry(&E, 0)); function_ref CallChecker; auto CompChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); - if (ID == Intrinsic::umin || ID == Intrinsic::umax) { - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && - MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); - } - assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && - "Expected min/max intrinsics only."); - unsigned SignBits = OrigBitWidth - BitWidth; - return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, - nullptr, DT) && - SignBits <= - ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, nullptr, DT); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + if (ID == Intrinsic::umin || ID == Intrinsic::umax) { + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, + SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + } + assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && + "Expected min/max intrinsics only."); + unsigned SignBits = OrigBitWidth - BitWidth; + return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, + nullptr, DT) && + SignBits <= ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, + nullptr, DT); + }); }; - End = 1; if (ID != Intrinsic::abs) { - Operands.push_back(I->getOperand(1)); - End = 2; + Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; } InstructionCost BestCost = std::numeric_limits::max(); unsigned BestBitWidth = BitWidth; - unsigned VF = ITE->Scalars.size(); + unsigned VF = E.Scalars.size(); // Choose the best bitwidth based on cost estimations. auto Checker = [&](unsigned BitWidth, unsigned) { unsigned MinBW = PowerOf2Ceil(BitWidth); @@ -14419,7 +14429,7 @@ bool BoUpSLP::collectValuesToDemote( [[maybe_unused]] bool NeedToExit; (void)AttemptCheckBitwidth(Checker, NeedToExit); BitWidth = BestBitWidth; - return TryProcessInstruction(I, *ITE, BitWidth, Operands, CallChecker); + return TryProcessInstruction(BitWidth, Operands, CallChecker); } // Otherwise, conservatively give up. @@ -14473,26 +14483,27 @@ void BoUpSLP::computeMinimumValueSizes() { ++NodeIdx; } - // Analyzed in reduction already and not profitable - exit. + // Analyzed the reduction already and not profitable - exit. 
if (AnalyzedMinBWVals.contains(VectorizableTree[NodeIdx]->Scalars.front())) return; - SmallVector ToDemote; - DenseMap> DemotedConsts; - auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, - bool IsTopRoot, bool IsProfitableToDemoteRoot, - unsigned Opcode, unsigned Limit, - bool IsTruncRoot, bool IsSignedCmp) { + SmallVector ToDemote; + auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot, + bool IsProfitableToDemoteRoot, unsigned Opcode, + unsigned Limit, bool IsTruncRoot, + bool IsSignedCmp) { ToDemote.clear(); - auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); + unsigned VF = E.getVectorFactor(); + auto *TreeRootIT = dyn_cast(E.Scalars.front()->getType()); if (!TreeRootIT || !Opcode) return 0u; - if (AnalyzedMinBWVals.contains(TreeRoot.front())) + if (any_of(E.Scalars, + [&](Value *V) { return AnalyzedMinBWVals.contains(V); })) return 0u; - unsigned NumParts = TTI->getNumberOfParts( - FixedVectorType::get(TreeRoot.front()->getType(), VF)); + unsigned NumParts = + TTI->getNumberOfParts(FixedVectorType::get(TreeRootIT, VF)); // The maximum bit width required to represent all the values that can be // demoted without loss of precision. It would be safe to truncate the roots @@ -14505,14 +14516,14 @@ void BoUpSLP::computeMinimumValueSizes() { // True. // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - bool IsKnownPositive = !IsSignedCmp && all_of(TreeRoot, [&](Value *R) { + bool IsKnownPositive = !IsSignedCmp && all_of(E.Scalars, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); // We first check if all the bits of the roots are demanded. If they're not, // we can truncate the roots to this narrower type. - for (auto *Root : TreeRoot) { + for (Value *Root : E.Scalars) { unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()); unsigned BitWidth1 = NumTypeBits - NumSignBits; @@ -14557,23 +14568,22 @@ void BoUpSLP::computeMinimumValueSizes() { // Conservatively determine if we can actually truncate the roots of the // expression. Collect the values that can be demoted in ToDemote and // additional roots that require investigating in Roots. - for (auto *Root : TreeRoot) { - DenseSet Visited; - unsigned MaxDepthLevel = IsTruncRoot ? Limit : 1; - bool NeedToDemote = IsProfitableToDemote; - - if (!collectValuesToDemote(Root, IsProfitableToDemoteRoot, MaxBitWidth, - ToDemote, DemotedConsts, Visited, - MaxDepthLevel, NeedToDemote, IsTruncRoot) || - (MaxDepthLevel <= Limit && - !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && - (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || - DL->getTypeSizeInBits(Root->getType()) / - DL->getTypeSizeInBits( - cast(Root)->getOperand(0)->getType()) > - 2))))) - return 0u; - } + DenseSet Visited; + unsigned MaxDepthLevel = IsTruncRoot ? Limit : 1; + bool NeedToDemote = IsProfitableToDemote; + + if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth, + ToDemote, Visited, MaxDepthLevel, NeedToDemote, + IsTruncRoot) || + (MaxDepthLevel <= Limit && + !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && + (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || + DL->getTypeSizeInBits(TreeRootIT) / + DL->getTypeSizeInBits(cast(E.Scalars.front()) + ->getOperand(0) + ->getType()) > + 2))))) + return 0u; // Round MaxBitWidth up to the next power-of-two. 
MaxBitWidth = bit_ceil(MaxBitWidth); @@ -14624,8 +14634,8 @@ void BoUpSLP::computeMinimumValueSizes() { VectorizableTree.front()->Scalars.front()->getType())) Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( - TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, - IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot, IsSignedCmp); + *VectorizableTree[NodeIdx].get(), IsTopRoot, IsProfitableToDemoteRoot, + Opcode, Limit, IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); @@ -14634,13 +14644,15 @@ void BoUpSLP::computeMinimumValueSizes() { } for (unsigned Idx : RootDemotes) { - Value *V = VectorizableTree[Idx]->Scalars.front(); - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); - if (OrigBitWidth > MaxBitWidth) { - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth); - if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) - ToDemote.push_back(V); - } + if (all_of(VectorizableTree[Idx]->Scalars, [&](Value *V) { + uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); + if (OrigBitWidth > MaxBitWidth) { + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth); + return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)); + } + return false; + })) + ToDemote.push_back(Idx); } RootDemotes.clear(); IsTopRoot = false; @@ -14687,9 +14699,8 @@ void BoUpSLP::computeMinimumValueSizes() { // Finally, map the values we can demote to the maximum bit with we // computed. - for (Value *Scalar : ToDemote) { - TreeEntry *TE = getTreeEntry(Scalar); - assert(TE && "Expected vectorized scalar."); + for (unsigned Idx : ToDemote) { + TreeEntry *TE = VectorizableTree[Idx].get(); if (MinBWs.contains(TE)) continue; bool IsSigned = TE->getOpcode() == Instruction::SExt || @@ -14697,22 +14708,6 @@ void BoUpSLP::computeMinimumValueSizes() { return !isKnownNonNegative(R, SimplifyQuery(*DL)); }); MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); - const auto *I = cast(Scalar); - auto DCIt = DemotedConsts.find(I); - if (DCIt != DemotedConsts.end()) { - for (unsigned Idx : DCIt->getSecond()) { - // Check that all instructions operands are demoted. - const TreeEntry *CTE = getOperandEntry(TE, Idx); - if (all_of(TE->Scalars, - [&](Value *V) { - auto SIt = DemotedConsts.find(cast(V)); - return SIt != DemotedConsts.end() && - is_contained(SIt->getSecond(), Idx); - }) || - all_of(CTE->Scalars, IsaPred)) - MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); - } - } } } } From 50d368aee981738cd05f3d16f5d1cfc122c9b0ab Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Apr 2024 09:07:43 -0500 Subject: [PATCH 011/886] [LinkerWrapper] Relax ordering of static libraries for offloading (#87532) Summary: The linker wrapper attempts to maintain consistent semantics with existing host invocations. Static libraries by default only extract if there are non-weak symbols that remain undefined. However, we have situations between linkers that put different meanings on ordering. The ld.bfd linker requires static libraries to be defined after the symbols, while `ld.lld` relaxes this rule. The linker wrapper went with the former as it's the easier solution, however this has caused a lot of issues as I've had to explain this rule to several people, it also make it difficult to include things like `libc` in the OpenMP runtime because it would sometimes be linked before or after. 
This patch reworks the logic to perform, more or less, the following steps for static libraries. 1. Split library / object inputs. 2. Include every object input and record its undefined symbols. 3. Repeatedly try to extract static libraries to resolve those symbols. If a file is extracted, we need to check every library again to resolve any new undefined symbols. This allows the following to work, and it will cause fewer issues when replacing HIP, which uses `--whole-archive`, so it's very likely the old logic would have regressed. ```console $ clang -lfoo main.c -fopenmp --offload-arch=native ``` --- clang/test/Driver/linker-wrapper-libs.c | 4 + .../ClangLinkerWrapper.cpp | 121 ++++++++++++------ 2 files changed, 83 insertions(+), 42 deletions(-) diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 9a78200d7d3cf..119e306857187 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -44,6 +44,8 @@ int bar() { return weak; } // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -66,6 +68,8 @@ int bar() { return weak; } // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 73e695a67093e..a1879fc7712dc 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1448,9 +1448,9 @@ getDeviceInput(const ArgList &Args) { StringSaver Saver(Alloc); // Try to extract device code from the linker input files. - DenseMap> InputFiles; - DenseMap> Syms; bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false; + SmallVector ObjectFilesToExtract; + SmallVector ArchiveFilesToExtract; for (const opt::Arg *Arg : Args.filtered( OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { if (Arg->getOption().matches(OPT_whole_archive) || @@ -1486,50 +1486,87 @@ getDeviceInput(const ArgList &Args) { if (Error Err = extractOffloadBinaries(Buffer, Binaries)) return std::move(Err); - // We only extract archive members that are needed. bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive; - bool Extracted = true; - while (Extracted) { - Extracted = false; - for (OffloadFile &Binary : Binaries) { - // If the binary was previously extracted it will be set to null.
- if (!Binary.getBinary()) + for (auto &OffloadFile : Binaries) { + if (identify_magic(Buffer.getBuffer()) == file_magic::archive && + !WholeArchive) + ArchiveFilesToExtract.emplace_back(std::move(OffloadFile)); + else + ObjectFilesToExtract.emplace_back(std::move(OffloadFile)); + } + } + + // Link all standard input files and update the list of symbols. + DenseMap> InputFiles; + DenseMap> Syms; + for (OffloadFile &Binary : ObjectFilesToExtract) { + if (!Binary.getBinary()) + continue; + + SmallVector CompatibleTargets = {Binary}; + for (const auto &[ID, Input] : InputFiles) + if (object::areTargetsCompatible(Binary, ID)) + CompatibleTargets.emplace_back(ID); + + for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { + Expected ExtractOrErr = getSymbols( + Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(), + /*IsArchive=*/false, Saver, Syms[ID]); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); + + // If another target needs this binary it must be copied instead. + if (Index == CompatibleTargets.size() - 1) + InputFiles[ID].emplace_back(std::move(Binary)); + else + InputFiles[ID].emplace_back(Binary.copy()); + } + } + + // Archive members only extract if they define needed symbols. We do this + // after every regular input file so that libraries may be included out of + // order. This follows 'ld.lld' semantics which are more lenient. + bool Extracted = true; + while (Extracted) { + Extracted = false; + for (OffloadFile &Binary : ArchiveFilesToExtract) { + // If the binary was previously extracted it will be set to null. + if (!Binary.getBinary()) + continue; + + SmallVector CompatibleTargets = {Binary}; + for (const auto &[ID, Input] : InputFiles) + if (object::areTargetsCompatible(Binary, ID)) + CompatibleTargets.emplace_back(ID); + + for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { + // Only extract if we have an object matching this target. if (!InputFiles.count(ID)) continue; - SmallVector CompatibleTargets = {Binary}; - for (const auto &[ID, Input] : InputFiles) - if (object::areTargetsCompatible(Binary, ID)) - CompatibleTargets.emplace_back(ID); - - for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { - // Only extract an if we have an an object matching this target. - if (IsArchive && !WholeArchive && !InputFiles.count(ID)) - continue; - - Expected ExtractOrErr = getSymbols( - Binary.getBinary()->getImage(), - Binary.getBinary()->getOffloadKind(), IsArchive, Saver, Syms[ID]); - if (!ExtractOrErr) - return ExtractOrErr.takeError(); - - Extracted = !WholeArchive && *ExtractOrErr; - - // Skip including the file if it is an archive that does not resolve - // any symbols. - if (IsArchive && !WholeArchive && !Extracted) - continue; - - // If another target needs this binary it must be copied instead. - if (Index == CompatibleTargets.size() - 1) - InputFiles[ID].emplace_back(std::move(Binary)); - else - InputFiles[ID].emplace_back(Binary.copy()); - } + Expected ExtractOrErr = + getSymbols(Binary.getBinary()->getImage(), + Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true, + Saver, Syms[ID]); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); - // If we extracted any files we need to check all the symbols again. - if (Extracted) - break; + Extracted = *ExtractOrErr; + + // Skip including the file if it is an archive that does not resolve + // any symbols. + if (!Extracted) + continue; + + // If another target needs this binary it must be copied instead.
+ if (Index == CompatibleTargets.size() - 1) + InputFiles[ID].emplace_back(std::move(Binary)); + else + InputFiles[ID].emplace_back(Binary.copy()); } + + // If we extracted any files we need to check all the symbols again. + if (Extracted) + break; } } From 6b35cbee3f577d9ee55f7277affa0fe194859b25 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Wed, 10 Apr 2024 18:09:39 +0400 Subject: [PATCH 012/886] [clang] Introduce `SemaSYCL` (#88086) This patch moves SYCL-related `Sema` functions into a new `SemaSYCL` class, following the recent example of OpenACC and HLSL. This is part of the effort to split `Sema`. Additional context can be found in #82217, #84184, #87634. --- clang/include/clang/Sema/Sema.h | 55 ++++-------------- clang/include/clang/Sema/SemaSYCL.h | 65 +++++++++++++++++++++++ clang/lib/Parse/ParseExpr.cpp | 5 ++- clang/lib/Sema/Sema.cpp | 6 ++- clang/lib/Sema/SemaExpr.cpp | 22 ---------- clang/lib/Sema/SemaSYCL.cpp | 52 +++++++++++++++------ clang/lib/Sema/TreeTransform.h | 4 +- 7 files changed, 121 insertions(+), 88 deletions(-) create mode 100644 clang/include/clang/Sema/SemaSYCL.h diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9769d36900664..e3e255a0dd76f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -184,6 +184,7 @@ class PseudoObjectExpr; class QualType; class SemaHLSL; class SemaOpenACC; +class SemaSYCL; class StandardConversionSequence; class Stmt; class StringLiteral; @@ -467,7 +468,6 @@ class Sema final : public SemaBase { // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) // 38. CUDA (SemaCUDA.cpp) // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp) - // 40. SYCL Constructs (SemaSYCL.cpp) /// \name Semantic Analysis /// Implementations are in Sema.cpp @@ -974,6 +974,11 @@ class Sema final : public SemaBase { return *OpenACCPtr; } + SemaSYCL &SYCL() { + assert(SYCLPtr); + return *SYCLPtr; + } + protected: friend class Parser; friend class InitializationSequence; @@ -1006,6 +1011,7 @@ class Sema final : public SemaBase { std::unique_ptr HLSLPtr; std::unique_ptr OpenACCPtr; + std::unique_ptr SYCLPtr; ///@} @@ -5455,15 +5461,6 @@ class Sema final : public SemaBase { ExprResult ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind); ExprResult ActOnIntegerConstant(SourceLocation Loc, uint64_t Val); - ExprResult BuildSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - TypeSourceInfo *TSI); - ExprResult ActOnSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - ParsedType ParsedTy); - bool CheckLoopHintExpr(Expr *E, SourceLocation Loc); ExprResult ActOnNumericConstant(const Token &Tok, Scope *UDLScope = nullptr); @@ -14516,44 +14513,6 @@ class Sema final : public SemaBase { OpenMPDirectiveKind CancelRegion); ///@} - - // - // - // ------------------------------------------------------------------------- - // - // - - /// \name SYCL Constructs - /// Implementations are in SemaSYCL.cpp - ///@{ - -public: - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current - /// context is "used as device code". - /// - /// - If CurLexicalContext is a kernel function or it is known that the - /// function will be emitted for the device, emits the diagnostics - /// immediately.
- /// - If CurLexicalContext is a function and we are compiling - /// for the device, but we don't know that this function will be codegen'ed - /// for devive yet, creates a diagnostic which is emitted if and when we - /// realize that the function will be codegen'ed. - /// - /// Example usage: - /// - /// Diagnose __float128 type usage only from SYCL device code if the current - /// target doesn't support it - /// if (!S.Context.getTargetInfo().hasFloat128Type() && - /// S.getLangOpts().SYCLIsDevice) - /// SYCLDiagIfDeviceCode(Loc, diag::err_type_unsupported) << "__float128"; - SemaDiagnosticBuilder SYCLDiagIfDeviceCode(SourceLocation Loc, - unsigned DiagID); - - void deepTypeCheckForSYCLDevice(SourceLocation UsedAt, - llvm::DenseSet Visited, - ValueDecl *DeclToCheck); - - ///@} }; DeductionFailureInfo diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h new file mode 100644 index 0000000000000..f0dcb92ee9ab3 --- /dev/null +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -0,0 +1,65 @@ +//===----- SemaSYCL.h ------- Semantic Analysis for SYCL constructs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares semantic analysis for SYCL constructs. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMASYCL_H +#define LLVM_CLANG_SEMA_SEMASYCL_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Sema/Ownership.h" +#include "clang/Sema/SemaBase.h" +#include "llvm/ADT/DenseSet.h" + +namespace clang { + +class SemaSYCL : public SemaBase { +public: + SemaSYCL(Sema &S); + + /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current + /// context is "used as device code". + /// + /// - If CurLexicalContext is a kernel function or it is known that the + /// function will be emitted for the device, emits the diagnostics + /// immediately. + /// - If CurLexicalContext is a function and we are compiling + /// for the device, but we don't know yet that this function will be + /// codegen'ed for the device, creates a diagnostic which is emitted if and + /// when we realize that the function will be codegen'ed.
+ /// + /// Example usage: + /// + /// Diagnose __float128 type usage only from SYCL device code if the current + /// target doesn't support it + /// if (!S.Context.getTargetInfo().hasFloat128Type() && + /// S.getLangOpts().SYCLIsDevice) + /// DiagIfDeviceCode(Loc, diag::err_type_unsupported) << "__float128"; + SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); + + void deepTypeCheckForDevice(SourceLocation UsedAt, + llvm::DenseSet Visited, + ValueDecl *DeclToCheck); + + ExprResult BuildUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + TypeSourceInfo *TSI); + ExprResult ActOnUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + ParsedType ParsedTy); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SEMA_SEMASYCL_H diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index ae23cb432c439..d08e675604d19 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -30,6 +30,7 @@ #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" #include @@ -2490,8 +2491,8 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() { if (T.consumeClose()) return ExprError(); - return Actions.ActOnSYCLUniqueStableNameExpr(OpLoc, T.getOpenLocation(), - T.getCloseLocation(), Ty.get()); + return Actions.SYCL().ActOnUniqueStableNameExpr( + OpLoc, T.getOpenLocation(), T.getCloseLocation(), Ty.get()); } /// Parse a sizeof or alignof expression. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 04eadb5f3b8ae..801b03a63dbc8 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -45,6 +45,7 @@ #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TemplateInstCallback.h" #include "clang/Sema/TypoCorrection.h" @@ -201,6 +202,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, CurScope(nullptr), Ident_super(nullptr), HLSLPtr(std::make_unique(*this)), OpenACCPtr(std::make_unique(*this)), + SYCLPtr(std::make_unique(*this)), MSPointerToMemberRepresentationMethod( LangOpts.getMSPointerToMemberRepresentationMethod()), MSStructPragmaOn(false), VtorDispStack(LangOpts.getVtorDispMode()), @@ -1903,7 +1905,7 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) { : CUDADiagIfHostCode(Loc, DiagID); if (getLangOpts().SYCLIsDevice) - return SYCLDiagIfDeviceCode(Loc, DiagID); + return SYCL().DiagIfDeviceCode(Loc, DiagID); return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, DiagID, FD, *this); @@ -1919,7 +1921,7 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { // constant byte size like zero length arrays. So, do a deep check for SYCL. 
if (D && LangOpts.SYCLIsDevice) { llvm::DenseSet Visited; - deepTypeCheckForSYCLDevice(Loc, Visited, D); + SYCL().deepTypeCheckForDevice(Loc, Visited, D); } Decl *C = cast(getCurLexicalContext()); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 594c11788f4e7..4d4ef9b16381b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3794,28 +3794,6 @@ ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, SL); } -ExprResult Sema::BuildSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - TypeSourceInfo *TSI) { - return SYCLUniqueStableNameExpr::Create(Context, OpLoc, LParen, RParen, TSI); -} - -ExprResult Sema::ActOnSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - ParsedType ParsedTy) { - TypeSourceInfo *TSI = nullptr; - QualType Ty = GetTypeFromParser(ParsedTy, &TSI); - - if (Ty.isNull()) - return ExprError(); - if (!TSI) - TSI = Context.getTrivialTypeSourceInfo(Ty, LParen); - - return BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); -} - ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { return BuildPredefinedExpr(Loc, getPredefinedExprKind(Kind)); } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 18ebaa13346a4..18f6d8f030473 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -8,6 +8,7 @@ // This implements Semantic Analysis for SYCL constructs. //===----------------------------------------------------------------------===// +#include "clang/Sema/SemaSYCL.h" #include "clang/AST/Mangle.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -18,28 +19,30 @@ using namespace clang; // SYCL device specific diagnostics implementation // ----------------------------------------------------------------------------- -Sema::SemaDiagnosticBuilder Sema::SYCLDiagIfDeviceCode(SourceLocation Loc, +SemaSYCL::SemaSYCL(Sema &S) : SemaBase(S) {} + +Sema::SemaDiagnosticBuilder SemaSYCL::DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().SYCLIsDevice && "Should only be called during SYCL compilation"); - FunctionDecl *FD = dyn_cast(getCurLexicalContext()); + FunctionDecl *FD = dyn_cast(SemaRef.getCurLexicalContext()); SemaDiagnosticBuilder::Kind DiagKind = [this, FD] { if (!FD) return SemaDiagnosticBuilder::K_Nop; - if (getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) + if (SemaRef.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) return SemaDiagnosticBuilder::K_ImmediateWithCallStack; return SemaDiagnosticBuilder::K_Deferred; }(); - return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, *this); + return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, SemaRef); } -static bool isZeroSizedArray(Sema &SemaRef, QualType Ty) { - if (const auto *CAT = SemaRef.getASTContext().getAsConstantArrayType(Ty)) +static bool isZeroSizedArray(SemaSYCL &S, QualType Ty) { + if (const auto *CAT = S.getASTContext().getAsConstantArrayType(Ty)) return CAT->isZeroSize(); return false; } -void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, +void SemaSYCL::deepTypeCheckForDevice(SourceLocation UsedAt, llvm::DenseSet Visited, ValueDecl *DeclToCheck) { assert(getLangOpts().SYCLIsDevice && @@ -51,18 +54,18 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, auto Check = [&](QualType TypeToCheck, const ValueDecl *D) { bool ErrorFound = false; if (isZeroSizedArray(*this, TypeToCheck)) { - 
SYCLDiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; + DiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; ErrorFound = true; } // Checks for other types can also be done here. if (ErrorFound) { if (NeedToEmitNotes) { if (auto *FD = dyn_cast(D)) - SYCLDiagIfDeviceCode(FD->getLocation(), - diag::note_illegal_field_declared_here) + DiagIfDeviceCode(FD->getLocation(), + diag::note_illegal_field_declared_here) << FD->getType()->isPointerType() << FD->getType(); else - SYCLDiagIfDeviceCode(D->getLocation(), diag::note_declared_at); + DiagIfDeviceCode(D->getLocation(), diag::note_declared_at); } } @@ -93,8 +96,8 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, auto EmitHistory = [&]() { // The first element is always nullptr. for (uint64_t Index = 1; Index < History.size(); ++Index) { - SYCLDiagIfDeviceCode(History[Index]->getLocation(), - diag::note_within_field_of_type) + DiagIfDeviceCode(History[Index]->getLocation(), + diag::note_within_field_of_type) << History[Index]->getType(); } }; @@ -130,3 +133,26 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, } } while (!StackForRecursion.empty()); } + +ExprResult SemaSYCL::BuildUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + TypeSourceInfo *TSI) { + return SYCLUniqueStableNameExpr::Create(getASTContext(), OpLoc, LParen, + RParen, TSI); +} + +ExprResult SemaSYCL::ActOnUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + ParsedType ParsedTy) { + TypeSourceInfo *TSI = nullptr; + QualType Ty = SemaRef.GetTypeFromParser(ParsedTy, &TSI); + + if (Ty.isNull()) + return ExprError(); + if (!TSI) + TSI = getASTContext().getTrivialTypeSourceInfo(Ty, LParen); + + return BuildUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); +} diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index d4d2fa61d65ea..79d60588ae536 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -40,6 +40,7 @@ #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaSYCL.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" #include @@ -2632,7 +2633,8 @@ class TreeTransform { SourceLocation LParen, SourceLocation RParen, TypeSourceInfo *TSI) { - return getSema().BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); + return getSema().SYCL().BuildUniqueStableNameExpr(OpLoc, LParen, RParen, + TSI); } /// Build a new predefined expression. From 0c7b92a42a36563dfd28e3a828e87f4f3a6e4311 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Wed, 10 Apr 2024 07:10:24 -0700 Subject: [PATCH 013/886] [OpenACC] Implement Default clause for Compute Constructs (#88135) As a follow-up to my previous commits, this is an implementation of a single clause, in this case the 'default' clause. This implements all semantic analysis for it on compute constructs, and continues to leave it rejected for all others (some as 'doesn't appertain', others as 'not implemented', as appropriate). This also implements and tests the TreeTransform as requested in the previous patch.
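To make the implemented semantics concrete, here is a hedged sketch of what Sema now accepts and rejects; the diagnostic wording is illustrative, and the full set of cases is in the tests below:

```c
// Accepted: 'default' takes 'none' or 'present' on a compute construct.
#pragma acc parallel default(none)
while (0);

// Rejected: at most one 'default' clause may appear on a directive, e.g.
// "'default' clause cannot appear more than once on a 'serial' directive".
#pragma acc serial default(present) default(none)
while (0);
```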
--- clang/include/clang/AST/OpenACCClause.h | 38 ++++++++++ .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Basic/OpenACCKinds.h | 23 ++++++ clang/include/clang/Sema/SemaOpenACC.h | 19 ++++- clang/lib/AST/OpenACCClause.cpp | 19 +++++ clang/lib/AST/StmtProfile.cpp | 5 ++ clang/lib/AST/TextNodeDumper.cpp | 11 +++ clang/lib/Parse/ParseOpenACC.cpp | 8 +- clang/lib/Sema/SemaOpenACC.cpp | 65 ++++++++++++++-- clang/lib/Sema/TreeTransform.h | 42 ++++++++++- clang/lib/Serialization/ASTReader.cpp | 7 +- clang/lib/Serialization/ASTWriter.cpp | 7 +- clang/test/ParserOpenACC/parse-clauses.c | 20 ++--- .../SemaOpenACC/compute-construct-ast.cpp | 14 +++- .../compute-construct-clause-ast.cpp | 74 +++++++++++++++++++ .../compute-construct-default-clause.c | 55 ++++++++++++++ .../compute-construct-default-clause.cpp | 39 ++++++++++ 17 files changed, 417 insertions(+), 33 deletions(-) create mode 100644 clang/test/SemaOpenACC/compute-construct-clause-ast.cpp create mode 100644 clang/test/SemaOpenACC/compute-construct-default-clause.c create mode 100644 clang/test/SemaOpenACC/compute-construct-default-clause.cpp diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 06a0098bbda4c..27e4e1a12c983 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -51,6 +51,36 @@ class OpenACCClauseWithParams : public OpenACCClause { SourceLocation getLParenLoc() const { return LParenLoc; } }; +/// A 'default' clause, has the optional 'none' or 'present' argument. +class OpenACCDefaultClause : public OpenACCClauseWithParams { + friend class ASTReaderStmt; + friend class ASTWriterStmt; + + OpenACCDefaultClauseKind DefaultClauseKind; + +protected: + OpenACCDefaultClause(OpenACCDefaultClauseKind K, SourceLocation BeginLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OpenACCClauseWithParams(OpenACCClauseKind::Default, BeginLoc, LParenLoc, + EndLoc), + DefaultClauseKind(K) { + assert((DefaultClauseKind == OpenACCDefaultClauseKind::None || + DefaultClauseKind == OpenACCDefaultClauseKind::Present) && + "Invalid Clause Kind"); + } + +public: + OpenACCDefaultClauseKind getDefaultClauseKind() const { + return DefaultClauseKind; + } + + static OpenACCDefaultClause *Create(const ASTContext &C, + OpenACCDefaultClauseKind K, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); +}; + template class OpenACCClauseVisitor { Impl &getDerived() { return static_cast(*this); } @@ -66,6 +96,8 @@ template class OpenACCClauseVisitor { switch (C->getClauseKind()) { case OpenACCClauseKind::Default: + VisitOpenACCDefaultClause(*cast(C)); + return; case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Seq: @@ -112,6 +144,10 @@ template class OpenACCClauseVisitor { } llvm_unreachable("Invalid Clause kind"); } + + void VisitOpenACCDefaultClause(const OpenACCDefaultClause &Clause) { + return getDerived().VisitOpenACCDefaultClause(Clause); + } }; class OpenACCClausePrinter final @@ -128,6 +164,8 @@ class OpenACCClausePrinter final } } OpenACCClausePrinter(raw_ostream &OS) : OS(OS) {} + + void VisitOpenACCDefaultClause(const OpenACCDefaultClause &Clause); }; } // namespace clang diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 64c58ab36338b..059a8f58da5db 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12265,6 +12265,10 @@ def 
err_acc_construct_appertainment "be used in a statement context">; def err_acc_clause_appertainment : Error<"OpenACC '%1' clause is not valid on '%0' directive">; +def err_acc_duplicate_clause_disallowed + : Error<"OpenACC '%1' clause cannot appear more than once on a '%0' " + "directive">; +def note_acc_previous_clause_here : Note<"previous clause is here">; def err_acc_branch_in_out_compute_construct : Error<"invalid %select{branch|return|throw}0 %select{out of|into}1 " "OpenACC Compute Construct">; diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h index 95fc35a5bedb7..e191e9e0a5a15 100644 --- a/clang/include/clang/Basic/OpenACCKinds.h +++ b/clang/include/clang/Basic/OpenACCKinds.h @@ -419,6 +419,29 @@ enum class OpenACCDefaultClauseKind { Invalid, }; +template +inline StreamTy &printOpenACCDefaultClauseKind(StreamTy &Out, + OpenACCDefaultClauseKind K) { + switch (K) { + case OpenACCDefaultClauseKind::None: + return Out << "none"; + case OpenACCDefaultClauseKind::Present: + return Out << "present"; + case OpenACCDefaultClauseKind::Invalid: + return Out << ""; + } +} + +inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &Out, + OpenACCDefaultClauseKind K) { + return printOpenACCDefaultClauseKind(Out, K); +} + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, + OpenACCDefaultClauseKind K) { + return printOpenACCDefaultClauseKind(Out, K); +} + enum class OpenACCReductionOperator { /// '+'. Addition, diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 45929e4a9db3f..27aaee164a288 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -19,6 +19,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/SemaBase.h" +#include namespace clang { class OpenACCClause; @@ -35,7 +36,11 @@ class SemaOpenACC : public SemaBase { SourceRange ClauseRange; SourceLocation LParenLoc; - // TODO OpenACC: Add variant here to store details of individual clauses. 
+ struct DefaultDetails { + OpenACCDefaultClauseKind DefaultClauseKind; + }; + + std::variant Details; public: OpenACCParsedClause(OpenACCDirectiveKind DirKind, @@ -52,8 +57,20 @@ class SemaOpenACC : public SemaBase { SourceLocation getEndLoc() const { return ClauseRange.getEnd(); } + OpenACCDefaultClauseKind getDefaultClauseKind() const { + assert(ClauseKind == OpenACCClauseKind::Default && + "Parsed clause is not a default clause"); + return std::get(Details).DefaultClauseKind; + } + void setLParenLoc(SourceLocation EndLoc) { LParenLoc = EndLoc; } void setEndLoc(SourceLocation EndLoc) { ClauseRange.setEnd(EndLoc); } + + void setDefaultDetails(OpenACCDefaultClauseKind DefKind) { + assert(ClauseKind == OpenACCClauseKind::Default && + "Parsed clause is not a default clause"); + Details = DefaultDetails{DefKind}; + } }; SemaOpenACC(Sema &S); diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index e1db872f25c32..c83128b60e3ac 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -15,3 +15,22 @@ #include "clang/AST/ASTContext.h" using namespace clang; + +OpenACCDefaultClause *OpenACCDefaultClause::Create(const ASTContext &C, + OpenACCDefaultClauseKind K, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + void *Mem = + C.Allocate(sizeof(OpenACCDefaultClause), alignof(OpenACCDefaultClause)); + + return new (Mem) OpenACCDefaultClause(K, BeginLoc, LParenLoc, EndLoc); +} + +//===----------------------------------------------------------------------===// +// OpenACC clauses printing methods +//===----------------------------------------------------------------------===// +void OpenACCClausePrinter::VisitOpenACCDefaultClause( + const OpenACCDefaultClause &C) { + OS << "default(" << C.getDefaultClauseKind() << ")"; +} diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index bec8bc71f5554..be3dd4b673cf9 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2456,7 +2456,12 @@ class OpenACCClauseProfiler Visit(Clause); } } + void VisitOpenACCDefaultClause(const OpenACCDefaultClause &Clause); }; + +/// Nothing to do here, there are no sub-statements. +void OpenACCClauseProfiler::VisitOpenACCDefaultClause( + const OpenACCDefaultClause &Clause) {} } // namespace void StmtProfiler::VisitOpenACCComputeConstruct( diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 431f5d8bdb2b5..085a7f51ce99a 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -390,6 +390,17 @@ void TextNodeDumper::Visit(const OpenACCClause *C) { { ColorScope Color(OS, ShowColors, AttrColor); OS << C->getClauseKind(); + + // Handle clauses with parens for types that have no children, likely + // because there is no sub expression. + switch (C->getClauseKind()) { + case OpenACCClauseKind::Default: + OS << '(' << cast(C)->getDefaultClauseKind() << ')'; + break; + default: + // Nothing to do here. 
+ break; + } } dumpPointer(C); dumpSourceRange(SourceRange(C->getBeginLoc(), C->getEndLoc())); diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index f434e1542c801..59a4a5f534676 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -831,9 +831,13 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( ConsumeToken(); - if (getOpenACCDefaultClauseKind(DefKindTok) == - OpenACCDefaultClauseKind::Invalid) + OpenACCDefaultClauseKind DefKind = + getOpenACCDefaultClauseKind(DefKindTok); + + if (DefKind == OpenACCDefaultClauseKind::Invalid) Diag(DefKindTok, diag::err_acc_invalid_default_clause_kind); + else + ParsedClause.setDefaultDetails(DefKind); break; } diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 2ba1e49b5739d..b6afb80b873e2 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -39,11 +39,27 @@ bool diagnoseConstructAppertainment(SemaOpenACC &S, OpenACCDirectiveKind K, bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, OpenACCClauseKind ClauseKind) { - // FIXME: For each clause as we implement them, we can add the - // 'legalization' list here. - - // Do nothing so we can go to the 'unimplemented' diagnostic instead. - return true; + switch (ClauseKind) { + // FIXME: For each clause as we implement them, we can add the + // 'legalization' list here. + case OpenACCClauseKind::Default: + switch (DirectiveKind) { + case OpenACCDirectiveKind::Parallel: + case OpenACCDirectiveKind::Serial: + case OpenACCDirectiveKind::Kernels: + case OpenACCDirectiveKind::ParallelLoop: + case OpenACCDirectiveKind::SerialLoop: + case OpenACCDirectiveKind::KernelsLoop: + case OpenACCDirectiveKind::Data: + return true; + default: + return false; + } + default: + // Do nothing so we can go to the 'unimplemented' diagnostic instead. + return true; + } + llvm_unreachable("Invalid clause kind"); } } // namespace @@ -63,8 +79,43 @@ SemaOpenACC::ActOnClause(ArrayRef ExistingClauses, return nullptr; } - // TODO OpenACC: Switch over the clauses we implement here and 'create' - // them. + switch (Clause.getClauseKind()) { + case OpenACCClauseKind::Default: { + // Restrictions only properly implemented on 'compute' constructs, and + // 'compute' constructs are the only construct that can do anything with + // this yet, so skip/treat as unimplemented in this case. + if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Parallel && + Clause.getDirectiveKind() != OpenACCDirectiveKind::Serial && + Clause.getDirectiveKind() != OpenACCDirectiveKind::Kernels) + break; + + // Don't add an invalid clause to the AST. + if (Clause.getDefaultClauseKind() == OpenACCDefaultClauseKind::Invalid) + return nullptr; + + // OpenACC 3.3, Section 2.5.4: + // At most one 'default' clause may appear, and it must have a value of + // either 'none' or 'present'. + // Second half of the sentence is diagnosed during parsing. 
+ auto Itr = llvm::find_if(ExistingClauses, [](const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::Default; + }); + + if (Itr != ExistingClauses.end()) { + SemaRef.Diag(Clause.getBeginLoc(), + diag::err_acc_duplicate_clause_disallowed) + << Clause.getDirectiveKind() << Clause.getClauseKind(); + SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here); + return nullptr; + } + + return OpenACCDefaultClause::Create( + getASTContext(), Clause.getDefaultClauseKind(), Clause.getBeginLoc(), + Clause.getLParenLoc(), Clause.getEndLoc()); + } + default: + break; + } Diag(Clause.getBeginLoc(), diag::warn_acc_clause_unimplemented) << Clause.getClauseKind(); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 79d60588ae536..33a9356e82f40 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4035,6 +4035,11 @@ class TreeTransform { llvm::SmallVector TransformOpenACCClauseList(OpenACCDirectiveKind DirKind, ArrayRef OldClauses); + + OpenACCClause * + TransformOpenACCClause(ArrayRef ExistingClauses, + OpenACCDirectiveKind DirKind, + const OpenACCClause *OldClause); }; template @@ -11075,13 +11080,44 @@ OMPClause *TreeTransform::TransformOMPXBareClause(OMPXBareClause *C) { //===----------------------------------------------------------------------===// // OpenACC transformation //===----------------------------------------------------------------------===// +template +OpenACCClause *TreeTransform::TransformOpenACCClause( + ArrayRef ExistingClauses, + OpenACCDirectiveKind DirKind, const OpenACCClause *OldClause) { + + SemaOpenACC::OpenACCParsedClause ParsedClause( + DirKind, OldClause->getClauseKind(), OldClause->getBeginLoc()); + ParsedClause.setEndLoc(OldClause->getEndLoc()); + + if (const auto *WithParms = dyn_cast(OldClause)) + ParsedClause.setLParenLoc(WithParms->getLParenLoc()); + + switch (OldClause->getClauseKind()) { + case OpenACCClauseKind::Default: + // There is nothing to do here as nothing dependent can appear in this + // clause. So just set the values so Sema can set the right value. + ParsedClause.setDefaultDetails( + cast(OldClause)->getDefaultClauseKind()); + break; + default: + assert(false && "Unhandled OpenACC clause in TreeTransform"); + return nullptr; + } + + return getSema().OpenACC().ActOnClause(ExistingClauses, ParsedClause); +} + template llvm::SmallVector TreeTransform::TransformOpenACCClauseList( OpenACCDirectiveKind DirKind, ArrayRef OldClauses) { - // TODO OpenACC: Ensure we loop through the list and transform the individual - // clauses. 
- return {}; + llvm::SmallVector TransformedClauses; + for (const auto *Clause : OldClauses) { + if (OpenACCClause *TransformedClause = getDerived().TransformOpenACCClause( + TransformedClauses, DirKind, Clause)) + TransformedClauses.push_back(TransformedClause); + } + return TransformedClauses; } template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index fa5bb9f2d5435..679302e7a838f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11763,7 +11763,12 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { [[maybe_unused]] SourceLocation EndLoc = readSourceLocation(); switch (ClauseKind) { - case OpenACCClauseKind::Default: + case OpenACCClauseKind::Default: { + SourceLocation LParenLoc = readSourceLocation(); + OpenACCDefaultClauseKind DCK = readEnum(); + return OpenACCDefaultClause::Create(getContext(), DCK, BeginLoc, LParenLoc, + EndLoc); + } case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Seq: diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index baf03f69d7306..4cd74b1ba9d72 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7406,7 +7406,12 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(C->getEndLoc()); switch (C->getClauseKind()) { - case OpenACCClauseKind::Default: + case OpenACCClauseKind::Default: { + const auto *DC = cast(C); + writeSourceLocation(DC->getLParenLoc()); + writeEnum(DC->getDefaultClauseKind()); + return; + } case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Seq: diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index b58b332ad3245..b363a0cb1362b 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -173,45 +173,37 @@ void DefaultClause() { #pragma acc serial default), seq for(;;){} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+1{{expected identifier}} #pragma acc serial default() for(;;){} - // expected-error@+3{{expected identifier}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected identifier}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial default() seq for(;;){} - // expected-error@+3{{expected identifier}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected identifier}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial default(), seq for(;;){} - // expected-error@+2{{invalid value for 'default' clause; expected 'present' or 'none'}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+1{{invalid value for 'default' clause; expected 'present' or 'none'}} #pragma acc serial default(invalid) for(;;){} - // expected-error@+3{{invalid value for 'default' clause; expected 'present' or 'none'}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{invalid value for 'default' clause; expected 'present' or 'none'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} 
#pragma acc serial default(auto) seq for(;;){} - // expected-error@+3{{invalid value for 'default' clause; expected 'present' or 'none'}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{invalid value for 'default' clause; expected 'present' or 'none'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial default(invalid), seq for(;;){} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} #pragma acc serial default(none) for(;;){} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial default(present), seq for(;;){} diff --git a/clang/test/SemaOpenACC/compute-construct-ast.cpp b/clang/test/SemaOpenACC/compute-construct-ast.cpp index 55c080838a188..e632522f877b5 100644 --- a/clang/test/SemaOpenACC/compute-construct-ast.cpp +++ b/clang/test/SemaOpenACC/compute-construct-ast.cpp @@ -12,14 +12,16 @@ void NormalFunc() { // CHECK-LABEL: NormalFunc // CHECK-NEXT: CompoundStmt // CHECK-NEXT: OpenACCComputeConstruct {{.*}}parallel + // CHECK-NEXT: default(none) // CHECK-NEXT: CompoundStmt -#pragma acc parallel +#pragma acc parallel default(none) { #pragma acc parallel // CHECK-NEXT: OpenACCComputeConstruct {{.*}}parallel // CHECK-NEXT: OpenACCComputeConstruct {{.*}}parallel + // CHECK-NEXT: default(present) // CHECK-NEXT: CompoundStmt -#pragma acc parallel +#pragma acc parallel default(present) {} } // FIXME: Add a test once we have clauses for this. @@ -50,12 +52,12 @@ void NormalFunc() { template void TemplFunc() { -#pragma acc parallel +#pragma acc parallel default(none) { typename T::type I; } -#pragma acc serial +#pragma acc serial default(present) { typename T::type I; } @@ -72,10 +74,12 @@ void TemplFunc() { // CHECK-NEXT: FunctionDecl // CHECK-NEXT: CompoundStmt // CHECK-NEXT: OpenACCComputeConstruct {{.*}}parallel + // CHECK-NEXT: default(none) // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl{{.*}} I 'typename T::type' // CHECK-NEXT: OpenACCComputeConstruct {{.*}}serial + // CHECK-NEXT: default(present) // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl{{.*}} I 'typename T::type' @@ -91,10 +95,12 @@ void TemplFunc() { // CHECK-NEXT: CXXRecord // CHECK-NEXT: CompoundStmt // CHECK-NEXT: OpenACCComputeConstruct {{.*}}parallel + // CHECK-NEXT: default(none) // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl{{.*}} I 'typename S::type':'int' // CHECK-NEXT: OpenACCComputeConstruct {{.*}}serial + // CHECK-NEXT: default(present) // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt // CHECK-NEXT: VarDecl{{.*}} I 'typename S::type':'int' diff --git a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp new file mode 100644 index 0000000000000..bd80103445028 --- /dev/null +++ b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s + +// Test this with PCH. 
+// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER +void NormalFunc() { + // CHECK: FunctionDecl{{.*}}NormalFunc + // CHECK-NEXT: CompoundStmt +#pragma acc parallel default(none) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: default(none) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + +#pragma acc serial default(present) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: default(present) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt +} + +template +void TemplFunc() { + // CHECK: FunctionTemplateDecl{{.*}}TemplFunc + // CHECK-NEXT: TemplateTypeParmDecl + + // Match the prototype: + // CHECK-NEXT: FunctionDecl{{.*}}TemplFunc + // CHECK-NEXT: CompoundStmt + +#pragma acc kernels default(none) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}kernels + // CHECK-NEXT: default(none) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + +#pragma acc parallel default(present) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: default(present) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + + // Match the instantiation: + // CHECK: FunctionDecl{{.*}}TemplFunc{{.*}}implicit_instantiation + // CHECK-NEXT: TemplateArgument type 'int' + // CHECK-NEXT: BuiltinType + // CHECK-NEXT: CompoundStmt + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}kernels + // CHECK-NEXT: default(none) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: default(present) + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt +} + +void Instantiate() { + TemplFunc(); +} +#endif diff --git a/clang/test/SemaOpenACC/compute-construct-default-clause.c b/clang/test/SemaOpenACC/compute-construct-default-clause.c new file mode 100644 index 0000000000000..b1235fcca1f6a --- /dev/null +++ b/clang/test/SemaOpenACC/compute-construct-default-clause.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +void SingleOnly() { + #pragma acc parallel default(none) + while(0); + + // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}} + // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'serial' directive}} + // expected-note@+1{{previous clause is here}} + #pragma acc serial default(present) seq default(none) + while(0); + + // expected-warning@+5{{OpenACC clause 'seq' not yet implemented}} + // expected-warning@+4{{OpenACC clause 'seq' not yet implemented}} + // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}} + // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'kernels' directive}} + // expected-note@+1{{previous clause is here}} + #pragma acc kernels seq default(present) seq default(none) seq + while(0); + + // expected-warning@+6{{OpenACC construct 'parallel loop' not yet implemented}} + // expected-warning@+5{{OpenACC clause 'seq' not yet implemented}} + // expected-warning@+4{{OpenACC clause 'seq' not yet implemented}} + // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}} + // expected-warning@+2{{OpenACC clause 'default' not yet implemented}} + // expected-warning@+1{{OpenACC clause 'default' 
+  #pragma acc parallel loop seq default(present) seq default(none) seq
+  while(0);
+
+  // expected-warning@+3{{OpenACC construct 'serial loop' not yet implemented}}
+  // expected-warning@+2{{OpenACC clause 'seq' not yet implemented}}
+  // expected-error@+1{{expected '('}}
+  #pragma acc serial loop seq default seq default(none) seq
+  while(0);
+
+  // expected-warning@+2{{OpenACC construct 'kernels loop' not yet implemented}}
+  // expected-warning@+1{{OpenACC clause 'default' not yet implemented}}
+  #pragma acc kernels loop default(none)
+  while(0);
+
+  // expected-warning@+2{{OpenACC construct 'data' not yet implemented}}
+  // expected-warning@+1{{OpenACC clause 'default' not yet implemented}}
+  #pragma acc data default(none)
+  while(0);
+
+  // expected-warning@+2{{OpenACC construct 'loop' not yet implemented}}
+  // expected-error@+1{{OpenACC 'default' clause is not valid on 'loop' directive}}
+  #pragma acc loop default(none)
+  while(0);
+
+  // expected-warning@+2{{OpenACC construct 'wait' not yet implemented}}
+  // expected-error@+1{{OpenACC 'default' clause is not valid on 'wait' directive}}
+  #pragma acc wait default(none)
+  while(0);
+}
diff --git a/clang/test/SemaOpenACC/compute-construct-default-clause.cpp b/clang/test/SemaOpenACC/compute-construct-default-clause.cpp
new file mode 100644
index 0000000000000..2c3e711ffd085
--- /dev/null
+++ b/clang/test/SemaOpenACC/compute-construct-default-clause.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+template<typename T>
+void SingleOnly() {
+  #pragma acc parallel default(none)
+  while(false);
+
+  // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
+  // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'parallel' directive}}
+  // expected-note@+1{{previous clause is here}}
+  #pragma acc parallel default(present) seq default(none)
+  while(false);
+
+  // expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
+  // expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
+  // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
+  // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'serial' directive}}
+  // expected-note@+1{{previous clause is here}}
+  #pragma acc serial seq default(present) seq default(none) seq
+  while(false);
+
+  // expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
+  // expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
+  // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
+  // expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'kernels' directive}}
+  // expected-note@+1{{previous clause is here}}
+  #pragma acc kernels seq default(present) seq default(none) seq
+  while(false);
+
+  // expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
+  // expected-warning@+2{{OpenACC clause 'seq' not yet implemented}}
+  // expected-error@+1{{expected '('}}
+  #pragma acc parallel seq default(none) seq default seq
+  while(false);
+}
+
+void Instantiate() {
+  SingleOnly<int>();
+}
From 5ae9ffbd18fd93edbbc8efebe140aeb24cd763c2 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 10 Apr 2024 07:16:52 -0700
Subject: [PATCH 014/886] [RISCV] Address review comment from 88062

As pointed out by Fraser, KillSrcReg is always false at this point in
the code, and having the inconsistency on whether we check the flag
between the if and else blocks is confusing.
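
Note that since KillSrcReg is always false here,
`.addReg(SrcReg, getKillRegState(KillSrcReg))` and the plain `.addReg(SrcReg)`
build an identical operand (getKillRegState(false) contributes no register
flags), so dropping the call is purely a readability cleanup.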
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 46e79272d60eb..84af6eec40ee6 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -210,8 +210,7 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD : (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD); BuildMI(MBB, II, DL, TII->get(Opc), DestReg) - .addReg(ScratchReg, RegState::Kill) - .addReg(SrcReg, getKillRegState(KillSrcReg)) + .addReg(ScratchReg, RegState::Kill).addReg(SrcReg) .setMIFlag(Flag); } else { TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag); From a8f9f85ab0114deb0f6adae2b578bc39c62c19b3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Apr 2024 10:00:48 -0500 Subject: [PATCH 015/886] [Libomptarget][NFC] Fix unused variable warnings Summary: This patch fixes a few warnings that would show up while building. --- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp | 4 ++-- openmp/libomptarget/src/interface.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index a0fdde951b74a..00650b801b420 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1885,8 +1885,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { // Get the frequency of the steady clock. If the attribute is missing // assume running on an older libhsa and default to 0, omp_get_wtime // will be inaccurate but otherwise programs can still run. - if (auto Err = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY, - ClockFrequency)) + if (getDeviceAttrRaw(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY, + ClockFrequency) != HSA_STATUS_SUCCESS) ClockFrequency = 0; // Load the grid values dependending on the wavefront. diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index b562ba8818c39..557703632c625 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -495,7 +495,7 @@ EXTERN void __tgt_target_nowait_query(void **AsyncHandle) { if (QueryCounter.isAboveThreshold()) AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING; - if (const int Rc = AsyncInfo->synchronize()) + if (AsyncInfo->synchronize()) FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n"); // If there are device operations still pending, return immediately without // deallocating the handle and increase the current thread query count. From 2bf48892ab0ce5d53126c7b114070bba18521501 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 10 Apr 2024 11:16:00 -0400 Subject: [PATCH 016/886] [HIP] document difference with CUDA (#86838) --- clang/docs/HIPSupport.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst index 543c82cf90244..5ba84c2f67055 100644 --- a/clang/docs/HIPSupport.rst +++ b/clang/docs/HIPSupport.rst @@ -208,6 +208,20 @@ Host Code Compilation - These relocatable objects are then linked together. - Host code within a TU can call host functions and launch kernels from another TU. 
+Syntax Difference with CUDA
+===========================
+
+Clang's front end uses the same parsing and semantic analysis mechanisms for
+both the CUDA and HIP programming models. This includes the resolution of
+overloads concerning device and host functions. Comprehensive documentation
+on the syntax differences between Clang and NVCC for CUDA is available at
+`Dialect Differences Between Clang and NVCC <https://llvm.org/docs/CompileCudaWithLLVM.html#dialect-differences-between-clang-and-nvcc>`_;
+these differences also apply to HIP code compilation.
+
+Predefined Macros for Differentiation
+-------------------------------------
+
+To facilitate differentiation between HIP and CUDA code, as well as between
+device and host compilations within HIP, Clang defines specific macros:
+
+- ``__HIP__`` : This macro is defined only when compiling HIP code. It can be
+  used to conditionally compile code specific to HIP, enabling developers to
+  write portable code that can be compiled for both CUDA and HIP.
+
+- ``__HIP_DEVICE_COMPILE__`` : Defined exclusively during HIP device
+  compilation, this macro allows for conditional compilation of
+  device-specific code. It provides a mechanism to segregate device and host
+  code, ensuring that each can be optimized for their respective execution
+  environments.
+
 Function Pointers Support
 =========================
 
From 6ca5a410d26262f06f954e91200eefe0cbfb7fb8 Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 10 Apr 2024 08:16:43 -0700
Subject: [PATCH 017/886] [SLP]Fix PR87358: broken module, Instruction does
 not dominate all uses.

If the first node is a gather node with extractelement instructions, we
still need to insert the vector value after all of those instructions,
not after the very first one.
---
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp  | 10 +++++++---
 .../X86/extractlements-gathered-first-node.ll   | 18 ++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 22ef9b5fb994e..6b758f63a7961 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10736,9 +10736,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
                  [](Value *V) {
                    return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
                  })) ||
-      all_of(E->Scalars, [](Value *V) {
-        return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V);
-      }))
+      all_of(E->Scalars,
+             [](Value *V) {
+               return !isVectorLikeInstWithConstOps(V) &&
+                      isUsedOutsideBlock(V);
+             }) ||
+      (E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
+       all_of(E->Scalars, IsaPred<ExtractElementInst>)))
     Res.second = FindLastInst();
   else
     Res.second = FindFirstInst();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll
new file mode 100644
index 0000000000000..57fa83b1ccdd6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> zeroinitializer, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> , i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %0 = extractelement <4 x i32> zeroinitializer, i32 0
+  %1 = extractelement <2 x i32> zeroinitializer, i32 0
+  %icmp = icmp ult i32 %0, %1
+  ret void
+}
From 7f1b9adfc8d86c77ee87a268b3d30e0eda8ed493 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 10 Apr 2024 08:39:56 -0700
Subject: [PATCH 018/886] [RISCV] Add MachineCombiner to fold (sh3add Z, (add X, (slli Y, 6))) -> (sh3add (sh3add Y, Z), X). (#87884)

This improves a pattern that occurs in 531.deepsjeng_r, reducing the
dynamic instruction count by 0.5%.

This may be possible to improve in SelectionDAG, but given the special
cases around shXadd formation, it's not obvious it can be done in a
robust way without adding multiple special cases.

I've used a GEP with 2 indices because that most closely resembles the
motivating case. Most of the test cases are the simplest GEP case. One
test has a logical right shift on an index, which is closer to the
deepsjeng code. This requires special handling in isel to reverse a
DAGCombiner canonicalization that turns a pair of shifts into
(srl (and X, C1), C2).
---
 .../llvm/CodeGen/MachineCombinerPattern.h     |   2 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 151 ++++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64zba.ll            |  40 ++---
 3 files changed, 169 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 89eed7463bd78..41b73eaae0298 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -175,6 +175,8 @@ enum class MachineCombinerPattern {
   FMADD_XA,
   FMSUB,
   FNMSUB,
+  SHXADD_ADD_SLLI_OP1,
+  SHXADD_ADD_SLLI_OP2,
 
   // X86 VNNI
   DPWSSD,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index be63bc936ae8a..6b75efe684d91 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1775,6 +1775,86 @@ static bool getFPPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns,
                           bool DoRegPressureReduce) {
   return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
 }
 
+/// Utility routine that checks if \param MO is defined by an
+/// \param CombineOpc instruction in the basic block \param MBB
+static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
+                                      const MachineOperand &MO,
+                                      unsigned CombineOpc) {
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const MachineInstr *MI = nullptr;
+
+  if (MO.isReg() && MO.getReg().isVirtual())
+    MI = MRI.getUniqueVRegDef(MO.getReg());
+  // And it needs to be in the trace (otherwise, it won't have a depth).
+  if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
+    return nullptr;
+  // Must only be used by the user we combine with.
+  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+    return nullptr;
+
+  return MI;
+}
+
+/// Utility routine that checks if \param MO is defined by a SLLI in \param
+/// MBB that can be combined by splitting across 2 SHXADD instructions. The
+/// first SHXADD shift amount is given by \param OuterShiftAmt.
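+/// For example, with OuterShiftAmt == 3 an inner SLLI by 3, 4, 5 or 6
+/// qualifies, since the remaining difference of 0..3 can be absorbed by the
+/// inner ADD/SH1ADD/SH2ADD/SH3ADD materialized in genShXAddAddShift below.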
+static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
+                                      const MachineOperand &MO,
+                                      unsigned OuterShiftAmt) {
+  const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
+  if (!ShiftMI)
+    return false;
+
+  unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
+  if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
+    return false;
+
+  return true;
+}
+
+// Returns the shift amount from a SHXADD instruction. Returns 0 if the
+// instruction is not a SHXADD.
+static unsigned getSHXADDShiftAmount(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return 0;
+  case RISCV::SH1ADD:
+    return 1;
+  case RISCV::SH2ADD:
+    return 2;
+  case RISCV::SH3ADD:
+    return 3;
+  }
+}
+
+// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
+// (sh3add (sh2add Y, Z), X).
+static bool
+getSHXADDPatterns(const MachineInstr &Root,
+                  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
+  if (!ShiftAmt)
+    return false;
+
+  const MachineBasicBlock &MBB = *Root.getParent();
+
+  const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
+  if (!AddMI)
+    return false;
+
+  bool Found = false;
+  if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
+    Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
+    Found = true;
+  }
+  if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
+    Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
+    Found = true;
+  }
+
+  return Found;
+}
+
 bool RISCVInstrInfo::getMachineCombinerPatterns(
     MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
     bool DoRegPressureReduce) const {
@@ -1782,6 +1862,9 @@ bool RISCVInstrInfo::getMachineCombinerPatterns(
   if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
     return true;
 
+  if (getSHXADDPatterns(Root, Patterns))
+    return true;
+
   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                      DoRegPressureReduce);
 }
@@ -1864,6 +1947,68 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
   DelInstrs.push_back(&Root);
 }
 
+// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
+// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
+// shXadd instructions. The outer shXadd keeps its original opcode.
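+//
+// The reassociation is value-preserving; for instance, with an outer sh3add
+// and an inner slli by 5:
+//   sh3add Z, (add X, (slli Y, 5)) == Z*8 + X + Y*32
+//                                  == (Y*4 + Z)*8 + X
+//                                  == sh3add (sh2add Y, Z), X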
+static void
+genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
+                  SmallVectorImpl<MachineInstr *> &InsInstrs,
+                  SmallVectorImpl<MachineInstr *> &DelInstrs,
+                  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+  MachineFunction *MF = Root.getMF();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
+  assert(OuterShiftAmt != 0 && "Unexpected opcode");
+
+  MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+  MachineInstr *ShiftMI =
+      MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
+
+  unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
+  assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
+
+  unsigned InnerOpc;
+  switch (InnerShiftAmt - OuterShiftAmt) {
+  default:
+    llvm_unreachable("Unexpected shift amount");
+  case 0:
+    InnerOpc = RISCV::ADD;
+    break;
+  case 1:
+    InnerOpc = RISCV::SH1ADD;
+    break;
+  case 2:
+    InnerOpc = RISCV::SH2ADD;
+    break;
+  case 3:
+    InnerOpc = RISCV::SH3ADD;
+    break;
+  }
+
+  const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
+  const MachineOperand &Y = ShiftMI->getOperand(1);
+  const MachineOperand &Z = Root.getOperand(1);
+
+  Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+
+  auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
+                  .addReg(Y.getReg(), getKillRegState(Y.isKill()))
+                  .addReg(Z.getReg(), getKillRegState(Z.isKill()));
+  auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
+                      Root.getOperand(0).getReg())
+                  .addReg(NewVR, RegState::Kill)
+                  .addReg(X.getReg(), getKillRegState(X.isKill()));
+
+  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+  InsInstrs.push_back(MIB1);
+  InsInstrs.push_back(MIB2);
+  DelInstrs.push_back(ShiftMI);
+  DelInstrs.push_back(AddMI);
+  DelInstrs.push_back(&Root);
+}
+
 void RISCVInstrInfo::genAlternativeCodeSequence(
     MachineInstr &Root, MachineCombinerPattern Pattern,
     SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -1887,6 +2032,12 @@ void RISCVInstrInfo::genAlternativeCodeSequence(
     combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
     return;
   }
+  case MachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
+    genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+    return;
+  case MachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
+    genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+    return;
   }
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7e32253c8653f..067addc819f7e 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1404,9 +1404,8 @@ define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
 ;
 ; RV64ZBA-LABEL: sh6_sh3_add2:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    slli a1, a1, 6
-; RV64ZBA-NEXT:    add a0, a1, a0
-; RV64ZBA-NEXT:    sh3add a0, a2, a0
+; RV64ZBA-NEXT:    sh3add a1, a1, a2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ret
 entry:
   %shl = shl i64 %z, 3
@@ -2111,9 +2110,8 @@ define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) {
 ;
 ; RV64ZBA-LABEL: array_index_sh1_sh3:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    slli a1, a1, 4
-; RV64ZBA-NEXT:    add a0, a0, a1
-; RV64ZBA-NEXT:    sh3add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a1, a1, a2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2
@@ -2174,9 +2172,8 @@ define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) {
 ;
 ; RV64ZBA-LABEL: array_index_sh2_sh2:
 ; 
RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a1, a1, 4 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh2add a1, a1, a2 +; RV64ZBA-NEXT: sh2add a0, a1, a0 ; RV64ZBA-NEXT: lw a0, 0(a0) ; RV64ZBA-NEXT: ret %a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2 @@ -2196,9 +2193,8 @@ define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) { ; ; RV64ZBA-LABEL: array_index_sh2_sh3: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a1, a1, 5 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh2add a1, a1, a2 +; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret %a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2 @@ -2238,9 +2234,8 @@ define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) { ; ; RV64ZBA-LABEL: array_index_sh3_sh1: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a1, a1, 4 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh1add a0, a1, a0 ; RV64ZBA-NEXT: lh a0, 0(a0) ; RV64ZBA-NEXT: ret %a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2 @@ -2260,9 +2255,8 @@ define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) { ; ; RV64ZBA-LABEL: array_index_sh3_sh2: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a1, a1, 5 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh2add a0, a1, a0 ; RV64ZBA-NEXT: lw a0, 0(a0) ; RV64ZBA-NEXT: ret %a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2 @@ -2282,9 +2276,8 @@ define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) { ; ; RV64ZBA-LABEL: array_index_sh3_sh3: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a1, a1, 6 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret %a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2 @@ -2308,9 +2301,8 @@ define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) { ; RV64ZBA-LABEL: array_index_lshr_sh3_sh3: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srli a1, a1, 58 -; RV64ZBA-NEXT: slli a1, a1, 6 -; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret %shr = lshr i64 %idx1, 58 From f9f4aba547f50e6dcb2d9345b51fe4883bb64d8d Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 9 Apr 2024 12:45:05 -0500 Subject: [PATCH 019/886] [InstCombine] Add tests for non-zero/knownbits of `vector_reduce_{s,u}{min,max}`; NFC --- .../vector-reduce-min-max-known.ll | 295 ++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll new file mode 100644 index 0000000000000..a02ebcca8090a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_non_zero( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; 
CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_non_zero_fail( +; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nsw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umin_non_zero( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umin_non_zero_fail( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smax_non_zero0( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smax_non_zero1( +; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[X0]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <4 x i8> %xx, + %x = or <4 x i8> %x0, + %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smax_non_zero_fail( +; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[X0]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <4 x i8> %xx, + %x = add nuw <4 x i8> %x0, + %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smin_non_zero0( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smin_non_zero1( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> 
[[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_smin_non_zero_fail( +; CHECK-NEXT: [[X0:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add <4 x i8> [[X0]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = or <4 x i8> %xx, + %x = add <4 x i8> %x0, + %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x) + %r = icmp eq i8 %v, 0 + ret i1 %r +} + +define i8 @vec_reduce_umax_known0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_known0( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = and i8 %v, 1 + ret i8 %r +} + +define i8 @vec_reduce_umax_known1(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_known1( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = and i8 %v, 128 + ret i8 %r +} + +define i8 @vec_reduce_umax_known_fail0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_known_fail0( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = and i8 %v, 1 + ret i8 %r +} + +define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umax_known_fail1( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x) + %r = and i8 %v, 1 + ret i8 %r +} + +define i8 @vec_reduce_umin_known0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umin_known0( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x) + %r = and i8 %v, 1 + ret i8 %r +} + +define i8 @vec_reduce_umin_known1(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umin_known1( +; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x = and <4 x i8> %xx, + %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x) + %r = and i8 %v, 128 + ret i8 %r +} + +define i8 @vec_reduce_umin_known_fail0(<4 x i8> %xx) { +; CHECK-LABEL: @vec_reduce_umin_known_fail0( +; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) +; CHECK-NEXT: 
[[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x0 = and <4 x i8> %xx, 
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = and i8 %v, 4
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 4
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, 
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}
From 77d668451ad2e6370eb595c171779429e9becdf2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein
Date: Tue, 9 Apr 2024 11:58:38 -0500
Subject: [PATCH 020/886] [ValueTracking] Add support for
 `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`

Previously missing, proofs for all implementations:
https://alive2.llvm.org/ce/z/G8wpmG
---
 llvm/lib/Analysis/ValueTracking.cpp              |  6 ++++++
 .../InstCombine/vector-reduce-min-max-known.ll   | 15 +++------------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca48cfe773815..869a94d81f4df 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2824,6 +2824,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
       return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
                           II->getArgOperand(0), II->getArgOperand(1),
                           /*NSW=*/true, /* NUW=*/false);
+    // umax/umin/smax/smin of all non-zero elements is always non-zero.
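+    // These reductions always return one of their input elements, so if
+    // every element is known non-zero, the reduced result is non-zero too.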
+ case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + return isKnownNonZero(II->getArgOperand(0), Depth, Q); case Intrinsic::umax: case Intrinsic::uadd_sat: return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll index a02ebcca8090a..29c08b17ef885 100644 --- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll +++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll @@ -29,10 +29,7 @@ define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) { define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reduce_umin_non_zero( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = add nuw <4 x i8> %xx, %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x) @@ -55,10 +52,7 @@ define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) { define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reduce_smax_non_zero0( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = add nuw <4 x i8> %xx, %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x) @@ -98,10 +92,7 @@ define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) { define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reduce_smin_non_zero0( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = add nuw <4 x i8> %xx, %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x) From 41c52217b003ce9435ae534251b0d0d035495262 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 9 Apr 2024 11:58:48 -0500 Subject: [PATCH 021/886] [ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `computeKnownBits` Previously missing. We compute by just applying the reduce function on the knownbits of each element. Closes #88169 --- llvm/lib/Analysis/ValueTracking.cpp | 8 ++++++++ .../vector-reduce-min-max-known.ll | 20 ++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 869a94d81f4df..4120876889dec 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1621,6 +1621,14 @@ static void computeKnownBitsFromOperator(const Operator *I, computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); Known = KnownBits::ssub_sat(Known, Known2); break; + // for min/max reduce, any bit common to each element in the input vec + // is set in the output. 
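+    // (The reduction result is always one of the elements, and
+    // computeKnownBits on the vector operand reports only bits common to
+    // every element, so those bits hold for the result as well.)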
+    case Intrinsic::vector_reduce_umax:
+    case Intrinsic::vector_reduce_umin:
+    case Intrinsic::vector_reduce_smax:
+    case Intrinsic::vector_reduce_smin:
+      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+      break;
     case Intrinsic::umin:
       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
       computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index 29c08b17ef885..65d0008353262 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -130,10 +130,7 @@ define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
 
 define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umax_known0(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <4 x i8> %xx, 
   %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
@@ -182,10 +179,7 @@ define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
 
 define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umin_known0(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <4 x i8> %xx, 
   %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -235,10 +229,7 @@ define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
 
 define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smax_known(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 4
 ;
   %x = or <4 x i8> %xx, 
   %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
@@ -261,10 +252,7 @@ define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
 
 define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smin_known(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], 
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 8
 ;
   %x = or <4 x i8> %xx, 
   %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
From a02b3c01820090d4208146b51372587251fdce61 Mon Sep 17 00:00:00 2001
From: Noah Goldstein
Date: Wed, 3 Apr 2024 21:36:35 -0500
Subject: [PATCH 022/886] [ValueTracking] Add tests for overflow detection
 functions in `isKnownNonZero`; NFC

---
 .../test/Transforms/InstCombine/known-bits.ll | 381 ++++++++++++++++++
 1 file changed, 381 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 769f7661fc8dc..ddd5970ccabdb 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -630,5 +630,386 @@ define i8 @or_ne_bits_must_be_unset2_fail(i8 %x, i8 %y) {
   ret i8 %r
 }
 
+declare void @use.i1(i1)
+declare void @use.i8(i8)
+
+declare void @use.2xi1(<2 x i1>)
+
+define i1 @extract_value_uadd(<2 x i8> %xx, <2 x i8> %yy) {
+; CHECK-LABEL: @extract_value_uadd(
+; CHECK-NEXT:    [[X0:%.*]] = and <2 x i8> [[XX:%.*]], 
+; CHECK-NEXT:    [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], 
+; 
CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 +; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) +; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 0 +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <2 x i8> %xx, + %y0 = and <2 x i8> %yy, + %x = add nuw <2 x i8> %x0, + %y = add nuw <2 x i8> %y0, + + %add_uov = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow(<2 x i8> %x, <2 x i8> %y) + %add = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 0 + %uov = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 1 + call void @use.2xi1(<2 x i1> %uov) + %add_ele = extractelement <2 x i8> %add, i32 0 + %r = icmp eq i8 %add_ele, 0 + ret i1 %r +} + +define i1 @extract_value_uadd2(<2 x i8> %xx, <2 x i8> %yy) { +; CHECK-LABEL: @extract_value_uadd2( +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 +; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) +; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 1 +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <2 x i8> %xx, + %y0 = and <2 x i8> %yy, + %x = add nuw <2 x i8> %x0, + %y = add nuw <2 x i8> %y0, + + %add_uov = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow(<2 x i8> %x, <2 x i8> %y) + %add = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 0 + %uov = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 1 + call void @use.2xi1(<2 x i1> %uov) + %add_ele = extractelement <2 x i8> %add, i32 1 + %r = icmp eq i8 %add_ele, 0 + ret i1 %r +} + +define i1 @extract_value_uadd_fail(<2 x i8> %xx, <2 x i8> %yy) { +; CHECK-LABEL: @extract_value_uadd_fail( +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 +; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) +; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 1 +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <2 x i8> %xx, + %y0 = and <2 x i8> %yy, + %x = add nuw <2 x i8> %x0, + %y = add nuw <2 x i8> %y0, + + %add_uov = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow(<2 x i8> %x, <2 x i8> %y) + %add = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 0 + %uov = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 1 + call void @use.2xi1(<2 x i1> %uov) + %add_ele = extractelement <2 x i8> %add, i32 1 + %r = icmp eq i8 %add_ele, 0 + ret i1 %r 
+} + +define i1 @extract_value_uadd_fail2(<2 x i8> %xx, <2 x i8> %yy, i32 %idx) { +; CHECK-LABEL: @extract_value_uadd_fail2( +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 +; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) +; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <2 x i8> %xx, + %y0 = and <2 x i8> %yy, + %x = add nuw <2 x i8> %x0, + %y = add nuw <2 x i8> %y0, + + %add_uov = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow(<2 x i8> %x, <2 x i8> %y) + %add = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 0 + %uov = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 1 + call void @use.2xi1(<2 x i1> %uov) + %add_ele = extractelement <2 x i8> %add, i32 %idx + %r = icmp eq i8 %add_ele, 0 + ret i1 %r +} + +define i1 @extract_value_uadd_fail3(<2 x i8> %xx, <2 x i8> %yy) { +; CHECK-LABEL: @extract_value_uadd_fail3( +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 +; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) +; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 0 +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and <2 x i8> %xx, + %y0 = and <2 x i8> %yy, + %x = add nuw <2 x i8> %x0, + %y = add nuw <2 x i8> %y0, + + %add_uov = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow(<2 x i8> %x, <2 x i8> %y) + %add = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 0 + %uov = extractvalue { <2 x i8>, <2 x i1> } %add_uov, 1 + call void @use.2xi1(<2 x i1> %uov) + %add_ele = extractelement <2 x i8> %add, i32 0 + %r = icmp eq i8 %add_ele, 0 + ret i1 %r +} + +define i1 @extract_value_sadd(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_sadd( +; CHECK-NEXT: [[X:%.*]] = add nuw i8 [[XX:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[X_LEMMA:%.*]] = icmp sgt i8 [[X]], -1 +; CHECK-NEXT: [[Y_LEMMA:%.*]] = icmp sgt i8 [[Y]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_LEMMA]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LEMMA]]) +; CHECK-NEXT: [[ADD_SOV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add nuw i8 %xx, 1 + %y = add nuw i8 %yy, 1 + %x_lemma = icmp ult i8 %x, 128 + %y_lemma = icmp ult i8 %y, 128 + call void @llvm.assume(i1 %x_lemma) + call void @llvm.assume(i1 %y_lemma) + + %add_sov = call { i8, i1 } 
@llvm.sadd.with.overflow(i8 %x, i8 %y) + %add = extractvalue { i8, i1 } %add_sov, 0 + %sov = extractvalue { i8, i1 } %add_sov, 1 + call void @use.i1(i1 %sov) + %r = icmp eq i8 %add, 0 + ret i1 %r +} + +define i1 @extract_value_sadd_fail(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_sadd_fail( +; CHECK-NEXT: [[X:%.*]] = add i8 [[XX:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[ADD_SOV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[ADD:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = add i8 %xx, 1 + %y = add i8 %yy, 1 + + %add_sov = call { i8, i1 } @llvm.sadd.with.overflow(i8 %x, i8 %y) + %add = extractvalue { i8, i1 } %add_sov, 0 + %sov = extractvalue { i8, i1 } %add_sov, 1 + call void @use.i1(i1 %sov) + %r = icmp eq i8 %add, 0 + ret i1 %r +} + +define i1 @extract_value_usub(i8 %x, i8 %zz) { +; CHECK-LABEL: @extract_value_usub( +; CHECK-NEXT: [[Z:%.*]] = add nuw i8 [[ZZ:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: [[SUB_UOV:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SUB_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[SUB_UOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %z = add nuw i8 %zz, 1 + %y = add i8 %x, %z + + %sub_uov = call { i8, i1 } @llvm.usub.with.overflow(i8 %x, i8 %y) + %sub = extractvalue { i8, i1 } %sub_uov, 0 + %uov = extractvalue { i8, i1 } %sub_uov, 1 + call void @use.i1(i1 %uov) + call void @use.i8(i8 %sub) + %r = icmp eq i8 %sub, 0 + ret i1 %r +} + +define i1 @extract_value_usub_fail(i8 %x, i8 %z) { +; CHECK-LABEL: @extract_value_usub_fail( +; CHECK-NEXT: [[Y:%.*]] = add i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[SUB_UOV:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SUB_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[SUB_UOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add i8 %x, %z + %sub_uov = call { i8, i1 } @llvm.usub.with.overflow(i8 %x, i8 %y) + %sub = extractvalue { i8, i1 } %sub_uov, 0 + %uov = extractvalue { i8, i1 } %sub_uov, 1 + call void @use.i1(i1 %uov) + call void @use.i8(i8 %sub) + %r = icmp eq i8 %sub, 0 + ret i1 %r +} + +define i1 @extract_value_ssub(i8 %x, i8 %zz) { +; CHECK-LABEL: @extract_value_ssub( +; CHECK-NEXT: [[Z:%.*]] = add nuw i8 [[ZZ:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add i8 [[Z]], [[X:%.*]] +; CHECK-NEXT: [[SUB_SOV:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[Y]], i8 [[X]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SUB_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[SUB_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %z = add nuw i8 %zz, 1 + %y = add i8 %x, %z + + %sub_sov = call { i8, i1 } @llvm.ssub.with.overflow(i8 %y, i8 %x) + %sub = extractvalue { i8, i1 } %sub_sov, 0 + %sov = extractvalue { i8, i1 } %sub_sov, 1 + 
call void @use.i1(i1 %sov) + call void @use.i8(i8 %sub) + %r = icmp eq i8 %sub, 0 + ret i1 %r +} + +define i1 @extract_value_ssub_fail(i8 %x) { +; CHECK-LABEL: @extract_value_ssub_fail( +; CHECK-NEXT: [[SUB_SOV:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 10, i8 [[X:%.*]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SUB_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[SUB_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %sub_sov = call { i8, i1 } @llvm.ssub.with.overflow(i8 10, i8 %x) + %sub = extractvalue { i8, i1 } %sub_sov, 0 + %sov = extractvalue { i8, i1 } %sub_sov, 1 + call void @use.i1(i1 %sov) + call void @use.i8(i8 %sub) + %r = icmp eq i8 %sub, 0 + ret i1 %r +} + +define i1 @extract_value_umul(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_umul( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[MUL_UOV:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i8, i1 } [[MUL_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[MUL_UOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = or i8 %xx, 1 + %y = add nuw i8 %yy, 1 + + %mul_uov = call { i8, i1 } @llvm.umul.with.overflow(i8 %x, i8 %y) + %mul = extractvalue { i8, i1 } %mul_uov, 0 + %uov = extractvalue { i8, i1 } %mul_uov, 1 + call void @use.i1(i1 %uov) + call void @use.i8(i8 %mul) + %r = icmp eq i8 %mul, 0 + ret i1 %r +} + +define i1 @extract_value_umul_fail(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_umul_fail( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 2 +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[MUL_UOV:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i8, i1 } [[MUL_UOV]], 0 +; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[MUL_UOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = or i8 %xx, 2 + %y = add nuw i8 %yy, 1 + + %mul_uov = call { i8, i1 } @llvm.umul.with.overflow(i8 %x, i8 %y) + %mul = extractvalue { i8, i1 } %mul_uov, 0 + %uov = extractvalue { i8, i1 } %mul_uov, 1 + call void @use.i1(i1 %uov) + call void @use.i8(i8 %mul) + %r = icmp eq i8 %mul, 0 + ret i1 %r +} + +define i1 @extract_value_smul(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_smul( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[MUL_SOV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[Y]], i8 [[X]]) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i8, i1 } [[MUL_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[MUL_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = or i8 %xx, 1 + %y = add nuw i8 %yy, 1 + + %mul_sov = call { i8, i1 } @llvm.smul.with.overflow(i8 %y, i8 %x) + %mul = extractvalue { i8, i1 } %mul_sov, 0 + %sov = extractvalue { i8, i1 } %mul_sov, 1 + call void @use.i1(i1 %sov) + call void @use.i8(i8 %mul) + %r = icmp eq i8 %mul, 0 + ret i1 %r +} + +define i1 
@extract_value_smul_fail(i8 %xx, i8 %yy) { +; CHECK-LABEL: @extract_value_smul_fail( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 1 +; CHECK-NEXT: [[Y:%.*]] = add i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[MUL_SOV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[Y]], i8 [[X]]) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i8, i1 } [[MUL_SOV]], 0 +; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[MUL_SOV]], 1 +; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) +; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x = or i8 %xx, 1 + %y = add i8 %yy, 1 + + %mul_sov = call { i8, i1 } @llvm.smul.with.overflow(i8 %y, i8 %x) + %mul = extractvalue { i8, i1 } %mul_sov, 0 + %sov = extractvalue { i8, i1 } %mul_sov, 1 + call void @use.i1(i1 %sov) + call void @use.i8(i8 %mul) + %r = icmp eq i8 %mul, 0 + ret i1 %r +} + declare void @use(i1) declare void @sink(i8) From f0a487d7e2085e21f3691393070f54110d889fb6 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 15:33:18 -0500 Subject: [PATCH 023/886] [ValueTracking] Split `isNonZero(mul)` logic to a helper; NFC --- llvm/lib/Analysis/ValueTracking.cpp | 57 ++++++++++++++++------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 4120876889dec..2c9ea8aa38512 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2471,6 +2471,34 @@ static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, return ::isKnownNonEqual(X, Y, Depth, Q); } +static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth, + const SimplifyQuery &Q, unsigned BitWidth, Value *X, + Value *Y, bool NSW, bool NUW) { + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if (NSW || NUW) + return isKnownNonZero(X, DemandedElts, Depth, Q) && + isKnownNonZero(Y, DemandedElts, Depth, Q); + + // If either X or Y is odd, then if the other is non-zero the result can't + // be zero. + KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); + if (XKnown.One[0]) + return isKnownNonZero(Y, DemandedElts, Depth, Q); + + KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); + if (YKnown.One[0]) + return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Depth, Q); + + // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is + // non-zero, then X * Y is non-zero. We can find sX and sY by just taking + // the lowest known One of X and Y. If they are non-zero, the result + // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing + // X.CountLeadingZeros + Y.CountLeadingZeros < BitWidth. + return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) < + BitWidth; +} + static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q, const KnownBits &KnownVal) { @@ -2666,33 +2694,10 @@ static bool isKnownNonZeroFromOperator(const Operator *I, Q.IIQ.hasNoUnsignedWrap(BO)); } case Instruction::Mul: { - // If X and Y are non-zero then so is X * Y as long as the multiplication - // does not overflow. const OverflowingBinaryOperator *BO = cast(I); - if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) - return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) && - isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q); - - // If either X or Y is odd, then if the other is non-zero the result can't - // be zero. 
-    KnownBits XKnown =
-        computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
-    if (XKnown.One[0])
-      return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
-
-    KnownBits YKnown =
-        computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
-    if (YKnown.One[0])
-      return XKnown.isNonZero() ||
-             isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
-
-    // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
-    // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
-    // the lowest known One of X and Y. If they are non-zero, the result
-    // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
-    // X.CountLeadingZeros + Y.CountLeadingZeros < BitWidth.
-    return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
-           BitWidth;
+    return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
+                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
+                        Q.IIQ.hasNoUnsignedWrap(BO));
   }
   case Instruction::Select: {
     // (C ? X : Y) != 0 if X != 0 and Y != 0.

From 37ca6fa1e26e86c85c544023b18695be420e80dd Mon Sep 17 00:00:00 2001
From: Noah Goldstein
Date: Wed, 3 Apr 2024 15:35:17 -0500
Subject: [PATCH 024/886] [ValueTracking] Add support for overflow detection
 functions in `isKnownNonZero`

Adds support for: `{s,u}{add,sub,mul}.with.overflow`

The logic is identical to the non-overflow binops; we were just missing
the cases.

Closes #87701
---
 llvm/lib/Analysis/ValueTracking.cpp           | 23 ++++++++++++++++
 .../test/Transforms/InstCombine/known-bits.ll | 26 +++++--------------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2c9ea8aa38512..b32dc493ace91 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2798,6 +2798,29 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
     // handled in isKnownNonZero.
return false; } + case Instruction::ExtractValue: { + const WithOverflowInst *WO; + if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) { + switch (WO->getBinaryOp()) { + default: + break; + case Instruction::Add: + return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, + WO->getArgOperand(0), WO->getArgOperand(1), + /*NSW=*/false, + /*NUW=*/false); + case Instruction::Sub: + return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, + WO->getArgOperand(0), WO->getArgOperand(1)); + case Instruction::Mul: + return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, + WO->getArgOperand(0), WO->getArgOperand(1), + /*NSW=*/false, /*NUW=*/false); + break; + } + } + break; + } case Instruction::Call: case Instruction::Invoke: { const auto *Call = cast(I); diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index ddd5970ccabdb..e27e4a3eddfbb 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -642,12 +642,9 @@ define i1 @extract_value_uadd(<2 x i8> %xx, <2 x i8> %yy) { ; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], ; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], ; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) -; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 ; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) -; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 0 -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x0 = and <2 x i8> %xx, %y0 = and <2 x i8> %yy, @@ -670,12 +667,9 @@ define i1 @extract_value_uadd2(<2 x i8> %xx, <2 x i8> %yy) { ; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], ; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], ; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) -; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 ; CHECK-NEXT: call void @use.2xi1(<2 x i1> [[UOV]]) -; CHECK-NEXT: [[ADD_ELE:%.*]] = extractelement <2 x i8> [[ADD]], i64 1 -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD_ELE]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x0 = and <2 x i8> %xx, %y0 = and <2 x i8> %yy, @@ -784,11 +778,9 @@ define i1 @extract_value_sadd(i8 %xx, i8 %yy) { ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LEMMA]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LEMMA]]) ; CHECK-NEXT: [[ADD_SOV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: [[ADD:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 0 ; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[ADD_SOV]], 1 ; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ADD]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = add nuw i8 %xx, 1 %y = add nuw i8 %yy, 1 @@ -836,8 +828,7 @@ define i1 @extract_value_usub(i8 %x, i8 %zz) { ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[SUB_UOV]], 1 ; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) ; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %z = add nuw i8 %zz, 1 %y = add i8 %x, %z @@ -881,8 +872,7 @@ define i1 @extract_value_ssub(i8 %x, i8 %zz) { ; 
CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[SUB_SOV]], 1 ; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) ; CHECK-NEXT: call void @use.i8(i8 [[SUB]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SUB]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %z = add nuw i8 %zz, 1 %y = add i8 %x, %z @@ -924,8 +914,7 @@ define i1 @extract_value_umul(i8 %xx, i8 %yy) { ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { i8, i1 } [[MUL_UOV]], 1 ; CHECK-NEXT: call void @use.i1(i1 [[UOV]]) ; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = or i8 %xx, 1 %y = add nuw i8 %yy, 1 @@ -972,8 +961,7 @@ define i1 @extract_value_smul(i8 %xx, i8 %yy) { ; CHECK-NEXT: [[SOV:%.*]] = extractvalue { i8, i1 } [[MUL_SOV]], 1 ; CHECK-NEXT: call void @use.i1(i1 [[SOV]]) ; CHECK-NEXT: call void @use.i8(i8 [[MUL]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MUL]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x = or i8 %xx, 1 %y = add nuw i8 %yy, 1 From 2ff82c2c6490a1478e4311f60f1ce80af0957403 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 9 Apr 2024 12:36:03 -0500 Subject: [PATCH 025/886] [ValueTracking] Add tests for improving `isKnownNonZero` of `smax`; NFC --- .../Transforms/InstSimplify/known-non-zero.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index b647f11af4461..6ebc4e0f31a9c 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -166,3 +166,18 @@ A: B: ret i1 0 } + +define i1 @smax_non_zero(i8 %xx, i8 %y) { +; CHECK-LABEL: @smax_non_zero( +; CHECK-NEXT: [[X0:%.*]] = and i8 [[XX:%.*]], 63 +; CHECK-NEXT: [[X:%.*]] = add i8 [[X0]], 1 +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %x0 = and i8 %xx, 63 + %x = add i8 %x0, 1 + %v = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp eq i8 %v, 0 + ret i1 %r +} From f1ee458ddb45c9887b3df583ce9a4ba12aae8b3b Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 9 Apr 2024 11:58:03 -0500 Subject: [PATCH 026/886] [ValueTracking] improve `isKnownNonZero` precision for `smax` Instead of relying on known-bits for strictly positive, use the `isKnownPositive` API. This will use `isKnownNonZero` which is more accurate. Closes #88170 --- llvm/lib/Analysis/ValueTracking.cpp | 41 ++++++++++++++----- .../Transforms/InstSimplify/known-non-zero.ll | 6 +-- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b32dc493ace91..5ef1969893b42 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2870,23 +2870,44 @@ static bool isKnownNonZeroFromOperator(const Operator *I, case Intrinsic::uadd_sat: return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); - case Intrinsic::smin: case Intrinsic::smax: { - auto KnownOpImpliesNonZero = [&](const KnownBits &K) { - return II->getIntrinsicID() == Intrinsic::smin - ? K.isNegative() - : K.isStrictlyPositive(); + // If either arg is strictly positive the result is non-zero. Otherwise + // the result is non-zero if both ops are non-zero. 
+ auto IsNonZero = [&](Value *Op, std::optional &OpNonZero, + const KnownBits &OpKnown) { + if (!OpNonZero.has_value()) + OpNonZero = OpKnown.isNonZero() || + isKnownNonZero(Op, DemandedElts, Depth, Q); + return *OpNonZero; }; - KnownBits XKnown = + // Avoid re-computing isKnownNonZero. + std::optional Op0NonZero, Op1NonZero; + KnownBits Op1Known = + computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); + if (Op1Known.isNonNegative() && + IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known)) + return true; + KnownBits Op0Known = computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); - if (KnownOpImpliesNonZero(XKnown)) + if (Op0Known.isNonNegative() && + IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known)) return true; - KnownBits YKnown = + return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) && + IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known); + } + case Intrinsic::smin: { + // If either arg is negative the result is non-zero. Otherwise + // the result is non-zero if both ops are non-zero. + KnownBits Op1Known = computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); - if (KnownOpImpliesNonZero(YKnown)) + if (Op1Known.isNegative()) + return true; + KnownBits Op0Known = + computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); + if (Op0Known.isNegative()) return true; - if (XKnown.isNonZero() && YKnown.isNonZero()) + if (Op1Known.isNonZero() && Op0Known.isNonZero()) return true; } [[fallthrough]]; diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 6ebc4e0f31a9c..51f80f62c2f34 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -169,11 +169,7 @@ B: define i1 @smax_non_zero(i8 %xx, i8 %y) { ; CHECK-LABEL: @smax_non_zero( -; CHECK-NEXT: [[X0:%.*]] = and i8 [[XX:%.*]], 63 -; CHECK-NEXT: [[X:%.*]] = add i8 [[X0]], 1 -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %x0 = and i8 %xx, 63 %x = add i8 %x0, 1 From 7d60232b38b66138dae1b31027d73ee5b9df5c58 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 10 Apr 2024 10:41:20 -0500 Subject: [PATCH 027/886] [flang][Frontend] Implement printing defined macros via -dM (#87627) This should work the same way as in clang. 
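
A quick illustration (this mirrors the lit tests added below; the input
file name is arbitrary and the elided lines stand in for the remaining
predefined macros):

  $ flang -DFOO -DBAR=FOO -dM -E demo.F90
  #define BAR FOO
  #define FOO 1
  #define __DATE__ ...
  ...

Definitions are printed in alphabetical order, and the predefined
macros (e.g. __flang__, __flang_major__) appear alongside the
command-line ones.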
--- clang/docs/tools/clang-formatted-files.txt | 4 +- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 5 +- .../flang/Frontend/PreprocessorOptions.h | 3 + flang/include/flang/Parser/parsing.h | 3 + .../flang}/Parser/preprocessor.h | 15 +++- .../flang}/Parser/token-sequence.h | 4 +- flang/lib/Frontend/CompilerInvocation.cpp | 1 + flang/lib/Frontend/FrontendActions.cpp | 4 +- flang/lib/Parser/parsing.cpp | 17 +++-- flang/lib/Parser/preprocessor.cpp | 68 +++++++++++++++++-- flang/lib/Parser/prescan.cpp | 4 +- flang/lib/Parser/prescan.h | 2 +- flang/lib/Parser/token-sequence.cpp | 3 +- flang/test/Driver/driver-help-hidden.f90 | 1 + flang/test/Driver/driver-help.f90 | 2 + flang/test/Preprocessing/show-macros1.F90 | 14 ++++ flang/test/Preprocessing/show-macros2.F90 | 6 ++ flang/test/Preprocessing/show-macros3.F90 | 9 +++ 19 files changed, 139 insertions(+), 28 deletions(-) rename flang/{lib => include/flang}/Parser/preprocessor.h (88%) rename flang/{lib => include/flang}/Parser/token-sequence.h (97%) create mode 100644 flang/test/Preprocessing/show-macros1.F90 create mode 100644 flang/test/Preprocessing/show-macros2.F90 create mode 100644 flang/test/Preprocessing/show-macros3.F90 diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 70687c23b15e6..8fd4fed25a32a 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -2147,8 +2147,10 @@ flang/include/flang/Parser/message.h flang/include/flang/Parser/parse-state.h flang/include/flang/Parser/parse-tree-visitor.h flang/include/flang/Parser/parsing.h +flang/include/flang/Parser/preprocessor.h flang/include/flang/Parser/provenance.h flang/include/flang/Parser/source.h +flang/include/flang/Parser/token-sequence.h flang/include/flang/Parser/tools.h flang/include/flang/Parser/unparse.h flang/include/flang/Parser/user-state.h @@ -2319,7 +2321,6 @@ flang/lib/Parser/openmp-parsers.cpp flang/lib/Parser/parse-tree.cpp flang/lib/Parser/parsing.cpp flang/lib/Parser/preprocessor.cpp -flang/lib/Parser/preprocessor.h flang/lib/Parser/prescan.cpp flang/lib/Parser/prescan.h flang/lib/Parser/program-parsers.cpp @@ -2328,7 +2329,6 @@ flang/lib/Parser/source.cpp flang/lib/Parser/stmt-parser.h flang/lib/Parser/token-parsers.h flang/lib/Parser/token-sequence.cpp -flang/lib/Parser/token-sequence.h flang/lib/Parser/tools.cpp flang/lib/Parser/type-parser-implementation.h flang/lib/Parser/type-parsers.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f745e573eb268..0a74e6c75f95b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1449,7 +1449,7 @@ def dD : Flag<["-"], "dD">, Group, Visibility<[ClangOption, CC1Option]> def dI : Flag<["-"], "dI">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print include directives in -E mode in addition to normal output">, MarshallingInfoFlag>; -def dM : Flag<["-"], "dM">, Group, Visibility<[ClangOption, CC1Option]>, +def dM : Flag<["-"], "dM">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Print macro definitions in -E mode instead of normal output">; def dead__strip : Flag<["-"], "dead_strip">; def dependency_file : Separate<["-"], "dependency-file">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 2c83f70eb7887..9699443603d36 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ 
b/clang/lib/Driver/ToolChains/Flang.cpp @@ -679,7 +679,10 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(TripleStr)); if (isa(JA)) { - CmdArgs.push_back("-E"); + CmdArgs.push_back("-E"); + if (Args.getLastArg(options::OPT_dM)) { + CmdArgs.push_back("-dM"); + } } else if (isa(JA) || isa(JA)) { if (JA.getType() == types::TY_Nothing) { CmdArgs.push_back("-fsyntax-only"); diff --git a/flang/include/flang/Frontend/PreprocessorOptions.h b/flang/include/flang/Frontend/PreprocessorOptions.h index b2e9ac0e963b7..13a91ee9a184f 100644 --- a/flang/include/flang/Frontend/PreprocessorOptions.h +++ b/flang/include/flang/Frontend/PreprocessorOptions.h @@ -56,6 +56,9 @@ struct PreprocessorOptions { // -fno-reformat: Emit cooked character stream as -E output bool noReformat{false}; + // -dM: Show macro definitions with -dM -E + bool showMacros{false}; + void addMacroDef(llvm::StringRef name) { macros.emplace_back(std::string(name), false); } diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h index e80d8f724ac8f..4d329c189cb80 100644 --- a/flang/include/flang/Parser/parsing.h +++ b/flang/include/flang/Parser/parsing.h @@ -15,6 +15,7 @@ #include "parse-tree.h" #include "provenance.h" #include "flang/Common/Fortran-features.h" +#include "flang/Parser/preprocessor.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -59,6 +60,7 @@ class Parsing { const SourceFile *Prescan(const std::string &path, Options); void EmitPreprocessedSource( llvm::raw_ostream &, bool lineDirectives = true) const; + void EmitPreprocessorMacros(llvm::raw_ostream &) const; void DumpCookedChars(llvm::raw_ostream &) const; void DumpProvenance(llvm::raw_ostream &) const; void DumpParsingLog(llvm::raw_ostream &) const; @@ -83,6 +85,7 @@ class Parsing { const char *finalRestingPlace_{nullptr}; std::optional parseTree_; ParsingLog log_; + Preprocessor preprocessor_{allCooked_.allSources()}; }; } // namespace Fortran::parser #endif // FORTRAN_PARSER_PARSING_H_ diff --git a/flang/lib/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h similarity index 88% rename from flang/lib/Parser/preprocessor.h rename to flang/include/flang/Parser/preprocessor.h index b61f1577727be..630d5273d427c 100644 --- a/flang/lib/Parser/preprocessor.h +++ b/flang/include/flang/Parser/preprocessor.h @@ -15,9 +15,10 @@ // performed, so that special compiler command options &/or source file name // extensions for preprocessing will not be necessary. 
-#include "token-sequence.h" #include "flang/Parser/char-block.h" #include "flang/Parser/provenance.h" +#include "flang/Parser/token-sequence.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -39,7 +40,7 @@ class Definition { Definition(const std::string &predefined, AllSources &); bool isFunctionLike() const { return isFunctionLike_; } - std::size_t argumentCount() const { return argumentCount_; } + std::size_t argumentCount() const { return argNames_.size(); } bool isVariadic() const { return isVariadic_; } bool isDisabled() const { return isDisabled_; } bool isPredefined() const { return isPredefined_; } @@ -49,15 +50,21 @@ class Definition { TokenSequence Apply(const std::vector &args, Prescanner &); + void Print(llvm::raw_ostream &out, const char *macroName = "") const; + private: static TokenSequence Tokenize(const std::vector &argNames, const TokenSequence &token, std::size_t firstToken, std::size_t tokens); + // For a given token, return the index of the argument to which the token + // corresponds, or `argumentCount` if the token does not correspond to any + // argument. + std::size_t GetArgumentIndex(const CharBlock &token) const; bool isFunctionLike_{false}; - std::size_t argumentCount_{0}; bool isVariadic_{false}; bool isDisabled_{false}; bool isPredefined_{false}; + std::vector argNames_; TokenSequence replacement_; }; @@ -89,6 +96,8 @@ class Preprocessor { // Implements a preprocessor directive. void Directive(const TokenSequence &, Prescanner &); + void PrintMacros(llvm::raw_ostream &out) const; + private: enum class IsElseActive { No, Yes }; enum class CanDeadElseAppear { No, Yes }; diff --git a/flang/lib/Parser/token-sequence.h b/flang/include/flang/Parser/token-sequence.h similarity index 97% rename from flang/lib/Parser/token-sequence.h rename to flang/include/flang/Parser/token-sequence.h index 3df403d41e636..849240d8ec62c 100644 --- a/flang/lib/Parser/token-sequence.h +++ b/flang/include/flang/Parser/token-sequence.h @@ -42,8 +42,8 @@ class TokenSequence { } TokenSequence(TokenSequence &&that) : start_{std::move(that.start_)}, nextStart_{that.nextStart_}, - char_{std::move(that.char_)}, provenances_{ - std::move(that.provenances_)} {} + char_{std::move(that.char_)}, + provenances_{std::move(that.provenances_)} {} TokenSequence(const std::string &s, Provenance p) { Put(s, p); } TokenSequence &operator=(const TokenSequence &that) { diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index c830c7af2462c..8ce6ab7baf481 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -772,6 +772,7 @@ static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat); opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P); + opts.showMacros = args.hasArg(clang::driver::options::OPT_dM); } /// Parses all semantic related arguments and populates the variables diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 849b3c8e4dc02..8f251997ed401 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -399,7 +399,9 @@ void PrintPreprocessedAction::executeAction() { // Format or dump the prescanner's output CompilerInstance &ci = this->getInstance(); - if (ci.getInvocation().getPreprocessorOpts().noReformat) { + if (ci.getInvocation().getPreprocessorOpts().showMacros) { + 
ci.getParsing().EmitPreprocessorMacros(outForPP); + } else if (ci.getInvocation().getPreprocessorOpts().noReformat) { ci.getParsing().DumpCookedChars(outForPP); } else { ci.getParsing().EmitPreprocessedSource( diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index a55d33bf6b91d..43a898ff120c5 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "flang/Parser/parsing.h" -#include "preprocessor.h" #include "prescan.h" #include "type-parsers.h" #include "flang/Parser/message.h" +#include "flang/Parser/preprocessor.h" #include "flang/Parser/provenance.h" #include "flang/Parser/source.h" #include "llvm/Support/raw_ostream.h" @@ -60,20 +60,19 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { } } - Preprocessor preprocessor{allSources}; if (!options.predefinitions.empty()) { - preprocessor.DefineStandardMacros(); + preprocessor_.DefineStandardMacros(); for (const auto &predef : options.predefinitions) { if (predef.second) { - preprocessor.Define(predef.first, *predef.second); + preprocessor_.Define(predef.first, *predef.second); } else { - preprocessor.Undefine(predef.first); + preprocessor_.Undefine(predef.first); } } } currentCooked_ = &allCooked_.NewCookedSource(); Prescanner prescanner{ - messages_, *currentCooked_, preprocessor, options.features}; + messages_, *currentCooked_, preprocessor_, options.features}; prescanner.set_fixedForm(options.isFixedForm) .set_fixedFormColumnLimit(options.fixedFormColumns) .AddCompilerDirectiveSentinel("dir$"); @@ -87,7 +86,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { if (options.features.IsEnabled(LanguageFeature::CUDA)) { prescanner.AddCompilerDirectiveSentinel("$cuf"); prescanner.AddCompilerDirectiveSentinel("@cuf"); - preprocessor.Define("_CUDA", "1"); + preprocessor_.Define("_CUDA", "1"); } ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; @@ -107,6 +106,10 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { return sourceFile; } +void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const { + preprocessor_.PrintMacros(out); +} + void Parsing::EmitPreprocessedSource( llvm::raw_ostream &out, bool lineDirectives) const { const std::string *sourcePath{nullptr}; diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 515b8f62daf9a..2fba28b0c0c7d 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "preprocessor.h" +#include "flang/Parser/preprocessor.h" + #include "prescan.h" #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" @@ -21,6 +22,7 @@ #include #include #include +#include namespace Fortran::parser { @@ -31,8 +33,7 @@ Definition::Definition( Definition::Definition(const std::vector &argNames, const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, bool isVariadic) - : isFunctionLike_{true}, - argumentCount_(argNames.size()), isVariadic_{isVariadic}, + : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames}, replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} Definition::Definition(const std::string &predefined, AllSources &sources) @@ -46,6 +47,37 @@ bool Definition::set_isDisabled(bool disable) { return 
was; } +void Definition::Print(llvm::raw_ostream &out, const char *macroName) const { + if (!isFunctionLike_) { + // If it's not a function-like macro, then just print the replacement. + out << ' ' << replacement_.ToString(); + return; + } + + size_t argCount{argumentCount()}; + + out << '('; + for (size_t i{0}; i != argCount; ++i) { + if (i != 0) { + out << ", "; + } + out << argNames_[i]; + } + if (isVariadic_) { + out << ", ..."; + } + out << ") "; + + for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) { + std::string tok{replacement_.TokenAt(i).ToString()}; + if (size_t idx{GetArgumentIndex(tok)}; idx < argCount) { + out << argNames_[idx]; + } else { + out << tok; + } + } +} + static bool IsLegalIdentifierStart(const CharBlock &cpl) { return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); } @@ -73,6 +105,13 @@ TokenSequence Definition::Tokenize(const std::vector &argNames, return result; } +std::size_t Definition::GetArgumentIndex(const CharBlock &token) const { + if (token.size() >= 2 && token[0] == '~') { + return static_cast(token[1] - 'A'); + } + return argumentCount(); +} + static TokenSequence Stringify( const TokenSequence &tokens, AllSources &allSources) { TokenSequence result; @@ -159,7 +198,7 @@ TokenSequence Definition::Apply( continue; } if (bytes == 2 && token[0] == '~') { // argument substitution - std::size_t index = token[1] - 'A'; + std::size_t index{GetArgumentIndex(token)}; if (index >= args.size()) { continue; } @@ -202,8 +241,8 @@ TokenSequence Definition::Apply( Provenance commaProvenance{ prescanner.preprocessor().allSources().CompilerInsertionProvenance( ',')}; - for (std::size_t k{argumentCount_}; k < args.size(); ++k) { - if (k > argumentCount_) { + for (std::size_t k{argumentCount()}; k < args.size(); ++k) { + if (k > argumentCount()) { result.Put(","s, commaProvenance); } result.Put(args[k]); @@ -212,7 +251,7 @@ TokenSequence Definition::Apply( j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' && parenthesesNesting == 0) { parenthesesNesting = 1; - skipping = args.size() == argumentCount_; + skipping = args.size() == argumentCount(); ++j; } else { if (parenthesesNesting > 0) { @@ -713,6 +752,21 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { } } +void Preprocessor::PrintMacros(llvm::raw_ostream &out) const { + // std::set is ordered. Use that to print the macros in an + // alphabetical order. 
+ std::set macroNames; + for (const auto &[name, _] : definitions_) { + macroNames.insert(name.ToString()); + } + + for (const std::string &name : macroNames) { + out << "#define " << name; + definitions_.at(name).Print(out, name.c_str()); + out << '\n'; + } +} + CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { names_.push_back(t.ToString()); return {names_.back().data(), names_.back().size()}; diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index e9b23172ed2e2..96db3955299f3 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "prescan.h" -#include "preprocessor.h" -#include "token-sequence.h" #include "flang/Common/idioms.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" +#include "flang/Parser/preprocessor.h" #include "flang/Parser/source.h" +#include "flang/Parser/token-sequence.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index 7442b5d226335..581980001bcc2 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -16,11 +16,11 @@ // fixed form character literals on truncated card images, file // inclusion, and driving the Fortran source preprocessor. -#include "token-sequence.h" #include "flang/Common/Fortran-features.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" #include "flang/Parser/provenance.h" +#include "flang/Parser/token-sequence.h" #include #include #include diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp index 799d13a423660..d0254ecd5aaef 100644 --- a/flang/lib/Parser/token-sequence.cpp +++ b/flang/lib/Parser/token-sequence.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "token-sequence.h" +#include "flang/Parser/token-sequence.h" + #include "prescan.h" #include "flang/Parser/characters.h" #include "flang/Parser/message.h" diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index 48f48f5384fdc..35b188ad3a9ec 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -21,6 +21,7 @@ ! CHECK-NEXT: -ccc-print-phases Dump list of actions to perform ! CHECK-NEXT: -cpp Enable predefined and command line preprocessor macros ! CHECK-NEXT: -c Only run preprocess, compile, and assemble steps +! CHECK-NEXT: -dM Print macro definitions in -E mode instead of normal output ! CHECK-NEXT: -dumpmachine Display the compiler's target processor ! CHECK-NEXT: -dumpversion Display the version of the compiler ! CHECK-NEXT: -D = Define to (or 1 if omitted) diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 38f74395a678a..d4dab55f40e8b 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -17,6 +17,7 @@ ! HELP-NEXT: -### Print (but do not run) the commands to run for this compilation ! HELP-NEXT: -cpp Enable predefined and command line preprocessor macros ! HELP-NEXT: -c Only run preprocess, compile, and assemble steps +! HELP-NEXT: -dM Print macro definitions in -E mode instead of normal output ! HELP-NEXT: -dumpmachine Display the compiler's target processor ! HELP-NEXT: -dumpversion Display the version of the compiler ! HELP-NEXT: -D = Define to (or 1 if omitted) @@ -155,6 +156,7 @@ ! HELP-FC1-NEXT:OPTIONS: ! 
HELP-FC1-NEXT: -cpp Enable predefined and command line preprocessor macros ! HELP-FC1-NEXT: --dependent-lib= Add dependent library +! HELP-FC1-NEXT: -dM Print macro definitions in -E mode instead of normal output ! HELP-FC1-NEXT: -D = Define to (or 1 if omitted) ! HELP-FC1-NEXT: -emit-fir Build the parse tree, then lower it to FIR ! HELP-FC1-NEXT: -emit-hlfir Build the parse tree, then lower it to HLFIR diff --git a/flang/test/Preprocessing/show-macros1.F90 b/flang/test/Preprocessing/show-macros1.F90 new file mode 100644 index 0000000000000..8e3d59a7849f7 --- /dev/null +++ b/flang/test/Preprocessing/show-macros1.F90 @@ -0,0 +1,14 @@ +! RUN: %flang -dM -E -o - %s | FileCheck %s + +! Check the default macros. Omit certain ones such as __LINE__ +! or __FILE__, or target-specific ones, like __x86_64__. + +! Macros are printed in the alphabetical order. + +! CHECK: #define __DATE__ +! CHECK: #define __TIME__ +! CHECK: #define __flang__ +! CHECK: #define __flang_major__ +! CHECK: #define __flang_minor__ +! CHECK: #define __flang_patchlevel__ + diff --git a/flang/test/Preprocessing/show-macros2.F90 b/flang/test/Preprocessing/show-macros2.F90 new file mode 100644 index 0000000000000..baf52ba8161f1 --- /dev/null +++ b/flang/test/Preprocessing/show-macros2.F90 @@ -0,0 +1,6 @@ +! RUN: %flang -DFOO -DBAR=FOO -dM -E -o - %s | FileCheck %s + +! Check command line definitions + +! CHECK: #define BAR FOO +! CHECK: #define FOO 1 diff --git a/flang/test/Preprocessing/show-macros3.F90 b/flang/test/Preprocessing/show-macros3.F90 new file mode 100644 index 0000000000000..951a1ec5ba16f --- /dev/null +++ b/flang/test/Preprocessing/show-macros3.F90 @@ -0,0 +1,9 @@ +! RUN: %flang -dM -E -o - %s | FileCheck %s + +! Variadic macro +#define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__) +! CHECK: #define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__) + +! Macro with an unused parameter +#define FOO2(X, Y, Z) (X + Z) +! CHECK: #define FOO2(X, Y, Z) (X + Z) From 52aaa8a87960a7d342c5e6b7d5af82c76c8cc45d Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 10 Apr 2024 10:46:53 -0500 Subject: [PATCH 028/886] [clang][test] Avoid writing to a potentially write-protected dir (#88258) This test just checks for the stdout/stderr of clang, but it incidentally tries to write to `a.out` in the current directory, which may be write protected. Typically one would write `clang -o %t.o` for a writeable dir, but since we only care about stdout/stderr, throw away the object file and just write to /dev/null instead. --- clang/test/Driver/lld-repro.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 9457dd334b5b9..61904c0e6df30 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -4,12 +4,12 @@ // RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ // RUN: | sed -e 's/\\/\\\\/g' > %t.rsp -// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all 2>&1 \ +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all -o /dev/null 2>&1 \ // RUN: | FileCheck %s // Test that the reproducer can still be created even when the input source cannot be preprocessed // again, like when reading from stdin. 
-// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all 2>&1 < %s \ +// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all -o /dev/null 2>&1 < %s \ // RUN: | FileCheck %s // check that we still get lld's output @@ -20,9 +20,9 @@ // CHECK-NEXT: note: diagnostic msg: // CHECK: ******************** -// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler 2>&1 \ +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler -o /dev/null 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER -// RUN: not %clang %s @%t.rsp 2>&1 \ +// RUN: not %clang %s @%t.rsp -o /dev/null 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER // NO-LINKER-NOT: Preprocessed source(s) and associated run script(s) are located at: From 0ad663ead1242e908a8c5005f35e72747d136a3b Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 10 Apr 2024 17:51:02 +0200 Subject: [PATCH 029/886] [libc++] Removes Clang-16 support. (#87810) With the release of Clang-18 we no longer officially support Clang-16. --- libcxx/include/__algorithm/simd_utils.h | 2 +- libcxx/include/__config | 4 ++-- libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 2 +- .../range.utility/range.utility.conv/to_deduction.pass.cpp | 2 +- .../format/format.arguments/format.arg/visit.pass.cpp | 2 +- .../format.arguments/format.arg/visit.return_type.pass.cpp | 2 +- .../format.arg/visit_format_arg.deprecated.verify.cpp | 2 +- .../variant/variant.visit.member/robust_against_adl.pass.cpp | 2 +- .../std/utilities/variant/variant.visit.member/visit.pass.cpp | 2 +- .../variant/variant.visit.member/visit_return_type.pass.cpp | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index 989a1957987e1..8d540ae2cce88 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -27,7 +27,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> // TODO: Find out how altivec changes things and allow vectorizations there too. -#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1700 && !defined(__ALTIVEC__) +#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__) # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 #else # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 diff --git a/libcxx/include/__config b/libcxx/include/__config index 8550b1da4a278..d98b54926bbe8 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -44,8 +44,8 @@ // Warn if a compiler version is used that is not supported anymore // LLVM RELEASE Update the minimum compiler versions # if defined(_LIBCPP_CLANG_VER) -# if _LIBCPP_CLANG_VER < 1600 -# warning "Libc++ only supports Clang 16 and later" +# if _LIBCPP_CLANG_VER < 1700 +# warning "Libc++ only supports Clang 17 and later" # endif # elif defined(_LIBCPP_APPLE_CLANG_VER) # if _LIBCPP_APPLE_CLANG_VER < 1500 diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index 479c1b93fcab9..640365889efae 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -12,7 +12,7 @@ // UNSUPPORTED: c++03 // TODO: Investigate these failures which break the CI. 
-// UNSUPPORTED: clang-16, clang-17, clang-18, clang-19 +// UNSUPPORTED: clang-17, clang-18, clang-19 // TODO: Investigate this failure on GCC 13 (in Ubuntu Jammy) // UNSUPPORTED: gcc-13 diff --git a/libcxx/test/std/ranges/range.utility/range.utility.conv/to_deduction.pass.cpp b/libcxx/test/std/ranges/range.utility/range.utility.conv/to_deduction.pass.cpp index f84cedbc122a1..58307bd88d0fe 100644 --- a/libcxx/test/std/ranges/range.utility/range.utility.conv/to_deduction.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/range.utility.conv/to_deduction.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // There is a bug in older versions of Clang that causes trouble with constraints in classes like // `ContainerWithDirectCtr`. -// XFAIL: clang-16, apple-clang-15 +// XFAIL: apple-clang-15 // template class C, input_range R, class... Args> // constexpr auto to(R&& r, Args&&... args); // Since C++23 diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp index 994ccc70a38da..284b03c32cd09 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME // The tested functionality needs deducing this. -// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp index b2c40d1604547..4c60cb0e9ae1d 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit.return_type.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME // The tested functionality needs deducing this. -// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp index acd9228369e60..6a3896c8965c7 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.deprecated.verify.cpp @@ -7,7 +7,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME -// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp index c54f2b722d46a..bea6d949924bd 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/robust_against_adl.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // The tested functionality needs deducing this. 
-// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp index d0c909985bbb3..857e85d00857a 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/visit.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // The tested functionality needs deducing this. -// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // diff --git a/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp index 3312197d8df9c..2c1cbb06e7067 100644 --- a/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit.member/visit_return_type.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 // The tested functionality needs deducing this. -// UNSUPPORTED: clang-16 || clang-17 +// UNSUPPORTED: clang-17 // XFAIL: apple-clang // From fc3dff9b4637bb5960fe70add90cd27e6842d58b Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 10 Apr 2024 09:08:01 -0700 Subject: [PATCH 030/886] [clang][modules] Stop eagerly reading files with diagnostic pragmas (#87442) This makes it so that the importer doesn't need to stat all input files of a module that contain diagnostic pragmas, reducing file system traffic. --- clang/lib/Serialization/ASTReader.cpp | 2 -- clang/test/Modules/home-is-cwd-search-paths.c | 34 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 clang/test/Modules/home-is-cwd-search-paths.c diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 679302e7a838f..2e73a0a714010 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6626,8 +6626,6 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { "Invalid data, missing pragma diagnostic states"); FileID FID = ReadFileID(F, Record, Idx); assert(FID.isValid() && "invalid FileID for transition"); - // FIXME: Remove this once we don't need the side-effects. - (void)SourceMgr.getSLocEntryOrNull(FID); unsigned Transitions = Record[Idx++]; // Note that we don't need to set up Parent/ParentOffset here, because diff --git a/clang/test/Modules/home-is-cwd-search-paths.c b/clang/test/Modules/home-is-cwd-search-paths.c new file mode 100644 index 0000000000000..0b8954e691bc0 --- /dev/null +++ b/clang/test/Modules/home-is-cwd-search-paths.c @@ -0,0 +1,34 @@ +// This test demonstrates how -fmodule-map-file-home-is-cwd with -fmodules-embed-all-files +// extend the importer search paths by relying on the side effects of pragma diagnostic +// mappings deserialization. + +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- dir1/a.modulemap +module a { header "a.h" } +//--- dir1/a.h +#include "search.h" +// The first compilation is configured such that -I search does contain the search.h header. 
+//--- dir1/search/search.h +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wparentheses" +#pragma clang diagnostic pop +// RUN: cd %t/dir1 && %clang_cc1 -fmodules -I search \ +// RUN: -emit-module -fmodule-name=a a.modulemap -o %t/a.pcm \ +// RUN: -fmodules-embed-all-files -fmodule-map-file-home-is-cwd + +//--- dir2/b.modulemap +module b { header "b.h" } +//--- dir2/b.h +#include "search.h" // expected-error{{'search.h' file not found}} +// The second compilation is configured such that -I search is an empty directory. +// However, since b.pcm simply embeds the headers as "search/search.h", this compilation +// ends up seeing it too. This relies solely on ASTReader::ReadPragmaDiagnosticMappings() +// eagerly reading the corresponding INPUT_FILE record before header search happens. +// Removing the eager deserialization makes this header invisible and so does removing +// the pragma directives. +// RUN: mkdir %t/dir2/search +// RUN: cd %t/dir2 && %clang_cc1 -fmodules -I search \ +// RUN: -emit-module -fmodule-name=b b.modulemap -o %t/b.pcm \ +// RUN: -fmodule-file=%t/a.pcm -verify From 51786eb5bfc30e7eff998323a9ce433ec4620383 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 10 Apr 2024 09:08:40 -0700 Subject: [PATCH 031/886] [clang][modules] Only compute affecting module maps with implicit search (#87849) When writing out a PCM, we compute the set of module maps that did affect the compilation and we strip the rest to make the output independent of them. The most common way to read a module map that is not affecting is with implicit module map search. The other option is to pass a bunch of unnecessary `-fmodule-map-file=` arguments on the command-line, in which case the client should probably not give those to Clang anyway. This makes serialization of explicit modules faster, mostly due to reduced file system traffic. --- clang/lib/Serialization/ASTWriter.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4cd74b1ba9d72..d2afe378bb0c3 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -163,8 +163,13 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) { namespace { -std::set GetAffectingModuleMaps(const Preprocessor &PP, - Module *RootModule) { +std::optional> +GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { + // Without implicit module map search, there's no good reason to know about + // any module maps that are not affecting. + if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ImplicitModuleMaps) + return std::nullopt; + SmallVector ModulesToProcess{RootModule}; const HeaderSearch &HS = PP.getHeaderSearchInfo(); @@ -4735,8 +4740,16 @@ void ASTWriter::computeNonAffectingInputFiles() { if (!Cache->OrigEntry) continue; - if (!isModuleMap(File.getFileCharacteristic()) || - llvm::is_contained(AffectingModuleMaps, *Cache->OrigEntry)) + // Don't prune anything other than module maps. + if (!isModuleMap(File.getFileCharacteristic())) + continue; + + // Don't prune module maps if all are guaranteed to be affecting. + if (!AffectingModuleMaps) + continue; + + // Don't prune module maps that are affecting. 
+ if (llvm::is_contained(*AffectingModuleMaps, *Cache->OrigEntry)) continue; IsSLocAffecting[I] = false; From 323d3ab2574ba9d371926bb1b5c67dbe7b2b4ec3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Apr 2024 09:08:50 -0700 Subject: [PATCH 032/886] [RISCV] Optimize undef Even vector in getWideningInterleave. (#88221) We recently optimized the code when the Odd vector was undef to fix a poison bug. There are additional optimizations we can do if the even vector is undef. With Zvbb, we can use a single vwsll. Without Zvbb, we can use a vzext.vf2 and a vsll. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 ++++++++++-- .../CodeGen/RISCV/rvv/vector-interleave.ll | 20 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6e97575c167cd..944d8b6de895d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4638,8 +4638,17 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, Subtarget.getXLenVT())); Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV, OffsetVec, Passthru, Mask, VL); - Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, - Interleaved, EvenV, Passthru, Mask, VL); + if (!EvenV.isUndef()) + Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, + Interleaved, EvenV, Passthru, Mask, VL); + } else if (EvenV.isUndef()) { + Interleaved = + DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL); + + SDValue OffsetVec = + DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT); + Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT, + Interleaved, OffsetVec, Passthru, Mask, VL); } else { // FIXME: We should freeze the odd vector here. We already handled the case // of provably undef/poison above. diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index 0992c9fe495f4..4b6ad0f27214d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -674,6 +674,26 @@ define @vector_interleave_nxv8i32_nxv4i32_poison( %res } +define @vector_interleave_nxv8i32_nxv4i32_poison2( %a) { +; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsll.vx v8, v12, a0 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a0, 32 +; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVBB-NEXT: vwsll.vx v12, v8, a0 +; ZVBB-NEXT: vmv4r.v v8, v12 +; ZVBB-NEXT: ret + %res = call @llvm.experimental.vector.interleave2.nxv8i32( poison, %a) + ret %res +} + declare @llvm.experimental.vector.interleave2.nxv64f16(, ) declare @llvm.experimental.vector.interleave2.nxv32f32(, ) declare @llvm.experimental.vector.interleave2.nxv16f64(, ) From e72c949c15208ba3dd53a9cebfee02734965a678 Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Wed, 10 Apr 2024 09:12:25 -0700 Subject: [PATCH 033/886] [msan] Overflow intrinsics. 
(#88210) --- .../Instrumentation/MemorySanitizer.cpp | 24 ++++ .../MemorySanitizer/overflow.ll | 103 ++++++------------ 2 files changed, 59 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 46b9181c8922e..ee3531bbd68df 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3715,8 +3715,32 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getOrigin(&I, 0)); } + void handleArithmeticWithOverflow(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *Shadow0 = getShadow(&I, 0); + Value *Shadow1 = getShadow(&I, 1); + Value *ShadowElt0 = IRB.CreateOr(Shadow0, Shadow1); + Value *ShadowElt1 = + IRB.CreateICmpNE(ShadowElt0, getCleanShadow(ShadowElt0)); + + Value *Shadow = PoisonValue::get(getShadowTy(&I)); + Shadow = IRB.CreateInsertValue(Shadow, ShadowElt0, 0); + Shadow = IRB.CreateInsertValue(Shadow, ShadowElt1, 1); + + setShadow(&I, Shadow); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + handleArithmeticWithOverflow(I); + break; case Intrinsic::abs: handleAbsIntrinsic(I); break; diff --git a/llvm/test/Instrumentation/MemorySanitizer/overflow.ll b/llvm/test/Instrumentation/MemorySanitizer/overflow.ll index b1304faec3df0..0cfae0008263f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/overflow.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/overflow.ll @@ -10,16 +10,12 @@ define {i64, i1} @test_sadd_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) @@ -32,16 +28,12 @@ define {i64, i1} @test_uadd_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 
[[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) @@ -54,16 +46,12 @@ define {i64, i1} @test_smul_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b) @@ -75,16 +63,12 @@ define {i64, i1} @test_umul_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } 
@llvm.umul.with.overflow.i64(i64 %a, i64 %b) @@ -96,16 +80,12 @@ define {i64, i1} @test_ssub_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) @@ -117,16 +97,12 @@ define {i64, i1} @test_usub_with_overflow(i64 %a, i64 %b) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A]], i64 [[B]]) -; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { i64, i1 } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { i64, i1 } [[RES]] ; %res = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) @@ -139,18 +115,12 @@ define {<4 x i32>, <4 x i1>} @test_sadd_with_overflow_vec(<4 x i32> %a, <4 x i32 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; 
CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { <4 x i32>, <4 x i1> } poison, <4 x i32> [[TMP3]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <4 x i32>, <4 x i1> } [[TMP5]], <4 x i1> [[TMP4]], 1 ; CHECK-NEXT: [[RES:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) -; CHECK-NEXT: store { <4 x i32>, <4 x i1> } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store { <4 x i32>, <4 x i1> } [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret { <4 x i32>, <4 x i1> } [[RES]] ; %res = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b) @@ -158,6 +128,3 @@ define {<4 x i32>, <4 x i1>} @test_sadd_with_overflow_vec(<4 x i32> %a, <4 x i32 } attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} -;. From 43b2b2ebce635bec1e3c060092ea75db858ee3fd Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 10 Apr 2024 18:25:16 +0200 Subject: [PATCH 034/886] Revert "Fix complex log1p accuracy with large abs values." (#88290) Reverts llvm/llvm-project#88260 The test fails on the GCC7 buildbot. --- .../ComplexToStandard/ComplexToStandard.cpp | 50 +++++++++---------- .../convert-to-standard.mlir | 48 +++++++----------- 2 files changed, 41 insertions(+), 57 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 0aa1de5fa5d9a..9c3c4d96a301e 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -570,39 +570,37 @@ struct Log1pOpConversion : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto type = cast(adaptor.getComplex().getType()); auto elementType = cast(type.getElementType()); - arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue(); + arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); - Value real = b.create(adaptor.getComplex()); - Value imag = b.create(adaptor.getComplex()); + Value real = b.create(elementType, adaptor.getComplex()); + Value imag = b.create(elementType, adaptor.getComplex()); Value half = b.create(elementType, b.getFloatAttr(elementType, 0.5)); Value one = b.create(elementType, b.getFloatAttr(elementType, 1)); - Value realPlusOne = b.create(real, one, fmf); - Value absRealPlusOne = b.create(realPlusOne, fmf); - Value absImag = b.create(imag, fmf); - - Value maxAbs = b.create(absRealPlusOne, absImag, fmf); - Value minAbs = b.create(absRealPlusOne, absImag, fmf); - - Value maxAbsOfRealPlusOneAndImagMinusOne = b.create( - b.create(arith::CmpFPredicate::OGT, realPlusOne, absImag, - fmf), - real, b.create(maxAbs, one, fmf)); - Value minMaxRatio = b.create(minAbs, maxAbs, fmf); - Value logOfMaxAbsOfRealPlusOneAndImag = - b.create(maxAbsOfRealPlusOneAndImagMinusOne, fmf); - Value logOfSqrtPart = b.create( - b.create(minMaxRatio, minMaxRatio, fmf), fmf); - Value r = b.create( - b.create(half, logOfSqrtPart, fmf), - logOfMaxAbsOfRealPlusOneAndImag, fmf); - Value resultReal = b.create( - b.create(arith::CmpFPredicate::UNO, r, r, fmf), minAbs, - r); - Value resultImag = b.create(imag, realPlusOne, fmf); + Value two = b.create(elementType, + b.getFloatAttr(elementType, 2)); + + // log1p(a+bi) = 
.5*log((a+1)^2+b^2) + i*atan2(b, a + 1) + // log((a+1)+bi) = .5*log(a*a + 2*a + 1 + b*b) + i*atan2(b, a+1) + // log((a+1)+bi) = .5*log1p(a*a + 2*a + b*b) + i*atan2(b, a+1) + Value sumSq = b.create(real, real, fmf.getValue()); + sumSq = b.create( + sumSq, b.create(real, two, fmf.getValue()), + fmf.getValue()); + sumSq = b.create( + sumSq, b.create(imag, imag, fmf.getValue()), + fmf.getValue()); + Value logSumSq = + b.create(elementType, sumSq, fmf.getValue()); + Value resultReal = b.create(logSumSq, half, fmf.getValue()); + + Value realPlusOne = b.create(real, one, fmf.getValue()); + + Value resultImag = + b.create(elementType, imag, realPlusOne, fmf.getValue()); rewriter.replaceOpWithNewOp(op, type, resultReal, resultImag); return success(); diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index 43918904a09f4..f5d9499eadda4 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -300,22 +300,15 @@ func.func @complex_log1p(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32 +// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32 +// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32 +// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32 +// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32 +// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32 // CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32 -// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] : f32 -// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 -// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 -// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 -// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 -// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %cst_0 : f32 -// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32 -// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 -// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] : f32 -// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] : f32 -// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] : f32 -// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] : f32 -// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] : f32 -// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex @@ -970,22 +963,15 @@ func.func @complex_log1p_with_fmf(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf 
%[[REAL]], %[[ONE]] fastmath : f32
-// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] fastmath : f32
-// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32
-// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %cst_0 fastmath : f32
-// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32
-// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath : f32
-// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath : f32
-// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath : f32
-// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath : f32
-// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath : f32
-// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32
+// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32
+// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] fastmath : f32
+// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] fastmath : f32
+// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] fastmath : f32
+// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] fastmath : f32
+// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] fastmath : f32
+// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] fastmath : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] fastmath : f32
+// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath : f32
 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] fastmath : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex
 // CHECK: return %[[RESULT]] : complex

From 48c5c70fdd3bec2929e2e903e3bf4494a65f7a92 Mon Sep 17 00:00:00 2001
From: erichkeane
Date: Wed, 10 Apr 2024 09:23:23 -0700
Subject: [PATCH 035/886] [NFC] Update SemaRef.Diag to just Diag in OpenACC
 implementation

I missed these two in my last patch, as the two patches crossed in
review; this corrects them now.
---
 clang/lib/Sema/SemaOpenACC.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index b6afb80b873e2..a6f4453e525d0 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -102,10 +102,10 @@ SemaOpenACC::ActOnClause(ArrayRef ExistingClauses,
       });

   if (Itr != ExistingClauses.end()) {
-    SemaRef.Diag(Clause.getBeginLoc(),
+    Diag(Clause.getBeginLoc(),
                  diag::err_acc_duplicate_clause_disallowed)
         << Clause.getDirectiveKind() << Clause.getClauseKind();
-    SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
+    Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
     return nullptr;
   }

From 3d468566eb395995ac54fcf90d3afb9b9f822eb3 Mon Sep 17 00:00:00 2001
From: erichkeane
Date: Wed, 10 Apr 2024 09:37:01 -0700
Subject: [PATCH 036/886] [NFC] Remove unneeded 'maybe_unused' attributes

These were added while we only had a partial implementation of clauses,
so they are no longer needed.
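
For reference, a minimal standalone sketch of the pattern (hypothetical
names, not the actual ASTReader code): the attribute only earns its keep
while a value must be read but is not yet consumed; once the locations
feed into real clause construction it becomes noise:

```cpp
struct SourceLocation { unsigned Raw = 0; };

// Hypothetical stand-in for the record reader; assumed shape only.
static SourceLocation readSourceLocation() { return {}; }

void readClauseBefore() {
  // The locations had to be read to keep the record cursor in sync, but
  // nothing consumed them yet, so the attribute silenced -Wunused-variable:
  [[maybe_unused]] SourceLocation BeginLoc = readSourceLocation();
  [[maybe_unused]] SourceLocation EndLoc = readSourceLocation();
}

unsigned readClauseAfter() {
  // Once the values are actually consumed, the attribute is unneeded:
  SourceLocation BeginLoc = readSourceLocation();
  SourceLocation EndLoc = readSourceLocation();
  return BeginLoc.Raw + EndLoc.Raw;
}

int main() { readClauseBefore(); return static_cast<int>(readClauseAfter()); }
```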
--- clang/lib/Serialization/ASTReader.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2e73a0a714010..0ca7f6600eee3 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11754,11 +11754,8 @@ void ASTRecordReader::readOMPChildren(OMPChildren *Data) { OpenACCClause *ASTRecordReader::readOpenACCClause() { OpenACCClauseKind ClauseKind = readEnum(); - // TODO OpenACC: We don't have these used anywhere, but eventually we should - // be constructing the Clauses with them, so these attributes can go away at - // that point. - [[maybe_unused]] SourceLocation BeginLoc = readSourceLocation(); - [[maybe_unused]] SourceLocation EndLoc = readSourceLocation(); + SourceLocation BeginLoc = readSourceLocation(); + SourceLocation EndLoc = readSourceLocation(); switch (ClauseKind) { case OpenACCClauseKind::Default: { From f388a3a446ef2566d73b6a73ba300738f8c2c002 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Wed, 10 Apr 2024 09:42:12 -0700 Subject: [PATCH 037/886] [mlir][sparse] update doc and examples of the [dis]assemble operations (#88213) The doc and examples of the [dis]assemble operations did not reflect all the recent changes on order of the operands. Also clarified some of the text. --- .../SparseTensor/IR/SparseTensorOps.td | 89 +++++++++---------- mlir/test/Dialect/SparseTensor/invalid.mlir | 6 +- mlir/test/Dialect/SparseTensor/roundtrip.mlir | 14 +-- .../Dialect/SparseTensor/CPU/sparse_pack.mlir | 10 ++- 4 files changed, 61 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 5df8a176459b7..0cfc64f9988a0 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -58,37 +58,35 @@ def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]>, Arguments<(ins Variadic>:$levels, TensorOf<[AnyType]>:$values)>, Results<(outs AnySparseTensor: $result)> { - let summary = "Returns a sparse tensor assembled from the given values and levels"; + let summary = "Returns a sparse tensor assembled from the given levels and values"; let description = [{ - Assembles the values and per-level coordinate or postion arrays into a sparse tensor. - The order and types of provided levels must be consistent with the actual storage - layout of the returned sparse tensor described below. + Assembles the per-level position and coordinate arrays together with + the values arrays into a sparse tensor. The order and types of the + provided levels must be consistent with the actual storage layout of + the returned sparse tensor described below. - - `values : tensor` - supplies the value for each stored element in the sparse tensor. - `levels: [tensor, ...]` - each supplies the sparse tensor coordinates scheme in the sparse tensor for - the corresponding level as specifed by `sparse_tensor::StorageLayout`. - - This operation can be used to assemble a sparse tensor from external - sources; e.g., when passing two numpy arrays from Python. - - Disclaimer: This is the user's responsibility to provide input that can be - correctly interpreted by the sparsifier, which does not perform - any sanity test during runtime to verify data integrity. 
+ supplies the sparse tensor position and coordinate arrays + of the sparse tensor for the corresponding level as specifed by + `sparse_tensor::StorageLayout`. + - `values : tensor` + supplies the values array for the stored elements in the sparse tensor. - TODO: The returned tensor is allowed (in principle) to have non-identity - dimOrdering/higherOrdering mappings. However, the current implementation - does not yet support them. + This operation can be used to assemble a sparse tensor from an + external source; e.g., by passing numpy arrays from Python. It + is the user's responsibility to provide input that can be correctly + interpreted by the sparsifier, which does not perform any sanity + test to verify data integrity. Example: ```mlir - %values = arith.constant dense<[ 1.1, 2.2, 3.3 ]> : tensor<3xf64> - %coordinates = arith.constant dense<[[0,0], [1,2], [1,3]]> : tensor<3x2xindex> - %st = sparse_tensor.assemble %values, %coordinates - : tensor<3xf64>, tensor<3x2xindex> to tensor<3x4xf64, #COO> + %pos = arith.constant dense<[0, 3]> : tensor<2xindex> + %index = arith.constant dense<[[0,0], [1,2], [1,3]]> : tensor<3x2xindex> + %values = arith.constant dense<[ 1.1, 2.2, 3.3 ]> : tensor<3xf64> + %s = sparse_tensor.assemble (%pos, %index), %values + : (tensor<2xindex>, tensor<3x2xindex>), tensor<3xf64> to tensor<3x4xf64, #COO> // yields COO format |1.1, 0.0, 0.0, 0.0| // of 3x4 matrix |0.0, 0.0, 2.2, 3.3| // |0.0, 0.0, 0.0, 0.0| @@ -96,8 +94,8 @@ def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]>, }]; let assemblyFormat = - "` ` `(` $levels `)` `,` $values attr-dict" - " `:` `(` type($levels) `)` `,` type($values) `to` type($result)"; + "` ` `(` $levels `)` `,` $values attr-dict `:`" + " `(` type($levels) `)` `,` type($values) `to` type($result)"; let hasVerifier = 1; } @@ -110,21 +108,20 @@ def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVaria TensorOf<[AnyType]>:$ret_values, Variadic:$lvl_lens, AnyIndexingScalarLike:$val_len)> { - let summary = "Returns the (values, coordinates) pair disassembled from the input tensor"; + let summary = "Copies the levels and values of the given sparse tensor"; let description = [{ The disassemble operation is the inverse of `sparse_tensor::assemble`. - It returns the values and per-level position and coordinate array to the - user from the sparse tensor along with the actual length of the memory used - in each returned buffer. This operation can be used for returning an - disassembled MLIR sparse tensor to frontend; e.g., returning two numpy arrays - to Python. - - Disclaimer: This is the user's responsibility to allocate large enough buffers - to hold the sparse tensor. The sparsifier simply copies each fields - of the sparse tensor into the user-supplied buffer without bound checking. + It copies the per-level position and coordinate arrays together with + the values array of the given sparse tensor into the user-supplied buffers + along with the actual length of the memory used in each returned buffer. - TODO: the current implementation does not yet support non-identity mappings. + This operation can be used for returning a disassembled MLIR sparse tensor; + e.g., copying the sparse tensor contents into pre-allocated numpy arrays + back to Python. It is the user's responsibility to allocate large enough + buffers of the appropriate types to hold the sparse tensor contents. 
+ The sparsifier simply copies all fields of the sparse tensor into the + user-supplied buffers without any sanity test to verify data integrity. Example: @@ -132,26 +129,26 @@ def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVaria // input COO format |1.1, 0.0, 0.0, 0.0| // of 3x4 matrix |0.0, 0.0, 2.2, 3.3| // |0.0, 0.0, 0.0, 0.0| - %v, %p, %c, %v_len, %p_len, %c_len = - sparse_tensor.disassemble %sp : tensor<3x4xf64, #COO> - out_lvls(%op, %oi) : tensor<2xindex>, tensor<3x2xindex>, - out_vals(%od) : tensor<3xf64> -> - tensor<3xf64>, (tensor<2xindex>, tensor<3x2xindex>), index, (index, index) - // %v = arith.constant dense<[ 1.1, 2.2, 3.3 ]> : tensor<3xf64> + %p, %c, %v, %p_len, %c_len, %v_len = + sparse_tensor.disassemble %s : tensor<3x4xf64, #COO> + out_lvls(%op, %oi : tensor<2xindex>, tensor<3x2xindex>) + out_vals(%od : tensor<3xf64>) -> + (tensor<2xindex>, tensor<3x2xindex>), tensor<3xf64>, (index, index), index // %p = arith.constant dense<[ 0, 3 ]> : tensor<2xindex> // %c = arith.constant dense<[[0,0], [1,2], [1,3]]> : tensor<3x2xindex> - // %v_len = 3 + // %v = arith.constant dense<[ 1.1, 2.2, 3.3 ]> : tensor<3xf64> // %p_len = 2 // %c_len = 6 (3x2) + // %v_len = 3 ``` }]; let assemblyFormat = - "$tensor `:` type($tensor) " + "$tensor attr-dict `:` type($tensor)" "`out_lvls` `(` $out_levels `:` type($out_levels) `)` " - "`out_vals` `(` $out_values `:` type($out_values) `)` attr-dict" - "`->` `(` type($ret_levels) `)` `,` type($ret_values) `,` " - "`(` type($lvl_lens) `)` `,` type($val_len)"; + "`out_vals` `(` $out_values `:` type($out_values) `)` `->`" + "`(` type($ret_levels) `)` `,` type($ret_values) `,` " + "`(` type($lvl_lens) `)` `,` type($val_len)"; let hasVerifier = 1; } diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 18851f29d8eaa..7f5c05190fc9a 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -60,7 +60,7 @@ func.func @invalid_pack_mis_position(%values: tensor<6xf64>, %coordinates: tenso func.func @invalid_unpack_type(%sp: tensor<100xf32, #SparseVector>, %values: tensor<6xf64>, %pos: tensor<2xi32>, %coordinates: tensor<6x1xi32>) { // expected-error@+1 {{input/output element-types don't match}} - %rv, %rp, %rc, %vl, %pl, %cl = sparse_tensor.disassemble %sp : tensor<100xf32, #SparseVector> + %rp, %rc, %rv, %pl, %cl, %vl = sparse_tensor.disassemble %sp : tensor<100xf32, #SparseVector> out_lvls(%pos, %coordinates : tensor<2xi32>, tensor<6x1xi32>) out_vals(%values : tensor<6xf64>) -> (tensor<2xi32>, tensor<6x1xi32>), tensor<6xf64>, (index, index), index @@ -73,7 +73,7 @@ func.func @invalid_unpack_type(%sp: tensor<100xf32, #SparseVector>, %values: ten func.func @invalid_unpack_type(%sp: tensor<100x2xf64, #SparseVector>, %values: tensor<6xf64>, %pos: tensor<2xi32>, %coordinates: tensor<6x3xi32>) { // expected-error@+1 {{input/output trailing COO level-ranks don't match}} - %rv, %rp, %rc, %vl, %pl, %cl = sparse_tensor.disassemble %sp : tensor<100x2xf64, #SparseVector> + %rp, %rc, %rv, %pl, %cl, %vl = sparse_tensor.disassemble %sp : tensor<100x2xf64, #SparseVector> out_lvls(%pos, %coordinates : tensor<2xi32>, tensor<6x3xi32> ) out_vals(%values : tensor<6xf64>) -> (tensor<2xi32>, tensor<6x3xi32>), tensor<6xf64>, (index, index), index @@ -86,7 +86,7 @@ func.func @invalid_unpack_type(%sp: tensor<100x2xf64, #SparseVector>, %values: t func.func @invalid_unpack_mis_position(%sp: tensor<2x100xf64, #CSR>, %values: tensor<6xf64>, %coordinates: 
tensor<6xi32>) { // expected-error@+1 {{inconsistent number of fields between input/output}} - %rv, %rc, %vl, %pl = sparse_tensor.disassemble %sp : tensor<2x100xf64, #CSR> + %rc, %rv, %cl, %vl = sparse_tensor.disassemble %sp : tensor<2x100xf64, #CSR> out_lvls(%coordinates : tensor<6xi32>) out_vals(%values : tensor<6xf64>) -> (tensor<6xi32>), tensor<6xf64>, (index), index diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index a47a3d5119f96..12f69c1d37b9c 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -33,21 +33,21 @@ func.func @sparse_pack(%pos: tensor<2xi32>, %index: tensor<6x1xi32>, %data: tens #SparseVector = #sparse_tensor.encoding<{map = (d0) -> (d0 : compressed), crdWidth=32}> // CHECK-LABEL: func @sparse_unpack( // CHECK-SAME: %[[T:.*]]: tensor<100xf64, # -// CHECK-SAME: %[[OD:.*]]: tensor<6xf64> -// CHECK-SAME: %[[OP:.*]]: tensor<2xindex> -// CHECK-SAME: %[[OI:.*]]: tensor<6x1xi32> +// CHECK-SAME: %[[OP:.*]]: tensor<2xindex>, +// CHECK-SAME: %[[OI:.*]]: tensor<6x1xi32>, +// CHECK-SAME: %[[OD:.*]]: tensor<6xf64>) // CHECK: %[[P:.*]]:2, %[[D:.*]], %[[PL:.*]]:2, %[[DL:.*]] = sparse_tensor.disassemble %[[T]] // CHECK: return %[[P]]#0, %[[P]]#1, %[[D]] func.func @sparse_unpack(%sp : tensor<100xf64, #SparseVector>, - %od : tensor<6xf64>, %op : tensor<2xindex>, - %oi : tensor<6x1xi32>) + %oi : tensor<6x1xi32>, + %od : tensor<6xf64>) -> (tensor<2xindex>, tensor<6x1xi32>, tensor<6xf64>) { - %rp, %ri, %rd, %vl, %pl, %cl = sparse_tensor.disassemble %sp : tensor<100xf64, #SparseVector> + %rp, %ri, %d, %rpl, %ril, %dl = sparse_tensor.disassemble %sp : tensor<100xf64, #SparseVector> out_lvls(%op, %oi : tensor<2xindex>, tensor<6x1xi32>) out_vals(%od : tensor<6xf64>) -> (tensor<2xindex>, tensor<6x1xi32>), tensor<6xf64>, (index, index), index - return %rp, %ri, %rd : tensor<2xindex>, tensor<6x1xi32>, tensor<6xf64> + return %rp, %ri, %d : tensor<2xindex>, tensor<6x1xi32>, tensor<6xf64> } // ----- diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir index 7ecccad212cdb..5415625ff05d6 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir @@ -231,7 +231,7 @@ module { %od = tensor.empty() : tensor<3xf64> %op = tensor.empty() : tensor<2xi32> %oi = tensor.empty() : tensor<3x2xi32> - %p, %i, %d, %dl, %pl, %il = sparse_tensor.disassemble %s5 : tensor<10x10xf64, #SortedCOOI32> + %p, %i, %d, %pl, %il, %dl = sparse_tensor.disassemble %s5 : tensor<10x10xf64, #SortedCOOI32> out_lvls(%op, %oi : tensor<2xi32>, tensor<3x2xi32>) out_vals(%od : tensor<3xf64>) -> (tensor<2xi32>, tensor<3x2xi32>), tensor<3xf64>, (i32, i64), index @@ -244,10 +244,13 @@ module { %vi = vector.transfer_read %i[%c0, %c0], %i0 : tensor<3x2xi32>, vector<3x2xi32> vector.print %vi : vector<3x2xi32> + // CHECK-NEXT: 3 + vector.print %dl : index + %d_csr = tensor.empty() : tensor<4xf64> %p_csr = tensor.empty() : tensor<3xi32> %i_csr = tensor.empty() : tensor<3xi32> - %rp_csr, %ri_csr, %rd_csr, %ld_csr, %lp_csr, %li_csr = sparse_tensor.disassemble %csr : tensor<2x2xf64, #CSR> + %rp_csr, %ri_csr, %rd_csr, %lp_csr, %li_csr, %ld_csr = sparse_tensor.disassemble %csr : tensor<2x2xf64, #CSR> out_lvls(%p_csr, %i_csr : tensor<3xi32>, tensor<3xi32>) out_vals(%d_csr : tensor<4xf64>) -> (tensor<3xi32>, tensor<3xi32>), tensor<4xf64>, (i32, i64), index @@ 
-256,6 +259,9 @@ module { %vd_csr = vector.transfer_read %rd_csr[%c0], %f0 : tensor<4xf64>, vector<3xf64> vector.print %vd_csr : vector<3xf64> + // CHECK-NEXT: 3 + vector.print %ld_csr : index + %bod = tensor.empty() : tensor<6xf64> %bop = tensor.empty() : tensor<4xindex> %boi = tensor.empty() : tensor<6x2xindex> From 798e04f93769318db857b27f51020e7115e00301 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 10 Apr 2024 17:50:13 +0100 Subject: [PATCH 038/886] Fix MSVC "not all control paths return a value" warning. NFC. --- clang/include/clang/Basic/OpenACCKinds.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h index e191e9e0a5a15..3414df9999170 100644 --- a/clang/include/clang/Basic/OpenACCKinds.h +++ b/clang/include/clang/Basic/OpenACCKinds.h @@ -430,6 +430,7 @@ inline StreamTy &printOpenACCDefaultClauseKind(StreamTy &Out, case OpenACCDefaultClauseKind::Invalid: return Out << ""; } + llvm_unreachable("Unknown OpenACCDefaultClauseKind enum"); } inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &Out, From 335d5d5f47b883055e676ffe5f981469a5f5f4f6 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Wed, 10 Apr 2024 19:04:31 +0200 Subject: [PATCH 039/886] [SPIRV] Tweak parsing of base type name in builtins (#88255) This PR is a small improvement of parsing of base type name in builtins, allowing to understand `unsigned ...` types. The test case that fails without the fix is attached. --- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 16 ++++++++++++---- llvm/test/CodeGen/SPIRV/SampledImageRetType.ll | 9 +++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index c87c1293c622f..299a4341193bf 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -374,13 +374,21 @@ Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx) { return Type::getVoidTy(Ctx); else if (TypeName.consume_front("bool")) return Type::getIntNTy(Ctx, 1); - else if (TypeName.consume_front("char") || TypeName.consume_front("uchar")) + else if (TypeName.consume_front("char") || + TypeName.consume_front("unsigned char") || + TypeName.consume_front("uchar")) return Type::getInt8Ty(Ctx); - else if (TypeName.consume_front("short") || TypeName.consume_front("ushort")) + else if (TypeName.consume_front("short") || + TypeName.consume_front("unsigned short") || + TypeName.consume_front("ushort")) return Type::getInt16Ty(Ctx); - else if (TypeName.consume_front("int") || TypeName.consume_front("uint")) + else if (TypeName.consume_front("int") || + TypeName.consume_front("unsigned int") || + TypeName.consume_front("uint")) return Type::getInt32Ty(Ctx); - else if (TypeName.consume_front("long") || TypeName.consume_front("ulong")) + else if (TypeName.consume_front("long") || + TypeName.consume_front("unsigned long") || + TypeName.consume_front("ulong")) return Type::getInt64Ty(Ctx); else if (TypeName.consume_front("half")) return Type::getHalfTy(Ctx); diff --git a/llvm/test/CodeGen/SPIRV/SampledImageRetType.ll b/llvm/test/CodeGen/SPIRV/SampledImageRetType.ll index 1aa3af83bcd20..7af5876a023e5 100644 --- a/llvm/test/CodeGen/SPIRV/SampledImageRetType.ll +++ b/llvm/test/CodeGen/SPIRV/SampledImageRetType.ll @@ -8,6 +8,8 @@ declare dso_local spir_func ptr addrspace(4) @_Z20__spirv_SampledImageI14ocl_ima declare dso_local spir_func <4 x float> 
@_Z30__spirv_ImageSampleExplicitLodIPvDv4_fiET0_T_T1_if(ptr addrspace(4) %0, i32 %1, i32 %2, float %3) local_unnamed_addr +declare dso_local spir_func <4 x i32> @_Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image1d_roDv4_jfET0_T_T1_if(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %0, float %1, i32 %2, float %3) local_unnamed_addr + @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(2) constant <3 x i64>, align 32 define weak_odr dso_local spir_kernel void @_ZTS17image_kernel_readILi1EE(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 0), target("spirv.Sampler")) { @@ -25,3 +27,10 @@ define weak_odr dso_local spir_kernel void @_ZTS17image_kernel_readILi1EE(target ret void } + +define weak_odr dso_local spir_kernel void @foo_lod(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %_arg) { + %lod = call spir_func <4 x i32> @_Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image1d_roDv4_jfET0_T_T1_if(target("spirv.SampledImage", void, 0, 0, 0, 0, 0, 0, 0) %_arg, float 0x3FE7FFEB00000000, i32 2, float 0.000000e+00) +; CHECK: %[[#sampled_image_lod:]] = OpFunctionParameter %[[#sampled_image_t]] +; CHECK: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#sampled_image_lod]] %[[#]] {{.*}} %[[#]] + ret void +} From 4dcf33b6c2806216dfe8c5e1e3582a45516dbc69 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 10 Apr 2024 18:13:57 +0100 Subject: [PATCH 040/886] [AArch64] Cleanup and GISel coverage for lrint tests. NFC --- llvm/test/CodeGen/AArch64/lrint-conv.ll | 66 ++++++++++++++--------- llvm/test/CodeGen/AArch64/vector-lrint.ll | 39 ++++++++++---- 2 files changed, 70 insertions(+), 35 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll index 80f1e8b8fc18e..b61d6f04b400e 100644 --- a/llvm/test/CodeGen/AArch64/lrint-conv.ll +++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll @@ -1,64 +1,78 @@ -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64 | FileCheck %s --check-prefixes=FALLBACK,CHECK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for testmswl +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll -; CHECK-LABEL: testmsws: -; CHECK: frintx [[REG:s[0-9]]], s0 -; CHECK-NEXT: fcvtzs x0, [[REG]] -; CHECK: ret -; FALLBACK-NOT: remark{{.*}}testmsws define i32 @testmsws(float %x) { +; CHECK-LABEL: testmsws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxs: -; CHECK: frintx [[REG:s[0-9]]], s0 -; CHECK-NEXT: fcvtzs x0, [[REG]] -; CHECK-NEXT: ret -; FALLBACK-NOT: remark{{.*}}testmsxs define i64 @testmsxs(float %x) { +; CHECK-LABEL: testmsxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %x) ret i64 %0 } -; CHECK-LABEL: testmswd: -; CHECK: frintx [[REG:d[0-9]]], d0 -; CHECK-NEXT: fcvtzs x0, [[REG]] -; CHECK: ret -; FALLBACK-NOT: 
remark{{.*}}testmswd define i32 @testmswd(double %x) { +; CHECK-LABEL: testmswd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsxd: -; CHECK: frintx [[REG:d[0-9]]], d0 -; CHECK-NEXT: fcvtzs x0, [[REG]] -; CHECK-NEXT: ret -; FALLBACK-NOT: remark{{.*}}testmsxd define i64 @testmsxd(double %x) { +; CHECK-LABEL: testmsxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %x) ret i64 %0 } -; CHECK-LABEL: testmswl: -; CHECK: bl lrintl define dso_local i32 @testmswl(fp128 %x) { +; CHECK-LABEL: testmswl: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lrintl +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } -; CHECK-LABEL: testmsll: -; CHECK: b lrintl define dso_local i64 @testmsll(fp128 %x) { +; CHECK-LABEL: testmsll: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b lrintl entry: %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) ret i64 %0 diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 9c46cf69cb0bf..b7fcd11ba8d16 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1,6 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for lrint_v1f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-LABEL: lrint_v1f16: @@ -372,13 +386,20 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) define <1 x i64> 
@lrint_v1f32(<1 x float> %x) {
-; CHECK-LABEL: lrint_v1f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: lrint_v1f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: frintx s0, s0
+; CHECK-SD-NEXT: fcvtzs x8, s0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: lrint_v1f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintx s0, s0
+; CHECK-GI-NEXT: fcvtzs x8, s0
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
   %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x)
   ret <1 x i64> %a
 }

From 04bf1a4090c535e3a1033ab9a8ef92068166461f Mon Sep 17 00:00:00 2001
From: Kojo Acquah
Date: Wed, 10 Apr 2024 10:18:47 -0700
Subject: [PATCH 041/886] Update `LowerContractionToSMMLAPattern` to ignore
 matvec (#88288)

Patterns in `LowerContractionToSMMLAPattern` are designed to handle
vector-to-matrix multiplication but not matrix-to-vector. This leads to
the following error when processing `rhs` with rank < 2:
```
iree-compile: /usr/local/google/home/kooljblack/code/iree-build/llvm-project/tools/mlir/include/mlir/IR/BuiltinTypeInterfaces.h.inc:268: int64_t mlir::detail::ShapedTypeTrait::getDimSize(unsigned int) const [ConcreteType = mlir::VectorType]: Assertion `idx < getRank() && "invalid index for shaped type"' failed.
```
This change makes the patterns explicitly check the rhs rank and fail
on cases they cannot process.
---
 .../Transforms/LowerContractionToSMMLAPattern.cpp |  3 +++
 mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir  | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
index 13740225749e4..3ae894692089b 100644
--- a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
+++ b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp
@@ -54,6 +54,9 @@ class LowerContractionToSMMLAPattern
     // Note: RHS is not transposed.
     mlir::VectorType lhsType = op.getLhsType();
     mlir::VectorType rhsType = op.getRhsType();
+    // Avoid 0-D vectors and 1-D rhs:
+    if (!lhsType.hasRank() || !rhsType.hasRank() || rhsType.getRank() < 2)
+      return failure();
     auto dimM = lhsType.getRank() == 1 ? 
1 : lhsType.getDimSize(0); auto dimN = rhsType.getDimSize(0); auto dimK = rhsType.getDimSize(1); diff --git a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir index 46c4026d13b66..c276a5b0c2a14 100644 --- a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir +++ b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir @@ -258,3 +258,14 @@ func.func @test_lower_vector_arm_neon_vecmat_unroll_leading_dim(%lhs: vector<1x8 %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<1x8xi32>, vector<8x8xi32> into vector<1x8xi32> return %res : vector<1x8xi32> } + +// ----- + +// CHECK-LABEL: func.func @test_lower_vector_arm_neon_matvec +// CHECK-NOT: arm_neon.intr.smmla +func.func @test_lower_vector_arm_neon_matvec(%lhs: vector<8x8xi8>, %rhs: vector<8xi8>, %acc : vector<8xi32>) -> vector<8xi32> { + %rhs_extsi= arith.extsi %rhs : vector<8xi8> to vector<8xi32> + %lhs_extsi = arith.extsi %lhs : vector<8x8xi8> to vector<8x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<8x8xi32>, vector<8xi32> into vector<8xi32> + return %res : vector<8xi32> +} From c54afe5c33ca6159841d909fb8fe20e5d4e0069b Mon Sep 17 00:00:00 2001 From: higher-performance <113926381+higher-performance@users.noreply.github.com> Date: Wed, 10 Apr 2024 13:24:19 -0400 Subject: [PATCH 042/886] Fix quadratic slowdown in AST matcher parent map generation (#87824) Avoids the need to linearly re-scan all seen parent nodes to check for duplicates, which previously caused a slowdown for ancestry checks in Clang AST matchers. Fixes: #86881 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/AST/ParentMapContext.cpp | 25 ++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f5359afe1f099..c4a4893aec5cd 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -646,6 +646,9 @@ Fixed Point Support in Clang AST Matchers ------------ +- Fixes a long-standing performance issue in parent map generation for + ancestry-based matchers such as ``hasParent`` and ``hasAncestor``, making + them significantly faster. - ``isInStdNamespace`` now supports Decl declared with ``extern "C++"``. - Add ``isExplicitObjectMemberFunction``. - Fixed ``forEachArgumentWithParam`` and ``forEachArgumentWithParamType`` to diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index 21cfd5b1de6e9..9723c0cfa83bb 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -61,7 +61,26 @@ class ParentMapContext::ParentMap { template friend struct ::MatchParents; /// Contains parents of a node. 
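// A rough standalone sketch of the dedup technique adopted below; it is
// illustrative only: `int` stands in for DynTypedNode and the names are
// invented. Membership testing drops from an O(n) re-scan of the vector
// to an average O(1) hash-set lookup, while the vector keeps first-seen
// insertion order for iteration.
#include <unordered_set>
#include <vector>

struct OrderedNodeSet {
  void push_back(int Node) {
    if (Seen.insert(Node).second) // O(1) average duplicate check.
      Items.push_back(Node);      // Preserves insertion order.
  }
  std::vector<int> Items;         // Iteration order: first occurrence wins.
  std::unordered_set<int> Seen;   // Fast membership for deduplication.
};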
-  using ParentVector = llvm::SmallVector;
+  class ParentVector {
+  public:
+    ParentVector() = default;
+    explicit ParentVector(size_t N, const DynTypedNode &Value) {
+      Items.reserve(N);
+      for (; N > 0; --N)
+        push_back(Value);
+    }
+    bool contains(const DynTypedNode &Value) {
+      return Seen.contains(Value);
+    }
+    void push_back(const DynTypedNode &Value) {
+      if (!Value.getMemoizationData() || Seen.insert(Value).second)
+        Items.push_back(Value);
+    }
+    llvm::ArrayRef view() const { return Items; }
+  private:
+    llvm::SmallVector Items;
+    llvm::SmallDenseSet Seen;
+  };

   /// Maps from a node to its parents. This is used for nodes that have
   /// pointer identity only, which are more common and we can save space by
@@ -99,7 +118,7 @@ class ParentMapContext::ParentMap {
       return llvm::ArrayRef();
     }
     if (const auto *V = I->second.template dyn_cast()) {
-      return llvm::ArrayRef(*V);
+      return V->view();
     }
     return getSingleDynTypedNodeFromParentMap(I->second);
   }
@@ -252,7 +271,7 @@ class ParentMapContext::ParentMap {
     const auto *S = It->second.dyn_cast();
     if (!S) {
       if (auto *Vec = It->second.dyn_cast())
-        return llvm::ArrayRef(*Vec);
+        return Vec->view();
       return getSingleDynTypedNodeFromParentMap(It->second);
     }
     const auto *P = dyn_cast(S);

From f27f3697108470c3e995cf3cb454641c22ec1fa9 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 10 Apr 2024 10:28:54 -0700
Subject: [PATCH 043/886] [RISCV] Remove interrupt handler special case from
 RISCVFrameLowering::determineCalleeSaves. (#88069)

This code was trying to save the temporary argument registers in
interrupt handler functions that contain calls, with the exception that
all FP registers were saved, including the normally callee saved
registers. If all of the callees use an FP ABI and the interrupt
handler doesn't touch the normally callee saved FP registers, we don't
need to save them.

It doesn't appear that we need to special case functions with calls.
The normal callee saved register handling will already check each of
the calls and consider a register clobbered if the call doesn't
explicitly say it is preserved.

All of the test changes are from the removal of the FP callee saved
registers. There are tests for interrupt handlers with the F and D
extensions that use the ilp32 or lp64 ABIs; those are not affected by
this change and still save the FP callee saved regs as they should.

gcc appears to have a bug where, with the D extension enabled and the
ilp32f or lp64f ABI, it does not save the FP callee saved regs: the
callee would only save/restore the lower 32 bits and clobber the upper
bits. LLVM saves the FP callee saved regs in this case and there is an
unchanged test for it.

The unnecessary save/restore was raised in this thread
https://discourse.llvm.org/t/has-bugs-when-optimizing-save-restore-csrs-by-changing-csr-xlen-f32-interrupt/78200/1
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp |   40 -
 .../CodeGen/RISCV/interrupt-attr-nocall.ll   |  318 ++---
 llvm/test/CodeGen/RISCV/interrupt-attr.ll    | 1272 +++++++----------
 3 files changed, 675 insertions(+), 955 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 39075c81b2921..71672ed7b4ae7 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1001,46 +1001,6 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // Mark BP as used if function has dedicated base pointer.
   if (hasBP(MF))
if (hasBP(MF)) SavedRegs.set(RISCVABI::getBPReg()); - - // If interrupt is enabled and there are calls in the handler, - // unconditionally save all Caller-saved registers and - // all FP registers, regardless whether they are used. - MachineFrameInfo &MFI = MF.getFrameInfo(); - auto &Subtarget = MF.getSubtarget(); - - if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) { - - static const MCPhysReg CSRegs[] = { RISCV::X1, /* ra */ - RISCV::X5, RISCV::X6, RISCV::X7, /* t0-t2 */ - RISCV::X10, RISCV::X11, /* a0-a1, a2-a7 */ - RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17, - RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 /* t3-t6 */ - }; - - for (auto Reg : CSRegs) - SavedRegs.set(Reg); - - // According to psABI, if ilp32e/lp64e ABIs are used with an ISA that - // has any of the registers x16-x31 and f0-f31, then these registers are - // considered temporaries, so we should also save x16-x31 here. - if (STI.getTargetABI() == RISCVABI::ABI_ILP32E || - STI.getTargetABI() == RISCVABI::ABI_LP64E) { - for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) - SavedRegs.set(Reg); - } - - if (Subtarget.hasStdExtF()) { - - // If interrupt is enabled, this list contains all FP registers. - const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs(); - - for (unsigned i = 0; Regs[i]; ++i) - if (RISCV::FPR16RegClass.contains(Regs[i]) || - RISCV::FPR32RegClass.contains(Regs[i]) || - RISCV::FPR64RegClass.contains(Regs[i])) - SavedRegs.set(Regs[i]); - } - } } std::pair diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll index 263743d39a8e6..fa6ac96b57b1e 100644 --- a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll @@ -412,51 +412,39 @@ define void @foo_double() nounwind #0 { ; ; CHECK-RV32IF-LABEL: foo_double: ; CHECK-RV32IF: # %bb.0: -; CHECK-RV32IF-NEXT: addi sp, sp, -192 -; CHECK-RV32IF-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t0, 184(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t1, 180(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t2, 176(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a0, 172(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a1, 168(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a2, 164(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a3, 160(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a4, 156(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a5, 152(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a6, 148(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a7, 144(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t3, 140(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t4, 136(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t5, 132(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t6, 128(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa0, 84(sp) # 4-byte Folded 
Spill -; CHECK-RV32IF-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: addi sp, sp, -144 +; CHECK-RV32IF-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t0, 136(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t1, 132(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t2, 128(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t3, 92(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t4, 88(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t5, 84(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t6, 80(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft0, 76(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft1, 72(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft2, 68(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft3, 64(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft4, 60(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft5, 56(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft6, 52(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft7, 48(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa0, 44(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa1, 40(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa2, 36(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa3, 32(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa4, 28(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa6, 20(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa7, 16(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill @@ -471,55 +459,43 @@ define void @foo_double() nounwind #0 { ; CHECK-RV32IF-NEXT: lui a2, %hi(g) ; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2) ; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2) -; CHECK-RV32IF-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t0, 184(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t1, 180(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t2, 176(sp) # 4-byte Folded 
Reload -; CHECK-RV32IF-NEXT: lw a0, 172(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a1, 168(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a2, 164(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a3, 160(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a4, 156(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a5, 152(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a6, 148(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a7, 144(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t3, 140(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t4, 136(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t5, 132(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t6, 128(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t0, 136(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t1, 132(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t2, 128(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t3, 92(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t4, 88(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t5, 84(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t6, 80(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft0, 76(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft1, 72(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft2, 
68(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft3, 64(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft4, 60(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft5, 56(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft6, 52(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft7, 48(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa0, 44(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa1, 40(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa2, 36(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa3, 32(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa4, 28(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa6, 20(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa7, 16(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: addi sp, sp, 192 +; CHECK-RV32IF-NEXT: addi sp, sp, 144 ; CHECK-RV32IF-NEXT: mret ; ; CHECK-RV32IFD-LABEL: foo_double: @@ -604,57 +580,45 @@ define void @foo_fp_double() nounwind #1 { ; ; CHECK-RV32IF-LABEL: foo_fp_double: ; CHECK-RV32IF: # %bb.0: -; CHECK-RV32IF-NEXT: addi sp, sp, -208 -; CHECK-RV32IF-NEXT: sw ra, 204(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t0, 200(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t1, 196(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t2, 192(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw s0, 188(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a0, 184(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a1, 180(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a2, 176(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a3, 172(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a4, 168(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a5, 164(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a6, 160(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw a7, 156(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t3, 152(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t4, 148(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t5, 144(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: sw t6, 140(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft0, 136(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft1, 132(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft2, 128(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft3, 124(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft4, 120(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft5, 116(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft6, 112(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw ft7, 108(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs0, 104(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs1, 100(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa0, 96(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa1, 92(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa2, 88(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa3, 84(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa4, 80(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa6, 72(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fa7, 68(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs2, 64(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw 
fs3, 60(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs4, 56(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs5, 52(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs6, 48(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs7, 44(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs8, 40(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs9, 36(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs10, 32(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: fsw fs11, 28(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: addi sp, sp, -160 +; CHECK-RV32IF-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t1, 148(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t2, 144(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw s0, 140(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a0, 136(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a4, 120(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a5, 116(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a6, 112(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw a7, 108(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t3, 104(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t4, 100(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t5, 96(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: sw t6, 92(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft0, 88(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft1, 84(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft2, 80(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft3, 76(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft4, 72(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft5, 68(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft6, 64(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw ft7, 60(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa0, 56(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa1, 52(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa2, 48(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa3, 44(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa4, 40(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa6, 32(sp) # 4-byte Folded Spill +; CHECK-RV32IF-NEXT: fsw fa7, 28(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft8, 24(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft9, 20(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft10, 16(sp) # 4-byte Folded Spill ; CHECK-RV32IF-NEXT: fsw ft11, 12(sp) # 4-byte Folded Spill -; CHECK-RV32IF-NEXT: addi s0, sp, 208 +; CHECK-RV32IF-NEXT: addi s0, sp, 160 ; CHECK-RV32IF-NEXT: lui a1, %hi(h) ; CHECK-RV32IF-NEXT: lw a0, %lo(h)(a1) ; CHECK-RV32IF-NEXT: lw a1, %lo(h+4)(a1) @@ -665,56 +629,44 @@ define void @foo_fp_double() nounwind #1 { ; CHECK-RV32IF-NEXT: lui a2, %hi(g) ; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2) ; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2) -; CHECK-RV32IF-NEXT: lw ra, 204(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t0, 200(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t1, 196(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t2, 192(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw s0, 188(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a0, 184(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a1, 180(sp) # 4-byte Folded Reload -; 
CHECK-RV32IF-NEXT: lw a2, 176(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a3, 172(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a4, 168(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a5, 164(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a6, 160(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw a7, 156(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t3, 152(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t4, 148(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t5, 144(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: lw t6, 140(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft0, 136(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft1, 132(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft2, 128(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft3, 124(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft4, 120(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft5, 116(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft6, 112(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw ft7, 108(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs0, 104(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs1, 100(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa0, 96(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa1, 92(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa2, 88(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa3, 84(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa4, 80(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa6, 72(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fa7, 68(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs2, 64(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs3, 60(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs4, 56(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs5, 52(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs6, 48(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs7, 44(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs8, 40(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs9, 36(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs10, 32(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: flw fs11, 28(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t0, 152(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t1, 148(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t2, 144(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw s0, 140(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a0, 136(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a2, 128(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a3, 124(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a4, 120(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a5, 116(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a6, 112(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw a7, 108(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t3, 104(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t4, 100(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t5, 96(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft0, 88(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft1, 84(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft2, 80(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft3, 
76(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft4, 72(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft5, 68(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft6, 64(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw ft7, 60(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa0, 56(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa1, 52(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa2, 48(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa3, 44(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa4, 40(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa6, 32(sp) # 4-byte Folded Reload +; CHECK-RV32IF-NEXT: flw fa7, 28(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft8, 24(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft9, 20(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft10, 16(sp) # 4-byte Folded Reload ; CHECK-RV32IF-NEXT: flw ft11, 12(sp) # 4-byte Folded Reload -; CHECK-RV32IF-NEXT: addi sp, sp, 208 +; CHECK-RV32IF-NEXT: addi sp, sp, 160 ; CHECK-RV32IF-NEXT: mret ; ; CHECK-RV32IFD-LABEL: foo_fp_double: diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll index 50c789f8f86dc..739c9d8d0b0ac 100644 --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -115,208 +115,160 @@ define void @foo_with_call() #1 { ; ; CHECK-RV32-F-LABEL: foo_with_call: ; CHECK-RV32-F: # %bb.0: -; CHECK-RV32-F-NEXT: addi sp, sp, -192 -; CHECK-RV32-F-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t0, 184(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t1, 180(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t2, 176(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a0, 172(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a2, 164(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a3, 160(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a4, 156(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a5, 152(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a6, 148(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a7, 144(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t3, 140(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t4, 136(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t5, 132(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t6, 128(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa7, 
56(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: addi sp, sp, -144 +; CHECK-RV32-F-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t0, 136(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t1, 132(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t2, 128(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t3, 92(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t4, 88(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t5, 84(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t6, 80(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft0, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft1, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft2, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft3, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft4, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft5, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft6, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft7, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa0, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa1, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa2, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa3, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa4, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa6, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa7, 16(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: call otherfoo -; CHECK-RV32-F-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t0, 184(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t1, 180(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t2, 176(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a0, 172(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a1, 168(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a2, 164(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a3, 160(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a4, 156(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a6, 148(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a7, 144(sp) # 4-byte Folded 
Reload -; CHECK-RV32-F-NEXT: lw t3, 140(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t4, 136(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t5, 132(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t6, 128(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t0, 136(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t1, 132(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t2, 128(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t3, 92(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t4, 88(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t5, 84(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t6, 80(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft0, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft1, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft2, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft3, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft4, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft5, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft6, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft7, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa0, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa1, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw 
fa2, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa3, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa4, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa6, 20(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa7, 16(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: addi sp, sp, 192 +; CHECK-RV32-F-NEXT: addi sp, sp, 144 ; CHECK-RV32-F-NEXT: mret ; ; CHECK-RV32-FD-LABEL: foo_with_call: ; CHECK-RV32-FD: # %bb.0: -; CHECK-RV32-FD-NEXT: addi sp, sp, -320 -; CHECK-RV32-FD-NEXT: sw ra, 316(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t0, 312(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t1, 308(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t2, 304(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a0, 300(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a1, 296(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a2, 292(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a3, 288(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a4, 284(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a5, 280(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a6, 276(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a7, 272(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t3, 268(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t4, 264(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t5, 260(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t6, 256(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: addi sp, sp, -224 +; CHECK-RV32-FD-NEXT: sw ra, 
220(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t0, 216(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t1, 212(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t2, 208(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a0, 204(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a1, 200(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a2, 196(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a3, 192(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a4, 188(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a5, 184(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a6, 180(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a7, 176(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t3, 172(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t4, 168(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t5, 164(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t6, 160(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft0, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft1, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft2, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft3, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft4, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft5, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft6, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft7, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa0, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa1, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa2, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa3, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa4, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa6, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa7, 32(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: call otherfoo -; CHECK-RV32-FD-NEXT: lw ra, 316(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t0, 312(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t1, 308(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t2, 304(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a0, 300(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a1, 296(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a2, 292(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a3, 288(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a4, 284(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a5, 280(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a6, 276(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a7, 272(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t3, 268(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t4, 264(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t5, 260(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t6, 256(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload -; 
CHECK-RV32-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw ra, 220(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t0, 216(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t1, 212(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t2, 208(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a0, 204(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a2, 196(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a3, 192(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a4, 188(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a5, 184(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a6, 180(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a7, 176(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t3, 172(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t4, 168(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t5, 164(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t6, 160(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft0, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft1, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft2, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft3, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft4, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft5, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft6, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft7, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa0, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa1, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa2, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa3, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa4, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa6, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa7, 32(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload ; 
CHECK-RV32-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: addi sp, sp, 320 +; CHECK-RV32-FD-NEXT: addi sp, sp, 224 ; CHECK-RV32-FD-NEXT: mret ; ; CHECK-RV32-F-ILP3-LABEL: foo_with_call: @@ -846,208 +798,160 @@ define void @foo_with_call() #1 { ; ; CHECK-RV64-F-LABEL: foo_with_call: ; CHECK-RV64-F: # %bb.0: -; CHECK-RV64-F-NEXT: addi sp, sp, -256 -; CHECK-RV64-F-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t0, 240(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t1, 232(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t2, 224(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a0, 216(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a1, 208(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a2, 200(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a3, 192(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a4, 184(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a5, 176(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a6, 168(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a7, 160(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t3, 152(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t4, 144(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t5, 136(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t6, 128(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: addi sp, sp, -208 +; CHECK-RV64-F-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t2, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a4, 136(sp) 
# 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t3, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t4, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t5, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t6, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft0, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft1, 72(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft2, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft3, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft4, 60(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft5, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft6, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft7, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa0, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa1, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa2, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa3, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa4, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa6, 20(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa7, 16(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: call otherfoo -; CHECK-RV64-F-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t0, 240(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t1, 232(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t2, 224(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a0, 216(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a1, 208(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a2, 200(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a3, 192(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a5, 176(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a6, 168(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a7, 160(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t3, 152(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t4, 144(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t5, 136(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t6, 128(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload -; 
CHECK-RV64-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t4, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t5, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t6, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft0, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft1, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft2, 68(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft3, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft4, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft5, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft6, 52(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft7, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa0, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa1, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa2, 36(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa3, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa4, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa6, 20(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa7, 16(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: addi sp, sp, 256 +; CHECK-RV64-F-NEXT: addi sp, sp, 208 ; CHECK-RV64-F-NEXT: mret ; ; CHECK-RV64-FD-LABEL: foo_with_call: ; CHECK-RV64-FD: # %bb.0: -; CHECK-RV64-FD-NEXT: addi sp, sp, -384 -; CHECK-RV64-FD-NEXT: sd ra, 376(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t0, 368(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t1, 360(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t2, 352(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a0, 344(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a1, 336(sp) # 8-byte Folded Spill -; 
CHECK-RV64-FD-NEXT: sd a2, 328(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a3, 320(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a4, 312(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a5, 304(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a6, 296(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a7, 288(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t3, 280(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t4, 272(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t5, 264(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t6, 256(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: addi sp, sp, -288 +; CHECK-RV64-FD-NEXT: sd ra, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t0, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t1, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t2, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t3, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t4, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t5, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t6, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft0, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft1, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft2, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft3, 128(sp) # 
8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft4, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft5, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft6, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft7, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa0, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa1, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa2, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa3, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa4, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa6, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa7, 32(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: call otherfoo -; CHECK-RV64-FD-NEXT: ld ra, 376(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t0, 368(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t1, 360(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t2, 352(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a0, 344(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a1, 336(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a3, 320(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a4, 312(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a5, 304(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a6, 296(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a7, 288(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t3, 280(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t4, 272(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t5, 264(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t6, 256(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs8, 56(sp) # 
8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld ra, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t0, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t1, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t2, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t3, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t5, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t6, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft0, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft1, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft2, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft3, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft4, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft5, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft6, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld ft7, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa0, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa1, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa2, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa3, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa4, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa6, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: fld fa7, 32(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: addi sp, sp, 384 +; CHECK-RV64-FD-NEXT: addi sp, sp, 288 ; CHECK-RV64-FD-NEXT: mret ; ; CHECK-RV64-F-LP64-LABEL: foo_with_call: @@ -1711,214 +1615,166 @@ define void @foo_fp_with_call() #2 { ; ; CHECK-RV32-F-LABEL: foo_fp_with_call: ; CHECK-RV32-F: # %bb.0: -; CHECK-RV32-F-NEXT: addi sp, sp, -208 -; CHECK-RV32-F-NEXT: sw ra, 204(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t0, 200(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t1, 196(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t2, 192(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw s0, 188(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a0, 184(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a1, 180(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a4, 168(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a5, 164(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a6, 160(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw a7, 156(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t3, 152(sp) # 
4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t4, 148(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t5, 144(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: sw t6, 140(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft0, 136(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft1, 132(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft2, 128(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft3, 124(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft4, 120(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft5, 116(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft6, 112(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw ft7, 108(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs0, 104(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs1, 100(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa0, 96(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa1, 92(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa2, 88(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa3, 84(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa4, 80(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa6, 72(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fa7, 68(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs2, 64(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs3, 60(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs4, 56(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs5, 52(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs6, 48(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs7, 44(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs8, 40(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs9, 36(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs10, 32(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: fsw fs11, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: addi sp, sp, -160 +; CHECK-RV32-F-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t1, 148(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t2, 144(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw s0, 140(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a0, 136(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a4, 120(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a5, 116(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a6, 112(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw a7, 108(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t3, 104(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t4, 100(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t5, 96(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: sw t6, 92(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft0, 88(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft1, 84(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft2, 80(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft3, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft4, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft5, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft6, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw ft7, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa0, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa1, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: 
fsw fa2, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa3, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa4, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa6, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-F-NEXT: fsw fa7, 28(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft8, 24(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft9, 20(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft10, 16(sp) # 4-byte Folded Spill ; CHECK-RV32-F-NEXT: fsw ft11, 12(sp) # 4-byte Folded Spill -; CHECK-RV32-F-NEXT: addi s0, sp, 208 +; CHECK-RV32-F-NEXT: addi s0, sp, 160 ; CHECK-RV32-F-NEXT: call otherfoo -; CHECK-RV32-F-NEXT: lw ra, 204(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t0, 200(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t1, 196(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t2, 192(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw s0, 188(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a0, 184(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a1, 180(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a2, 176(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a3, 172(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a4, 168(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a6, 160(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw a7, 156(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t3, 152(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t4, 148(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t5, 144(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: lw t6, 140(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft0, 136(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft1, 132(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft2, 128(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft3, 124(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft4, 120(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft5, 116(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft6, 112(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw ft7, 108(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs0, 104(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs1, 100(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa0, 96(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa1, 92(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa2, 88(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa3, 84(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa4, 80(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa6, 72(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fa7, 68(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs2, 64(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs3, 60(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs4, 56(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs5, 52(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs6, 48(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs7, 44(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs8, 40(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs9, 36(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs10, 32(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: flw fs11, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t0, 152(sp) # 4-byte Folded Reload +; 
CHECK-RV32-F-NEXT: lw t1, 148(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t2, 144(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw s0, 140(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a0, 136(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a2, 128(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a3, 124(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a4, 120(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a6, 112(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw a7, 108(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t3, 104(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t4, 100(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t5, 96(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft0, 88(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft1, 84(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft2, 80(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft3, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft4, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft5, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft6, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw ft7, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa0, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa1, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa2, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa3, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa4, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa6, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-F-NEXT: flw fa7, 28(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft8, 24(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft9, 20(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft10, 16(sp) # 4-byte Folded Reload ; CHECK-RV32-F-NEXT: flw ft11, 12(sp) # 4-byte Folded Reload -; CHECK-RV32-F-NEXT: addi sp, sp, 208 +; CHECK-RV32-F-NEXT: addi sp, sp, 160 ; CHECK-RV32-F-NEXT: mret ; ; CHECK-RV32-FD-LABEL: foo_fp_with_call: ; CHECK-RV32-FD: # %bb.0: -; CHECK-RV32-FD-NEXT: addi sp, sp, -336 -; CHECK-RV32-FD-NEXT: sw ra, 332(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t0, 328(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t1, 324(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t2, 320(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw s0, 316(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a0, 312(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a1, 308(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a2, 304(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a3, 300(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a4, 296(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a5, 292(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a6, 288(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw a7, 284(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t3, 280(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t4, 276(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t5, 272(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: sw t6, 268(sp) # 4-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill -; 
CHECK-RV32-FD-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa1, 168(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: addi sp, sp, -240 +; CHECK-RV32-FD-NEXT: sw ra, 236(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t0, 232(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t1, 228(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t2, 224(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw s0, 220(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a0, 216(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a1, 212(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a2, 208(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a3, 204(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a4, 200(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a5, 196(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a6, 192(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw a7, 188(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t3, 184(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t4, 180(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t5, 176(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: sw t6, 172(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft0, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft1, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft2, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft3, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft4, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft5, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft6, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd ft7, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa0, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa1, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa2, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa3, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa4, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd fa6, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-NEXT: fsd 
fa7, 40(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill ; CHECK-RV32-FD-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill -; CHECK-RV32-FD-NEXT: addi s0, sp, 336 +; CHECK-RV32-FD-NEXT: addi s0, sp, 240 ; CHECK-RV32-FD-NEXT: call otherfoo -; CHECK-RV32-FD-NEXT: lw ra, 332(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t0, 328(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t1, 324(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t2, 320(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw s0, 316(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a0, 312(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a1, 308(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a2, 304(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a3, 300(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a4, 296(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a5, 292(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a6, 288(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw a7, 284(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t3, 280(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t4, 276(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t5, 272(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: lw t6, 268(sp) # 4-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw ra, 236(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t0, 232(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t1, 228(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t2, 224(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw s0, 220(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a0, 216(sp) # 4-byte Folded Reload +; 
CHECK-RV32-FD-NEXT: lw a1, 212(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a2, 208(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a3, 204(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a4, 200(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a5, 196(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a6, 192(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw a7, 188(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t3, 184(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t4, 180(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t5, 176(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: lw t6, 172(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft0, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft1, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft2, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft3, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft4, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft5, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft6, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld ft7, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa0, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa1, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa2, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa3, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa4, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa5, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa6, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-NEXT: fld fa7, 40(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload ; CHECK-RV32-FD-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload -; CHECK-RV32-FD-NEXT: addi sp, sp, 336 +; CHECK-RV32-FD-NEXT: addi sp, sp, 240 ; CHECK-RV32-FD-NEXT: mret ; ; CHECK-RV32-F-ILP3-LABEL: foo_fp_with_call: @@ -2469,214 +2325,166 @@ define void @foo_fp_with_call() #2 { ; ; CHECK-RV64-F-LABEL: foo_fp_with_call: ; CHECK-RV64-F: # %bb.0: -; CHECK-RV64-F-NEXT: addi sp, sp, -272 -; CHECK-RV64-F-NEXT: sd ra, 264(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t0, 256(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t1, 248(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t2, 240(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd s0, 232(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a0, 224(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a1, 216(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a2, 208(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a3, 200(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a4, 192(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a5, 184(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a6, 176(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd a7, 168(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t3, 160(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t4, 152(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t5, 144(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: sd t6, 136(sp) # 8-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft0, 132(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft1, 128(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft2, 124(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft3, 120(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft4, 116(sp) # 4-byte Folded Spill -; 
CHECK-RV64-F-NEXT: fsw ft5, 112(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft6, 108(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw ft7, 104(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs0, 100(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs1, 96(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa2, 84(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa3, 80(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs2, 60(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs3, 56(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs4, 52(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs5, 48(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs6, 44(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs7, 40(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs8, 36(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs9, 32(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs10, 28(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: fsw fs11, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: addi sp, sp, -224 +; CHECK-RV64-F-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t0, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t1, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t2, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd s0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a4, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a5, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a6, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd a7, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t3, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t4, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t5, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: sd t6, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft0, 84(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft1, 80(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft2, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft3, 72(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft4, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft5, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft6, 60(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw ft7, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa0, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa1, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa2, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa3, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa4, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa6, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-F-NEXT: fsw fa7, 24(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft8, 20(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft9, 16(sp) # 4-byte Folded Spill ; CHECK-RV64-F-NEXT: fsw ft10, 12(sp) # 4-byte 
Folded Spill ; CHECK-RV64-F-NEXT: fsw ft11, 8(sp) # 4-byte Folded Spill -; CHECK-RV64-F-NEXT: addi s0, sp, 272 +; CHECK-RV64-F-NEXT: addi s0, sp, 224 ; CHECK-RV64-F-NEXT: call otherfoo -; CHECK-RV64-F-NEXT: ld ra, 264(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t0, 256(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t2, 240(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld s0, 232(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a0, 224(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a1, 216(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a2, 208(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a3, 200(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a4, 192(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a5, 184(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a6, 176(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld a7, 168(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t4, 152(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t5, 144(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: ld t6, 136(sp) # 8-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft0, 132(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft1, 128(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft2, 124(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft3, 120(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft4, 116(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft5, 112(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft6, 108(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw ft7, 104(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs0, 100(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs1, 96(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs2, 60(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs3, 56(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs4, 52(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs5, 48(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs6, 44(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs7, 40(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs8, 36(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs9, 32(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs10, 28(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: flw fs11, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t0, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t1, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld s0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a3, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a5, 
136(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a6, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld a7, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t4, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t5, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: ld t6, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft0, 84(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft1, 80(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft2, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft3, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft4, 68(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft5, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft6, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw ft7, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa0, 52(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa1, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa2, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa3, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa4, 36(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa6, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-F-NEXT: flw fa7, 24(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft8, 20(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft9, 16(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft10, 12(sp) # 4-byte Folded Reload ; CHECK-RV64-F-NEXT: flw ft11, 8(sp) # 4-byte Folded Reload -; CHECK-RV64-F-NEXT: addi sp, sp, 272 +; CHECK-RV64-F-NEXT: addi sp, sp, 224 ; CHECK-RV64-F-NEXT: mret ; ; CHECK-RV64-FD-LABEL: foo_fp_with_call: ; CHECK-RV64-FD: # %bb.0: -; CHECK-RV64-FD-NEXT: addi sp, sp, -400 -; CHECK-RV64-FD-NEXT: sd ra, 392(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t0, 384(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t1, 376(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t2, 368(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd s0, 360(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a0, 352(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a1, 344(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a2, 336(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a3, 328(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a4, 320(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a6, 304(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd a7, 296(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t3, 288(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t4, 280(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t5, 272(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: sd t6, 264(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa1, 
168(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: addi sp, sp, -304 +; CHECK-RV64-FD-NEXT: sd ra, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t0, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t1, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t2, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd s0, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a0, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a3, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a4, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a5, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a6, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd a7, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t3, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t4, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t5, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: sd t6, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft0, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft1, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft2, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft3, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft4, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft5, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft6, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd ft7, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa0, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa1, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa2, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa3, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa4, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa6, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-NEXT: fsd fa7, 40(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill ; CHECK-RV64-FD-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill -; CHECK-RV64-FD-NEXT: addi s0, sp, 400 +; CHECK-RV64-FD-NEXT: addi s0, sp, 304 ; CHECK-RV64-FD-NEXT: call otherfoo -; CHECK-RV64-FD-NEXT: ld ra, 392(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t0, 384(sp) # 8-byte Folded Reload -; 
CHECK-RV64-FD-NEXT: ld t1, 376(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t2, 368(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld s0, 360(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a0, 352(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a1, 344(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a2, 336(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a3, 328(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a4, 320(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a5, 312(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a6, 304(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld a7, 296(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t3, 288(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t4, 280(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t5, 272(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: ld t6, 264(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload -; CHECK-RV64-FD-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld ra, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t0, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t1, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t2, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld s0, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a1, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a5, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a6, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld a7, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-NEXT: ld t4, 
184(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t5, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t6, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft0, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft1, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft2, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft3, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft4, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft5, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft6, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft7, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa0, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa1, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa2, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa3, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa4, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa6, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa7, 40(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    fld ft8, 32(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    fld ft9, 24(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    fld ft10, 16(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    fld ft11, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    addi sp, sp, 400
+; CHECK-RV64-FD-NEXT:    addi sp, sp, 304
 ; CHECK-RV64-FD-NEXT:    mret
 ;
 ; CHECK-RV64-F-LP64-LABEL: foo_fp_with_call:

From 86842e1f724fba5abae50ce438553895e69b8141 Mon Sep 17 00:00:00 2001
From: Jun Wang
Date: Wed, 10 Apr 2024 10:47:04 -0700
Subject: [PATCH 044/886] [AMDGPU] New clang option for emitting a waitcnt
 instruction after each memory instruction (#79236)

This patch introduces a new command-line option for clang, namely,
amdgpu-precise-memory-op (or precise-memory in the backend). When this
option is specified, a waitcnt instruction is generated after each
memory load/store instruction. The counter values are always 0, but
which counters are involved depends on the memory instruction.
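As an illustration (this mirrors the driver test added below; the input
file name is arbitrary), the option is accepted by the clang driver and
forwarded to the backend as a target feature:

  clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op foo.c
  ...
  "-target-feature" "+precise-memory"

With the feature enabled, each memory operation is followed by a wait on
the counters it can affect; for instance, in the new CodeGen test a
gfx900 global_load_dword is immediately followed by "s_waitcnt vmcnt(0)".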
--------- Co-authored-by: Jun Wang --- clang/include/clang/Driver/Options.td | 3 + clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 + clang/test/Driver/amdgpu-features.c | 6 + llvm/lib/Target/AMDGPU/AMDGPU.td | 4 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 + llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 8 + .../insert_waitcnt_for_precise_memory.ll | 1658 +++++++++++++++++ 7 files changed, 1686 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0a74e6c75f95b..7ac36222644aa 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4912,6 +4912,9 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable", defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", "Specify wavefront size 64", "Specify wavefront size 32", " mode (AMDGPU only)">; +defm amdgpu_precise_memory_op + : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", + " precise memory mode (AMDGPU only)">; defm unsafe_fp_atomics : BoolMOption<"unsafe-fp-atomics", TargetOpts<"AllowAMDGPUUnsafeFPAtomics">, DefaultFalse, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index e122379e860e2..4e6362a0f4063 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -670,6 +670,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_wavefrontsize64, false)) Features.push_back("+wavefrontsize64"); + if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op, + options::OPT_mno_amdgpu_precise_memory_op, false)) + Features.push_back("+precise-memory"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index a516bc6b7ff20..864744db203e9 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -32,3 +32,9 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-cumode %s 2>&1 | FileCheck --check-prefix=NO-CUMODE %s // NO-CUMODE: "-target-feature" "-cumode" + +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=PREC-MEM %s +// PREC-MEM: "-target-feature" "+precise-memory" + +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s +// NO-PREC-MEM-NOT: {{".*precise-memory"}} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 37dcfef3b2a3d..9b09550159993 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -168,6 +168,10 @@ def FeatureCuMode : SubtargetFeature<"cumode", "Enable CU wavefront execution mode" >; +def FeaturePreciseMemory + : SubtargetFeature<"precise-memory", "EnablePreciseMemory", + "true", "Enable precise memory mode">; + def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", "SGPRInitBug", "true", diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index e24a18a2842f6..04ff53a6647bd 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -87,6 +87,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool EnableTgSplit = false; bool EnableCuMode = false; bool TrapHandler = false; + bool EnablePreciseMemory = false; // Used as options. 
bool EnableLoadStoreOpt = false; @@ -599,6 +600,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return EnableCuMode; } + bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; } + bool hasFlatAddressSpace() const { return FlatAddressSpace; } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index bb499c5c8c578..556ec3e231ff1 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -2305,6 +2305,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } #endif + if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) { + AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt( + Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)); + ScoreBrackets.simplifyWaitcnt(Wait); + Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block, + ScoreBrackets, /*OldWaitcntInstr=*/nullptr); + } + LLVM_DEBUG({ Inst.print(dbgs()); ScoreBrackets.dump(); diff --git a/llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll b/llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll new file mode 100644 index 0000000000000..df03e89370377 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll @@ -0,0 +1,1658 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+precise-memory < %s | FileCheck %s -check-prefixes=GFX9 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=+precise-memory < %s | FileCheck %s -check-prefixes=GFX90A +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+precise-memory < %s | FileCheck %s -check-prefixes=GFX10 +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-flat-scratch,+precise-memory < %s | FileCheck --check-prefixes=GFX9-FLATSCR %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+precise-memory < %s | FileCheck %s -check-prefixes=GFX11 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+precise-memory < %s | FileCheck %s -check-prefixes=GFX12 + +; from atomicrmw-expand.ll +; covers flat_load, flat_atomic (atomic with return) +; +define void @syncscope_workgroup_nortn(ptr %addr, float %val) { +; GFX9-LABEL: syncscope_workgroup_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_dword v4, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], 0 +; GFX9-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX9-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX9-NEXT: v_mov_b32_e32 v4, v3 +; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_cbranch_execnz .LBB0_1 +; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_workgroup_nortn: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dword v5, v[0:1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX90A-NEXT: 
s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: syncscope_workgroup_nortn: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_load_dword v4, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_gl0_inv +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX10-NEXT: v_mov_b32_e32 v4, v3 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cbranch_execnz .LBB0_1 +; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-FLATSCR-LABEL: syncscope_workgroup_nortn: +; GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: flat_load_dword v4, v[0:1] +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-FLATSCR-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX9-FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-FLATSCR-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX9-FLATSCR-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX9-FLATSCR-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v3 +; GFX9-FLATSCR-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX9-FLATSCR-NEXT: s_cbranch_execnz .LBB0_1 +; GFX9-FLATSCR-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: syncscope_workgroup_nortn: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_b32 v4, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, 0 +; GFX11-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX11-NEXT: v_mov_b32_e32 v4, v3 +; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_cbranch_execnz .LBB0_1 +; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: syncscope_workgroup_nortn: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: flat_load_b32 v4, v[0:1] +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_mov_b32 s0, 0 +; GFX12-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX12-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX12-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX12-NEXT: v_mov_b32_e32 v4, v3 +; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: s_cbranch_execnz .LBB0_1 +; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst + ret void +} + +; from atomicrmw-nand.ll +; covers global_atomic (atomic with return), global_load +; +define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind { +; GFX9-LABEL: atomic_nand_i32_global: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v2, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], 0 +; GFX9-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-NEXT: v_not_b32_e32 v2, v3 +; GFX9-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX9-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_cbranch_execnz .LBB1_1 +; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: atomic_nand_i32_global: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_load_dword v2, v[0:1], off +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_not_b32_e32 v2, v3 +; GFX90A-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB1_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v2 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: atomic_nand_i32_global: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v2, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: v_mov_b32_e32 v3, v2 +; GFX10-NEXT: v_not_b32_e32 v2, v3 +; GFX10-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_cbranch_execnz 
.LBB1_1 +; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-FLATSCR-LABEL: atomic_nand_i32_global: +; GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: global_load_dword v2, v[0:1], off +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-FLATSCR-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX9-FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-FLATSCR-NEXT: v_not_b32_e32 v2, v3 +; GFX9-FLATSCR-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX9-FLATSCR-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: buffer_wbinvl1_vol +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-FLATSCR-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX9-FLATSCR-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX9-FLATSCR-NEXT: s_cbranch_execnz .LBB1_1 +; GFX9-FLATSCR-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: atomic_nand_i32_global: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v2, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, 0 +; GFX11-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX11-NEXT: v_mov_b32_e32 v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_not_b32_e32 v2, v3 +; GFX11-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], v[2:3], off glc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_cbranch_execnz .LBB1_1 +; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: atomic_nand_i32_global: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v[0:1], off +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_mov_b32 s0, 0 +; GFX12-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX12-NEXT: v_mov_b32_e32 v3, v2 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_not_b32_e32 v2, v3 +; GFX12-NEXT: v_or_b32_e32 v2, -5, v2 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], v[2:3], off th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX12-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: s_cbranch_execnz .LBB1_1 +; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] + 
%result = atomicrmw nand ptr addrspace(1) %ptr, i32 4 seq_cst + ret i32 %result +} + +; from call-argument-types.ll +; covers scratch_load, scratch_store, buffer_load, buffer_store +; +declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) +define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) { +; GFX9-LABEL: tail_call_byval_align16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_getpc_b64 s[16:17] +; GFX9-NEXT: s_add_u32 s16, s16, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s17, s17, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:20 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[16:17] +; +; GFX90A-LABEL: tail_call_byval_align16: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:24 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_load_dword v34, off, s[0:3], s32 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_getpc_b64 s[16:17] +; GFX90A-NEXT: s_add_u32 s16, s16, byval_align16_f64_arg@rel32@lo+4 +; GFX90A-NEXT: s_addc_u32 s17, s17, byval_align16_f64_arg@rel32@hi+12 +; GFX90A-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:20 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:16 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_store_dword v34, off, s[0:3], s32 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[16:17] +; +; GFX10-LABEL: tail_call_byval_align16: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:24 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_getpc_b64 s[16:17] +; GFX10-NEXT: s_add_u32 s16, s16, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s17, s17, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:20 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:16 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: buffer_store_dword v34, off, s[0:3], s32 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[16:17] +; +; GFX9-FLATSCR-LABEL: tail_call_byval_align16: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_load_dword v32, off, s32 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_getpc_b64 s[0:1] +; GFX9-FLATSCR-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 +; GFX9-FLATSCR-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 +; 
GFX9-FLATSCR-NEXT: scratch_store_dword off, v32, s32 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_load_dwordx2 v[32:33], off, s32 offset:24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[32:33], s32 offset:16 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_setpc_b64 s[0:1] +; +; GFX11-LABEL: tail_call_byval_align16: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v32, off, s32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_getpc_b64 s[0:1] +; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 +; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 +; GFX11-NEXT: scratch_store_b32 off, v32, s32 +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: scratch_load_b64 v[32:33], off, s32 offset:24 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 offset:16 +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[0:1] +; +; GFX12-LABEL: tail_call_byval_align16: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: scratch_load_b32 v32, off, s32 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_getpc_b64 s[0:1] +; GFX12-NEXT: s_sext_i32_i16 s1, s1 +; GFX12-NEXT: s_add_co_u32 s0, s0, byval_align16_f64_arg@rel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s1, s1, byval_align16_f64_arg@rel32@hi+16 +; GFX12-NEXT: scratch_store_b32 off, v32, s32 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_load_b64 v[32:33], off, s32 offset:24 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_store_b64 off, v[32:33], s32 offset:16 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[0:1] +entry: + %alloca = alloca double, align 8, addrspace(5) + tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca) + ret void +} + +; from udiv.ll +; covers s_load +; +define amdgpu_kernel void @udiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) { +; GFX9-LABEL: udiv_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX9-NEXT: s_sub_i32 s4, 0, s3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s5, v0 +; GFX9-NEXT: s_mul_i32 s4, s4, s5 +; GFX9-NEXT: s_mul_hi_u32 s4, s5, s4 +; GFX9-NEXT: s_add_i32 s5, s5, s4 +; GFX9-NEXT: s_mul_hi_u32 s4, s2, s5 +; GFX9-NEXT: s_mul_i32 s5, s4, s3 +; GFX9-NEXT: s_sub_i32 s2, s2, s5 +; GFX9-NEXT: s_add_i32 s6, s4, 1 +; GFX9-NEXT: s_sub_i32 s5, s2, s3 +; GFX9-NEXT: s_cmp_ge_u32 s2, s3 +; GFX9-NEXT: s_cselect_b32 s4, s6, s4 +; GFX9-NEXT: s_cselect_b32 s2, s5, s2 +; GFX9-NEXT: s_add_i32 s5, s4, 1 +; GFX9-NEXT: s_cmp_ge_u32 s2, s3 +; GFX9-NEXT: s_cselect_b32 s2, s5, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s4, 0, s3 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_readfirstlane_b32 s5, v0 +; GFX90A-NEXT: s_mul_i32 s4, s4, s5 +; GFX90A-NEXT: s_mul_hi_u32 s4, s5, s4 +; GFX90A-NEXT: s_add_i32 s5, s5, s4 +; GFX90A-NEXT: s_mul_hi_u32 s4, s2, s5 +; GFX90A-NEXT: s_mul_i32 s5, s4, s3 +; GFX90A-NEXT: s_sub_i32 s2, s2, s5 +; GFX90A-NEXT: s_add_i32 s6, s4, 1 +; GFX90A-NEXT: s_sub_i32 s5, s2, s3 +; GFX90A-NEXT: s_cmp_ge_u32 s2, s3 +; GFX90A-NEXT: s_cselect_b32 s4, s6, s4 +; GFX90A-NEXT: s_cselect_b32 s2, s5, s2 +; GFX90A-NEXT: s_add_i32 s5, s4, 1 +; GFX90A-NEXT: s_cmp_ge_u32 s2, s3 +; GFX90A-NEXT: s_cselect_b32 s2, s5, s4 +; GFX90A-NEXT: v_mov_b32_e32 v0, s2 +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: udiv_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX10-NEXT: s_sub_i32 s5, 0, s3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_mul_i32 s5, s5, s4 +; GFX10-NEXT: s_mul_hi_u32 s5, s4, s5 +; GFX10-NEXT: s_add_i32 s4, s4, s5 +; GFX10-NEXT: s_mul_hi_u32 s4, s2, s4 +; GFX10-NEXT: s_mul_i32 s5, s4, s3 +; GFX10-NEXT: s_sub_i32 s2, s2, s5 +; GFX10-NEXT: s_add_i32 s5, s4, 1 +; GFX10-NEXT: s_sub_i32 s6, s2, s3 +; GFX10-NEXT: s_cmp_ge_u32 s2, s3 +; GFX10-NEXT: s_cselect_b32 s4, s5, s4 +; GFX10-NEXT: s_cselect_b32 s2, s6, s2 +; GFX10-NEXT: s_add_i32 s5, s4, 1 +; GFX10-NEXT: s_cmp_ge_u32 s2, s3 +; GFX10-NEXT: s_cselect_b32 s2, s5, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: udiv_i32: +; GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-FLATSCR-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX9-FLATSCR-NEXT: s_sub_i32 s4, 0, s3 +; GFX9-FLATSCR-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-FLATSCR-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-FLATSCR-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-FLATSCR-NEXT: v_readfirstlane_b32 s5, v0 +; GFX9-FLATSCR-NEXT: s_mul_i32 s4, s4, s5 +; GFX9-FLATSCR-NEXT: s_mul_hi_u32 s4, s5, s4 +; GFX9-FLATSCR-NEXT: s_add_i32 s5, s5, s4 +; GFX9-FLATSCR-NEXT: s_mul_hi_u32 s4, s2, s5 +; GFX9-FLATSCR-NEXT: s_mul_i32 s5, s4, s3 +; GFX9-FLATSCR-NEXT: s_sub_i32 s2, s2, s5 +; GFX9-FLATSCR-NEXT: s_add_i32 s6, s4, 1 +; GFX9-FLATSCR-NEXT: s_sub_i32 s5, s2, s3 +; GFX9-FLATSCR-NEXT: s_cmp_ge_u32 s2, s3 +; GFX9-FLATSCR-NEXT: s_cselect_b32 s4, s6, s4 +; GFX9-FLATSCR-NEXT: s_cselect_b32 s2, s5, s2 +; GFX9-FLATSCR-NEXT: s_add_i32 s5, s4, 1 +; GFX9-FLATSCR-NEXT: s_cmp_ge_u32 s2, s3 +; GFX9-FLATSCR-NEXT: s_cselect_b32 s2, s5, s4 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-FLATSCR-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: udiv_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX11-NEXT: s_sub_i32 s5, 0, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s4, v0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_mul_i32 s5, s5, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mul_hi_u32 s5, s4, s5 +; GFX11-NEXT: s_add_i32 s4, s4, s5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mul_hi_u32 s4, s2, s4 +; GFX11-NEXT: s_mul_i32 s5, s4, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_sub_i32 s2, s2, s5 +; GFX11-NEXT: s_add_i32 s5, s4, 1 +; GFX11-NEXT: s_sub_i32 s6, s2, s3 +; GFX11-NEXT: s_cmp_ge_u32 s2, s3 +; GFX11-NEXT: s_cselect_b32 s4, s5, s4 +; GFX11-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-NEXT: s_add_i32 s5, s4, 1 +; GFX11-NEXT: s_cmp_ge_u32 s2, s3 +; GFX11-NEXT: s_cselect_b32 s2, s5, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: udiv_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cvt_f32_u32 s4, s3 +; GFX12-NEXT: s_sub_co_i32 s5, 0, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1) +; GFX12-NEXT: v_rcp_iflag_f32_e32 v0, s4 +; GFX12-NEXT: v_readfirstlane_b32 s4, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX12-NEXT: s_mul_f32 s4, s4, 0x4f7ffffe +; GFX12-NEXT: s_cvt_u32_f32 s4, s4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_mul_i32 s5, s5, s4 +; GFX12-NEXT: s_mul_hi_u32 s5, s4, s5 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_add_co_i32 s4, s4, s5 +; GFX12-NEXT: s_mul_hi_u32 s4, s2, s4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_mul_i32 s5, s4, s3 +; GFX12-NEXT: s_sub_co_i32 s2, s2, s5 +; GFX12-NEXT: s_add_co_i32 s5, s4, 1 +; GFX12-NEXT: s_sub_co_i32 s6, s2, s3 +; GFX12-NEXT: s_cmp_ge_u32 s2, s3 +; GFX12-NEXT: s_cselect_b32 s4, s5, s4 +; GFX12-NEXT: s_cselect_b32 s2, s6, s2 +; GFX12-NEXT: s_add_co_i32 s5, s4, 1 +; GFX12-NEXT: s_cmp_ge_u32 s2, s3 +; GFX12-NEXT: s_cselect_b32 s2, s5, s4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm + %r = udiv i32 %x, %y + store i32 %r, ptr addrspace(1) %out + ret void +} + +declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) + +; from smrd.ll +; covers s_buffer_load +; +define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) #0 { +; GFX9-LABEL: smrd_sgpr_offset: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: smrd_sgpr_offset: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, s0 +; GFX90A-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: smrd_sgpr_offset: 
+; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX9-FLATSCR-LABEL: smrd_sgpr_offset: +; GFX9-FLATSCR: ; %bb.0: ; %main_body +; GFX9-FLATSCR-NEXT: s_mov_b32 s11, s5 +; GFX9-FLATSCR-NEXT: s_mov_b32 s10, s4 +; GFX9-FLATSCR-NEXT: s_mov_b32 s9, s3 +; GFX9-FLATSCR-NEXT: s_mov_b32 s8, s2 +; GFX9-FLATSCR-NEXT: s_buffer_load_dword s0, s[8:11], s6 offset:0x0 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-FLATSCR-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: smrd_sgpr_offset: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: smrd_sgpr_offset: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: ; return to shader part epilog +main_body: + %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0) + ret float %r +} + +; from atomic_load_add.ll +; covers s_load, ds_add (atomic without return) +; +define amdgpu_kernel void @atomic_add_local(ptr addrspace(3) %local) { +; GFX9-LABEL: atomic_add_local: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b64 s[2:3], exec +; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB5_2 +; GFX9-NEXT: ; %bb.1: +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bcnt1_i32_b64 s1, s[2:3] +; GFX9-NEXT: s_mul_i32 s1, s1, 5 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: ds_add_u32 v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: .LBB5_2: +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_add_local: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_mov_b64 s[2:3], exec +; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB5_2 +; GFX90A-NEXT: ; %bb.1: +; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_bcnt1_i32_b64 s1, s[2:3] +; GFX90A-NEXT: s_mul_i32 s1, s1, 5 +; GFX90A-NEXT: v_mov_b32_e32 v1, s1 +; GFX90A-NEXT: v_mov_b32_e32 v0, s0 +; GFX90A-NEXT: ds_add_u32 v0, v1 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: .LBB5_2: +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: atomic_add_local: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 s2, exec_lo +; GFX10-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: s_and_saveexec_b32 s3, vcc_lo +; GFX10-NEXT: s_cbranch_execz .LBB5_2 +; GFX10-NEXT: ; %bb.1: +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_bcnt1_i32_b32 s1, s2 +; GFX10-NEXT: s_mul_i32 s1, s1, 5 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: ds_add_u32 v0, v1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: buffer_gl0_inv +; GFX10-NEXT: .LBB5_2: +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: atomic_add_local: +; GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_mov_b64 s[2:3], 
exec +; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB5_2 +; GFX9-FLATSCR-NEXT: ; %bb.1: +; GFX9-FLATSCR-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_bcnt1_i32_b64 s1, s[2:3] +; GFX9-FLATSCR-NEXT: s_mul_i32 s1, s1, 5 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-FLATSCR-NEXT: ds_add_u32 v0, v1 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: .LBB5_2: +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: atomic_add_local: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s3, exec_lo +; GFX11-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX11-NEXT: s_cbranch_execz .LBB5_2 +; GFX11-NEXT: ; %bb.1: +; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_bcnt1_i32_b32 s1, s2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mul_i32 s1, s1, 5 +; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0 +; GFX11-NEXT: ds_add_u32 v0, v1 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: .LBB5_2: +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s3, exec_lo +; GFX12-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX12-NEXT: s_cbranch_execz .LBB5_2 +; GFX12-NEXT: ; %bb.1: +; GFX12-NEXT: s_load_b32 s0, s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_bcnt1_i32_b32 s1, s2 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_mul_i32 s1, s1, 5 +; GFX12-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0 +; GFX12-NEXT: ds_add_u32 v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: .LBB5_2: +; GFX12-NEXT: s_endpgm + %unused = atomicrmw volatile add ptr addrspace(3) %local, i32 5 seq_cst + ret void +} + +; from flat_atomics_i32_system.ll +; covers flat_atomic_swap (atomic without return) +; +define void @flat_atomic_xchg_i32_noret(ptr %ptr, i32 %in) { +; GFX9-LABEL: flat_atomic_xchg_i32_noret: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_atomic_swap v[0:1], v2 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: flat_atomic_xchg_i32_noret: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: flat_atomic_swap v[0:1], v2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_atomic_xchg_i32_noret: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: flat_atomic_swap v[0:1], v2 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-FLATSCR-LABEL: flat_atomic_xchg_i32_noret: +; 
GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: flat_atomic_swap v[0:1], v2 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-FLATSCR-NEXT: buffer_wbinvl1_vol +; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: flat_atomic_xchg_i32_noret: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: flat_atomic_xchg_i32_noret: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_swap_b32 v[0:1], v2 +; GFX12-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %tmp0 = atomicrmw xchg ptr %ptr, i32 %in seq_cst + ret void +} + +; from atomic_load_add.ll +; covers s_load, ds_add_rtn (atomic with return) +; +define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrspace(3) %local) { +; GFX9-LABEL: atomic_add_ret_local: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b64 s[4:5], exec +; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: ; implicit-def: $vgpr1 +; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX9-NEXT: s_cbranch_execz .LBB7_2 +; GFX9-NEXT: ; %bb.1: +; GFX9-NEXT: s_load_dword s6, s[0:1], 0x2c +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9-NEXT: s_mul_i32 s4, s4, 5 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: .LBB7_2: +; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_add_ret_local: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_mov_b64 s[4:5], exec +; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB7_2 +; GFX90A-NEXT: ; %bb.1: +; GFX90A-NEXT: s_load_dword s6, s[0:1], 0x2c +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX90A-NEXT: s_mul_i32 s4, s4, 5 +; GFX90A-NEXT: v_mov_b32_e32 v2, s4 +; GFX90A-NEXT: v_mov_b32_e32 v1, s6 +; GFX90A-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: .LBB7_2: +; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_readfirstlane_b32 s2, v1 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX90A-NEXT: global_store_dword v2, v0, s[0:1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: atomic_add_ret_local: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 
s3, exec_lo +; GFX10-NEXT: ; implicit-def: $vgpr1 +; GFX10-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: s_and_saveexec_b32 s2, vcc_lo +; GFX10-NEXT: s_cbranch_execz .LBB7_2 +; GFX10-NEXT: ; %bb.1: +; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX10-NEXT: s_mul_i32 s3, s3, 5 +; GFX10-NEXT: v_mov_b32_e32 v2, s3 +; GFX10-NEXT: v_mov_b32_e32 v1, s4 +; GFX10-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: buffer_gl0_inv +; GFX10-NEXT: .LBB7_2: +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s2, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX10-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: atomic_add_ret_local: +; GFX9-FLATSCR: ; %bb.0: +; GFX9-FLATSCR-NEXT: s_mov_b64 s[4:5], exec +; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-FLATSCR-NEXT: ; implicit-def: $vgpr1 +; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB7_2 +; GFX9-FLATSCR-NEXT: ; %bb.1: +; GFX9-FLATSCR-NEXT: s_load_dword s6, s[0:1], 0x2c +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9-FLATSCR-NEXT: s_mul_i32 s4, s4, 5 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-FLATSCR-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: .LBB7_2: +; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX9-FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_readfirstlane_b32 s2, v1 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX9-FLATSCR-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: atomic_add_ret_local: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 s3, exec_lo +; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX11-NEXT: s_cbranch_execz .LBB7_2 +; GFX11-NEXT: ; %bb.1: +; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x2c +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mul_i32 s3, s3, 5 +; GFX11-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v1, s4 +; GFX11-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: .LBB7_2: +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s2, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_ret_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: 
s_mov_b32 s3, exec_lo +; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX12-NEXT: ; implicit-def: $vgpr1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX12-NEXT: s_cbranch_execz .LBB7_2 +; GFX12-NEXT: ; %bb.1: +; GFX12-NEXT: s_load_b32 s4, s[0:1], 0x2c +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_mul_i32 s3, s3, 5 +; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v1, s4 +; GFX12-NEXT: ds_add_rtn_u32 v1, v1, v2 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: .LBB7_2: +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_readfirstlane_b32 s2, v1 +; GFX12-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm + %val = atomicrmw volatile add ptr addrspace(3) %local, i32 5 seq_cst + store i32 %val, ptr addrspace(1) %out + ret void +} + +declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32 immarg) + +; from atomic_optimizations_buffer.ll +; covers buffer_atomic (atomic with return) +; +define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) { +; GFX9-LABEL: add_i32_constant: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_mov_b64 s[4:5], exec +; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: ; implicit-def: $vgpr1 +; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX9-NEXT: s_cbranch_execz .LBB8_2 +; GFX9-NEXT: ; %bb.1: +; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9-NEXT: s_mul_i32 s4, s4, 5 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: .LBB8_2: +; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: add_i32_constant: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: s_mov_b64 s[4:5], exec +; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB8_2 +; GFX90A-NEXT: ; %bb.1: +; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX90A-NEXT: s_mul_i32 s4, s4, 5 +; GFX90A-NEXT: v_mov_b32_e32 v1, s4 +; GFX90A-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: .LBB8_2: +; GFX90A-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_readfirstlane_b32 s2, v1 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX90A-NEXT: 
global_store_dword v2, v0, s[0:1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: add_i32_constant: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s3, exec_lo +; GFX10-NEXT: ; implicit-def: $vgpr1 +; GFX10-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: s_and_saveexec_b32 s2, vcc_lo +; GFX10-NEXT: s_cbranch_execz .LBB8_2 +; GFX10-NEXT: ; %bb.1: +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX10-NEXT: s_mul_i32 s3, s3, 5 +; GFX10-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: .LBB8_2: +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s2, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX10-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: add_i32_constant: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: s_mov_b64 s[4:5], exec +; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-FLATSCR-NEXT: ; implicit-def: $vgpr1 +; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB8_2 +; GFX9-FLATSCR-NEXT: ; %bb.1: +; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9-FLATSCR-NEXT: s_mul_i32 s4, s4, 5 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-FLATSCR-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: .LBB8_2: +; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX9-FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_readfirstlane_b32 s2, v1 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX9-FLATSCR-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: add_i32_constant: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_mov_b32 s3, exec_lo +; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX11-NEXT: s_cbranch_execz .LBB8_2 +; GFX11-NEXT: ; %bb.1: +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mul_i32 s3, s3, 5 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: buffer_atomic_add_u32 v1, off, s[4:7], 0 glc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: .LBB8_2: +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s2, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; 
GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: add_i32_constant: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_mov_b32 s3, exec_lo +; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 +; GFX12-NEXT: ; implicit-def: $vgpr1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX12-NEXT: s_cbranch_execz .LBB8_2 +; GFX12-NEXT: ; %bb.1: +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_bcnt1_i32_b32 s3, s3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_mul_i32 s3, s3, 5 +; GFX12-NEXT: v_mov_b32_e32 v1, s3 +; GFX12-NEXT: buffer_atomic_add_u32 v1, off, s[4:7], null th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: .LBB8_2: +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_readfirstlane_b32 s2, v1 +; GFX12-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_mad_u32_u24 v0, v0, 5, s2 +; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm +entry: + %old = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32 5, ptr addrspace(8) %inout, i32 0, i32 0, i32 0) + store i32 %old, ptr addrspace(1) %out + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) + +; from llvm.amdgcn.image.load.a16.ll +; covers image_load +; +define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load.f32.1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: load.f32.1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load.f32.1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; +; GFX9-FLATSCR-LABEL: load.f32.1d: +; GFX9-FLATSCR: ; %bb.0: ; %main_body +; GFX9-FLATSCR-NEXT: s_mov_b32 s11, s9 +; GFX9-FLATSCR-NEXT: s_mov_b32 s10, s8 +; GFX9-FLATSCR-NEXT: s_mov_b32 s9, s7 +; GFX9-FLATSCR-NEXT: s_mov_b32 s8, s6 +; GFX9-FLATSCR-NEXT: s_mov_b32 s7, s5 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, s4 +; GFX9-FLATSCR-NEXT: s_mov_b32 s5, s3 +; GFX9-FLATSCR-NEXT: s_mov_b32 s4, s2 +; GFX9-FLATSCR-NEXT: image_load v0, v0, s[4:11] dmask:0x1 unorm a16 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: load.f32.1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: load.f32.1d: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D a16 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: ; return to shader part epilog +main_body: + %x = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) + +; from 
llvm.amdgcn.image.store.a16.ll +; covers image_store +; +define amdgpu_ps void @store_f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +; GFX9-LABEL: store_f32_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: store_f32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v3 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_store v[2:5], v0, s[0:7] dmask:0x1 unorm a16 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: store_f32_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: store_f32_1d: +; GFX9-FLATSCR: ; %bb.0: ; %main_body +; GFX9-FLATSCR-NEXT: s_mov_b32 s11, s9 +; GFX9-FLATSCR-NEXT: s_mov_b32 s10, s8 +; GFX9-FLATSCR-NEXT: s_mov_b32 s9, s7 +; GFX9-FLATSCR-NEXT: s_mov_b32 s8, s6 +; GFX9-FLATSCR-NEXT: s_mov_b32 s7, s5 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, s4 +; GFX9-FLATSCR-NEXT: s_mov_b32 s5, s3 +; GFX9-FLATSCR-NEXT: s_mov_b32 s4, s2 +; GFX9-FLATSCR-NEXT: image_store v[1:4], v0, s[4:11] dmask:0x1 unorm a16 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: store_f32_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: store_f32_1d: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D a16 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm + +main_body: + %x = extractelement <2 x i16> %coords, i32 0 + call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) + +; from llvm.amdgcn.image.atomic.dim.ll +; covers image_atomic (atomic with return) +; +define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX9-LABEL: atomic_swap_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_swap_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: atomic_swap_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; +; GFX9-FLATSCR-LABEL: atomic_swap_1d: +; GFX9-FLATSCR: ; %bb.0: ; %main_body +; GFX9-FLATSCR-NEXT: s_mov_b32 s11, s9 +; GFX9-FLATSCR-NEXT: s_mov_b32 s10, s8 +; GFX9-FLATSCR-NEXT: s_mov_b32 s9, s7 +; GFX9-FLATSCR-NEXT: s_mov_b32 s8, s6 +; GFX9-FLATSCR-NEXT: s_mov_b32 s7, s5 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, s4 +; GFX9-FLATSCR-NEXT: s_mov_b32 s5, s3 +; GFX9-FLATSCR-NEXT: s_mov_b32 s4, s2 +; GFX9-FLATSCR-NEXT: image_atomic_swap v0, v1, s[4:11] dmask:0x1 unorm glc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: ; return to shader 
part epilog +; +; GFX11-LABEL: atomic_swap_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: atomic_swap_1d: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: ; return to shader part epilog +main_body: + %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %out = bitcast i32 %v to float + ret float %out +} + +; from lds-bounds.ll +; covers ds_write_b64 (atomic without return) +@compute_lds = external addrspace(3) global [512 x i32], align 16 +; +define amdgpu_cs void @store_aligned(ptr addrspace(3) %ptr) #0 { +; GFX9-LABEL: store_aligned: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: v_mov_b32_e32 v1, 42 +; GFX9-NEXT: v_mov_b32_e32 v2, 43 +; GFX9-NEXT: ds_write_b64 v0, v[1:2] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: store_aligned: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: v_mov_b32_e32 v2, 42 +; GFX90A-NEXT: v_mov_b32_e32 v3, 43 +; GFX90A-NEXT: ds_write_b64 v0, v[2:3] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: store_aligned: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: v_mov_b32_e32 v1, 42 +; GFX10-NEXT: v_mov_b32_e32 v2, 43 +; GFX10-NEXT: ds_write_b64 v0, v[1:2] +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: store_aligned: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 42 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 43 +; GFX9-FLATSCR-NEXT: ds_write_b64 v0, v[1:2] +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: store_aligned: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v2, 43 +; GFX11-NEXT: ds_store_b64 v0, v[1:2] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: store_aligned: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v2, 43 +; GFX12-NEXT: ds_store_b64 v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: s_endpgm +entry: + %ptr.gep.1 = getelementptr i32, ptr addrspace(3) %ptr, i32 1 + + store i32 42, ptr addrspace(3) %ptr, align 8 + store i32 43, ptr addrspace(3) %ptr.gep.1 + ret void +} + + +; from lds-bounds.ll +; covers ds_read_b64 +; +define amdgpu_cs <2 x float> @load_aligned(ptr addrspace(3) %ptr) #0 { +; GFX9-LABEL: load_aligned: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: load_aligned: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: ds_read_b64 v[0:1], v0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_aligned: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: ds_read_b64 v[0:1], v0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; +; GFX9-FLATSCR-LABEL: load_aligned: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: load_aligned: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: ds_load_b64 v[0:1], v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: ; return to shader part epilog +; +; GFX12-LABEL: 
load_aligned: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: ds_load_b64 v[0:1], v0 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: ; return to shader part epilog +entry: + %ptr.gep.1 = getelementptr i32, ptr addrspace(3) %ptr, i32 1 + + %v.0 = load i32, ptr addrspace(3) %ptr, align 8 + %v.1 = load i32, ptr addrspace(3) %ptr.gep.1 + + %r.0 = insertelement <2 x i32> poison, i32 %v.0, i32 0 + %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1 + %bc = bitcast <2 x i32> %r.1 to <2 x float> + ret <2 x float> %bc +} + +; from lds-bounds.ll +; covers ds_write2_b32 +; +define amdgpu_cs void @store_global_const_idx() #0 { +; GFX9-LABEL: store_global_const_idx: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX9-NEXT: v_mov_b32_e32 v1, 42 +; GFX9-NEXT: v_mov_b32_e32 v2, 43 +; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: store_global_const_idx: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX90A-NEXT: v_mov_b32_e32 v1, 42 +; GFX90A-NEXT: v_mov_b32_e32 v2, 43 +; GFX90A-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_endpgm +; +; GFX10-LABEL: store_global_const_idx: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX10-NEXT: v_mov_b32_e32 v1, 42 +; GFX10-NEXT: v_mov_b32_e32 v2, 43 +; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: store_global_const_idx: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 42 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 43 +; GFX9-FLATSCR-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX11-LABEL: store_global_const_idx: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_dual_mov_b32 v0, compute_lds@abs32@lo :: v_dual_mov_b32 v1, 42 +; GFX11-NEXT: v_mov_b32_e32 v2, 43 +; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: store_global_const_idx: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: v_dual_mov_b32 v0, compute_lds@abs32@lo :: v_dual_mov_b32 v1, 42 +; GFX12-NEXT: v_mov_b32_e32 v2, 43 +; GFX12-NEXT: ds_store_2addr_b32 v0, v1, v2 offset0:3 offset1:4 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: s_endpgm +entry: + %ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 3 + %ptr.b = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 4 + + store i32 42, ptr addrspace(3) %ptr.a + store i32 43, ptr addrspace(3) %ptr.b + ret void +} + +; from lds-bounds.ll +; covers ds_read2_b32 +; +define amdgpu_cs <2 x float> @load_global_const_idx() #0 { +; GFX9-LABEL: load_global_const_idx: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset0:3 offset1:4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: load_global_const_idx: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo +; GFX90A-NEXT: ds_read2_b32 v[0:1], v0 offset0:3 offset1:4 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_global_const_idx: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: 
v_mov_b32_e32 v0, compute_lds@abs32@lo
+; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset0:3 offset1:4
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: load_global_const_idx:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo
+; GFX9-FLATSCR-NEXT: ds_read2_b32 v[0:1], v0 offset0:3 offset1:4
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: load_global_const_idx:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo
+; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset0:3 offset1:4
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: load_global_const_idx:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: v_mov_b32_e32 v0, compute_lds@abs32@lo
+; GFX12-NEXT: ds_load_2addr_b32 v[0:1], v0 offset0:3 offset1:4
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: ; return to shader part epilog
+entry:
+  %ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 3
+  %ptr.b = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 4
+
+  %v.0 = load i32, ptr addrspace(3) %ptr.a
+  %v.1 = load i32, ptr addrspace(3) %ptr.b
+
+  %r.0 = insertelement <2 x i32> poison, i32 %v.0, i32 0
+  %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1
+  %bc = bitcast <2 x i32> %r.1 to <2 x float>
+  ret <2 x float> %bc
+}
+

From 4d80dff819d1164775d0d55fc68bffedb90ba53c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Wed, 10 Apr 2024 13:56:18 -0400
Subject: [PATCH 045/886] int -> uintptr_t to silence diagnostics

'int' may not be sufficiently large to store a pointer representation
anyway, so this is also a correctness fix.
---
 clang/lib/AST/Interp/FunctionPointer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/Interp/FunctionPointer.h b/clang/lib/AST/Interp/FunctionPointer.h
index e7fad8161fd9c..f61f9ded0bf00 100644
--- a/clang/lib/AST/Interp/FunctionPointer.h
+++ b/clang/lib/AST/Interp/FunctionPointer.h
@@ -24,7 +24,7 @@ class FunctionPointer final {
 public:
   // FIXME: We might want to track the fact that the Function pointer
   // has been created from an integer and is most likely garbage anyway.
-  FunctionPointer(int IntVal = 0, const Descriptor *Desc = nullptr)
+  FunctionPointer(uintptr_t IntVal = 0, const Descriptor *Desc = nullptr)
       : Func(reinterpret_cast<const Function *>(IntVal)) {}
 
   FunctionPointer(const Function *Func) : Func(Func) { assert(Func); }

From 21009f466ece9f21b18e1bb03bd74b566188bae5 Mon Sep 17 00:00:00 2001
From: martinboehme <mboehme@google.com>
Date: Wed, 10 Apr 2024 20:03:35 +0200
Subject: [PATCH 046/886] [clang][dataflow] Propagate locations from result
 objects to initializers. (#87320)

Previously, we were propagating storage locations the other way around,
i.e. from initializers to result objects, using `RecordValue::getLoc()`.
This gave the wrong behavior in some cases -- see the newly added or
fixed tests in this patch.

In addition, this patch now unblocks removing the `RecordValue` class
entirely, as we no longer need `RecordValue::getLoc()`.

With this patch, the test `TransferTest.DifferentReferenceLocInJoin`
started to fail because the framework now always uses the same storage
location for a `MaterializeTemporaryExpr`, meaning that the code under
test no longer set up the desired state where a variable of reference
type is mapped to two different storage locations in environments being
joined.
Rather than trying to modify this test to set up the test condition
again, I have chosen to replace the test with an equivalent test in
DataflowEnvironmentTest.cpp that sets up the test condition directly;
because this test is more direct, it will also be less brittle in the
face of future changes.
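For illustration, reusing the example from the FIXME comment that this
patch removes (`MyClass`, `some_condition`, `foo`, and `bar` are
placeholder names):

  MyClass c = some_condition() ? MyClass(foo) : MyClass(bar);

Both `MyClass(foo)` and `MyClass(bar)` are now mapped to the storage
location of `c`, so the two arms of the conditional initialize the same
result object, similar to the way this is done in Clang's CodeGen.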
---
 .../FlowSensitive/DataflowEnvironment.h       |  64 ++-
 .../FlowSensitive/DataflowEnvironment.cpp     | 405 +++++++++++++-----
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 176 ++++----
 .../TypeErasedDataflowAnalysis.cpp            |  13 +-
 .../FlowSensitive/DataflowEnvironmentTest.cpp |  43 ++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 172 +++++---
 6 files changed, 590 insertions(+), 283 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 9a65f76cdf56b..706664d7db1c2 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -30,6 +30,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <memory>
 #include <type_traits>
 #include <utility>
@@ -344,17 +345,6 @@ class Environment {
   /// location of the result object to pass in `this`, even though prvalues are
   /// otherwise not associated with storage locations.
   ///
-  /// FIXME: Currently, this simply returns a stable storage location for `E`,
-  /// but this doesn't do the right thing in scenarios like the following:
-  /// ```
-  /// MyClass c = some_condition()? MyClass(foo) : MyClass(bar);
-  /// ```
-  /// Here, `MyClass(foo)` and `MyClass(bar)` will have two different storage
-  /// locations, when in fact their storage locations should be the same.
-  /// Eventually, we want to propagate storage locations from result objects
-  /// down to the prvalues that initialize them, similar to the way that this is
-  /// done in Clang's CodeGen.
-  ///
   /// Requirements:
   ///  `E` must be a prvalue of record type.
   RecordStorageLocation &
@@ -462,7 +452,13 @@ class Environment {
   /// Initializes the fields (including synthetic fields) of `Loc` with values,
   /// unless values of the field type are not supported or we hit one of the
   /// limits at which we stop producing values.
-  void initializeFieldsWithValues(RecordStorageLocation &Loc);
+  /// If `Type` is provided, initializes only those fields that are modeled for
+  /// `Type`; this is intended for use in cases where `Loc` is a derived type
+  /// and we only want to initialize the fields of a base type.
+  void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type);
+  void initializeFieldsWithValues(RecordStorageLocation &Loc) {
+    initializeFieldsWithValues(Loc, Loc.getType());
+  }
 
   /// Assigns `Val` as the value of `Loc` in the environment.
   void setValue(const StorageLocation &Loc, Value &Val);
@@ -653,6 +649,9 @@ class Environment {
   LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
 
 private:
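+  // Maps from record prvalues to the storage locations of their result
+  // objects (built by `buildResultObjectMap()`, declared below).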
+  using PrValueToResultObject =
+      llvm::DenseMap<const Expr *, RecordStorageLocation *>;
+
   // The copy-constructor is for use in fork() only.
   Environment(const Environment &) = default;
 
@@ -682,8 +681,10 @@ class Environment {
   /// Initializes the fields (including synthetic fields) of `Loc` with values,
   /// unless values of the field type are not supported or we hit one of the
   /// limits at which we stop producing values (controlled by `Visited`,
-  /// `Depth`, and `CreatedValuesCount`).
-  void initializeFieldsWithValues(RecordStorageLocation &Loc,
+  /// `Depth`, and `CreatedValuesCount`). If `Type` is different from
+  /// `Loc.getType()`, initializes only those fields that are modeled for
+  /// `Type`.
+  void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type,
                                   llvm::DenseSet<QualType> &Visited, int Depth,
                                   int &CreatedValuesCount);
@@ -702,22 +703,45 @@ class Environment {
   /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body.
   void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl);
 
+  static PrValueToResultObject
+  buildResultObjectMap(DataflowAnalysisContext *DACtx,
+                       const FunctionDecl *FuncDecl,
+                       RecordStorageLocation *ThisPointeeLoc,
+                       RecordStorageLocation *LocForRecordReturnVal);
+
   // `DACtx` is not null and not owned by this object.
   DataflowAnalysisContext *DACtx;
 
-  // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and
-  // `ThisPointeeLoc` into a separate call-context object, shared between
-  // environments in the same call.
+  // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`,
+  // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object,
+  // shared between environments in the same call.
   // https://github.com/llvm/llvm-project/issues/59005
 
   // `DeclContext` of the block being analysed if provided.
   std::vector<const DeclContext *> CallStack;
 
-  // Value returned by the function (if it has non-reference return type).
+  // Maps from prvalues of record type to their result objects. Shared between
+  // all environments for the same function.
+  // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr`
+  // here, though the cost is acceptable: The overhead of a `shared_ptr` is
+  // incurred when it is copied, and this happens only relatively rarely (when
+  // we fork the environment). The need for a `shared_ptr` will go away once we
+  // introduce a shared call-context object (see above).
+  std::shared_ptr<PrValueToResultObject> ResultObjectMap;
+
+  // The following three member variables handle various different types of
+  // return values.
+  // - If the return type is not a reference and not a record: Value returned
+  //   by the function.
   Value *ReturnVal = nullptr;
-  // Storage location of the reference returned by the function (if it has
-  // reference return type).
+  // - If the return type is a reference: Storage location of the reference
+  //   returned by the function.
   StorageLocation *ReturnLoc = nullptr;
+  // - If the return type is a record or the function being analyzed is a
+  //   constructor: Storage location into which the return value should be
+  //   constructed.
+  RecordStorageLocation *LocForRecordReturnVal = nullptr;
 
   // The storage location of the `this` pointee. Should only be null if the
   // function being analyzed is only a function and not a method.
   RecordStorageLocation *ThisPointeeLoc = nullptr;
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 1bfa7ebcfd50c..6c796b4ad923e 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -15,6 +15,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
@@ -26,6 +27,8 @@
 #include <cassert>
 #include <utility>
 
+#define DEBUG_TYPE "dataflow"
+
 namespace clang {
 namespace dataflow {
@@ -354,6 +357,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   for (auto *Child : S.children())
     if (Child != nullptr)
       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
+  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
+    getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs);
   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
 
@@ -386,6 +391,186 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   }
 }
 
+namespace {
+
+// Visitor that builds a map from record prvalues to result objects.
+// This traverses the body of the function to be analyzed; for each result
+// object that it encounters, it propagates the storage location of the result
+// object to all record prvalues that can initialize it.
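+// For example, given `S s = b ? S(1) : S(2);` (a hypothetical snippet), the
+// storage location of `s` is propagated to both `S(1)` and `S(2)`.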
+class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
+public:
+  // `ResultObjectMap` will be filled with a map from record prvalues to result
+  // object. If the function being analyzed returns a record by value,
+  // `LocForRecordReturnVal` is the location to which this record should be
+  // written; otherwise, it is null.
+  explicit ResultObjectVisitor(
+      llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap,
+      RecordStorageLocation *LocForRecordReturnVal,
+      DataflowAnalysisContext &DACtx)
+      : ResultObjectMap(ResultObjectMap),
+        LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {}
+
+  bool shouldVisitImplicitCode() { return true; }
+
+  bool shouldVisitLambdaBody() const { return false; }
+
+  // Traverse all member and base initializers of `Ctor`. This function is not
+  // called by `RecursiveASTVisitor`; it should be called manually if we are
+  // analyzing a constructor. `ThisPointeeLoc` is the storage location that
+  // `this` points to.
+  void TraverseConstructorInits(const CXXConstructorDecl *Ctor,
+                                RecordStorageLocation *ThisPointeeLoc) {
+    assert(ThisPointeeLoc != nullptr);
+    for (const CXXCtorInitializer *Init : Ctor->inits()) {
+      Expr *InitExpr = Init->getInit();
+      if (FieldDecl *Field = Init->getMember();
+          Field != nullptr && Field->getType()->isRecordType()) {
+        PropagateResultObject(InitExpr, cast<RecordStorageLocation>(
+                                            ThisPointeeLoc->getChild(*Field)));
+      } else if (Init->getBaseClass()) {
+        PropagateResultObject(InitExpr, ThisPointeeLoc);
+      }
+
+      // Ensure that any result objects within `InitExpr` (e.g. temporaries)
+      // are also propagated to the prvalues that initialize them.
+      TraverseStmt(InitExpr);
+
+      // If this is a `CXXDefaultInitExpr`, also propagate any result objects
+      // within the default expression.
+      if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr))
+        TraverseStmt(DefaultInit->getExpr());
+    }
+  }
+
+  bool TraverseBindingDecl(BindingDecl *BD) {
+    // `RecursiveASTVisitor` doesn't traverse holding variables for
+    // `BindingDecl`s by itself, so we need to tell it to.
+    if (VarDecl *HoldingVar = BD->getHoldingVar())
+      TraverseDecl(HoldingVar);
+    return RecursiveASTVisitor<ResultObjectVisitor>::TraverseBindingDecl(BD);
+  }
+
+  bool VisitVarDecl(VarDecl *VD) {
+    if (VD->getType()->isRecordType() && VD->hasInit())
+      PropagateResultObject(
+          VD->getInit(),
+          &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD)));
+    return true;
+  }
+
+  bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) {
+    if (MTE->getType()->isRecordType())
+      PropagateResultObject(
+          MTE->getSubExpr(),
+          &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE)));
+    return true;
+  }
+
+  bool VisitReturnStmt(ReturnStmt *Return) {
+    Expr *RetValue = Return->getRetValue();
+    if (RetValue != nullptr && RetValue->getType()->isRecordType() &&
+        RetValue->isPRValue())
+      PropagateResultObject(RetValue, LocForRecordReturnVal);
+    return true;
+  }
+
+  bool VisitExpr(Expr *E) {
+    // Clang's AST can have record-type prvalues without a result object -- for
+    // example as full-expressions contained in a compound statement or as
+    // arguments of call expressions. We notice this if we get here and a
+    // storage location has not yet been associated with `E`. In this case,
+    // treat this as if it was a `MaterializeTemporaryExpr`.
+    if (E->isPRValue() && E->getType()->isRecordType() &&
+        !ResultObjectMap.contains(E))
+      PropagateResultObject(
+          E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E)));
+    return true;
+  }
+
+  // Assigns `Loc` as the result object location of `E`, then propagates the
+  // location to all lower-level prvalues that initialize the same object as
+  // `E` (or one of its base classes or member variables).
+  void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) {
+    if (!E->isPRValue() || !E->getType()->isRecordType()) {
+      assert(false);
+      // Ensure we don't propagate the result object if we hit this in a
+      // release build.
+      return;
+    }
+
+    ResultObjectMap[E] = Loc;
+
+    // The following AST node kinds are "original initializers": They are the
+    // lowest-level AST node that initializes a given object, and nothing
+    // below them can initialize the same object (or part of it).
+    if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) ||
+        isa<CXXDefaultArgExpr>(E) || isa<CXXDefaultInitExpr>(E) ||
+        isa<CXXStdInitializerListExpr>(E)) {
+      return;
+    }
+
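+    // The expression kinds handled below are not original initializers; they
+    // merely pass the result object through to one or more of their children.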
+ if (!Field->getType()->isRecordType()) + continue; + PropagateResultObject( + Init, cast(Loc->getChild(*Field))); + } + return; + } + + if (auto *Op = dyn_cast(E); Op && Op->isCommaOp()) { + PropagateResultObject(Op->getRHS(), Loc); + return; + } + + if (auto *Cond = dyn_cast(E)) { + PropagateResultObject(Cond->getTrueExpr(), Loc); + PropagateResultObject(Cond->getFalseExpr(), Loc); + return; + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. + SmallVector Children(E->child_begin(), E->child_end()); + LLVM_DEBUG({ + if (Children.size() != 1) + E->dump(); + }); + assert(Children.size() == 1); + for (Stmt *S : Children) + PropagateResultObject(cast(S), Loc); + } + +private: + llvm::DenseMap &ResultObjectMap; + RecordStorageLocation *LocForRecordReturnVal; + DataflowAnalysisContext &DACtx; +}; + +} // namespace + Environment::Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx), FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} @@ -401,17 +586,23 @@ void Environment::initialize() { if (DeclCtx == nullptr) return; - if (const auto *FuncDecl = dyn_cast(DeclCtx)) { - assert(FuncDecl->doesThisDeclarationHaveABody()); + const auto *FuncDecl = dyn_cast(DeclCtx); + if (FuncDecl == nullptr) + return; + + assert(FuncDecl->doesThisDeclarationHaveABody()); - initFieldsGlobalsAndFuncs(FuncDecl); + initFieldsGlobalsAndFuncs(FuncDecl); - for (const auto *ParamDecl : FuncDecl->parameters()) { - assert(ParamDecl != nullptr); - setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); - } + for (const auto *ParamDecl : FuncDecl->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); } + if (FuncDecl->getReturnType()->isRecordType()) + LocForRecordReturnVal = &cast( + createStorageLocation(FuncDecl->getReturnType())); + if (const auto *MethodDecl = dyn_cast(DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); @@ -444,6 +635,12 @@ void Environment::initialize() { initializeFieldsWithValues(ThisLoc); } } + + // We do this below the handling of `CXXMethodDecl` above so that we can + // be sure that the storage location for `this` has been set. + ResultObjectMap = std::make_shared( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); } // FIXME: Add support for resetting globals after function calls to enable @@ -484,13 +681,18 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { if (getStorageLocation(*D) != nullptr) continue; - setStorageLocation(*D, createObject(*D)); + // We don't run transfer functions on the initializers of global variables, + // so they won't be associated with a value or storage location. We + // therefore intentionally don't pass an initializer to `createObject()`; + // in particular, this ensures that `createObject()` will initialize the + // fields of record-type variables with values. 
+  setStorageLocation(*D, createObject(*D, nullptr));
   }

   for (const FunctionDecl *FD : Funcs) {
     if (getStorageLocation(*FD) != nullptr)
       continue;

-    auto &Loc = createStorageLocation(FD->getType());
+    auto &Loc = createStorageLocation(*FD);
     setStorageLocation(*FD, Loc);
   }
 }
@@ -519,6 +721,9 @@ Environment Environment::pushCall(const CallExpr *Call) const {
     }
   }

+  if (Call->getType()->isRecordType() && Call->isPRValue())
+    Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);
+
   Env.pushCallInternal(Call->getDirectCallee(),
                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));

@@ -529,6 +734,7 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const {
   Environment Env(*this);

   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
+  Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);

   Env.pushCallInternal(Call->getConstructor(),
                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
@@ -557,6 +763,10 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
     const VarDecl *Param = *ParamIt;
     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
   }
+
+  ResultObjectMap = std::make_shared(
+      buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(),
+                           LocForRecordReturnVal));
 }

 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
@@ -600,6 +810,9 @@ bool Environment::equivalentTo(const Environment &Other,
   if (ReturnLoc != Other.ReturnLoc)
     return false;

+  if (LocForRecordReturnVal != Other.LocForRecordReturnVal)
+    return false;
+
   if (ThisPointeeLoc != Other.ThisPointeeLoc)
     return false;

@@ -623,8 +836,10 @@ LatticeEffect Environment::widen(const Environment &PrevEnv,
   assert(DACtx == PrevEnv.DACtx);
   assert(ReturnVal == PrevEnv.ReturnVal);
   assert(ReturnLoc == PrevEnv.ReturnLoc);
+  assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
   assert(CallStack == PrevEnv.CallStack);
+  assert(ResultObjectMap == PrevEnv.ResultObjectMap);

   auto Effect = LatticeEffect::Unchanged;

@@ -656,12 +871,16 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
                               Environment::ValueModel &Model,
                               ExprJoinBehavior ExprBehavior) {
   assert(EnvA.DACtx == EnvB.DACtx);
+  assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal);
   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
   assert(EnvA.CallStack == EnvB.CallStack);
+  assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap);

   Environment JoinedEnv(*EnvA.DACtx);

   JoinedEnv.CallStack = EnvA.CallStack;
+  JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap;
+  JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;

   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
@@ -730,6 +949,12 @@ StorageLocation &Environment::createStorageLocation(const Expr &E) {

 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
   assert(!DeclToLoc.contains(&D));
+  // The only kinds of declarations that may have a "variable" storage location
+  // are declarations of reference type and `BindingDecl`. For all other
+  // declarations, the storage location should be the stable storage location
+  // returned by `createStorageLocation()`.
+ assert(D.getType()->isReferenceType() || isa(D) || + &Loc == &createStorageLocation(D)); DeclToLoc[&D] = &Loc; } @@ -791,50 +1016,29 @@ Environment::getResultObjectLocation(const Expr &RecordPRValue) const { assert(RecordPRValue.getType()->isRecordType()); assert(RecordPRValue.isPRValue()); - // Returns a storage location that we can use if assertions fail. - auto FallbackForAssertFailure = - [this, &RecordPRValue]() -> RecordStorageLocation & { + assert(ResultObjectMap != nullptr); + RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue); + assert(Loc != nullptr); + // In release builds, use the "stable" storage location if the map lookup + // failed. + if (Loc == nullptr) return cast( DACtx->getStableStorageLocation(RecordPRValue)); - }; - - if (isOriginalRecordConstructor(RecordPRValue)) { - auto *Val = cast_or_null(getValue(RecordPRValue)); - // The builtin transfer function should have created a `RecordValue` for all - // original record constructors. - assert(Val); - if (!Val) - return FallbackForAssertFailure(); - return Val->getLoc(); - } - - if (auto *Op = dyn_cast(&RecordPRValue); - Op && Op->isCommaOp()) { - return getResultObjectLocation(*Op->getRHS()); - } - - // All other expression nodes that propagate a record prvalue should have - // exactly one child. - llvm::SmallVector children(RecordPRValue.child_begin(), - RecordPRValue.child_end()); - assert(children.size() == 1); - if (children.empty()) - return FallbackForAssertFailure(); - - return getResultObjectLocation(*cast(children[0])); + return *Loc; } PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } -void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc) { +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type) { llvm::DenseSet Visited; int CreatedValuesCount = 0; - initializeFieldsWithValues(Loc, Visited, 0, CreatedValuesCount); + initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); if (CreatedValuesCount > MaxCompositeValueSize) { - llvm::errs() << "Attempting to initialize a huge value of type: " - << Loc.getType() << '\n'; + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; } } @@ -848,8 +1052,7 @@ void Environment::setValue(const Expr &E, Value &Val) { const Expr &CanonE = ignoreCFGOmittedNodes(E); if (auto *RecordVal = dyn_cast(&Val)) { - assert(isOriginalRecordConstructor(CanonE) || - &RecordVal->getLoc() == &getResultObjectLocation(CanonE)); + assert(&RecordVal->getLoc() == &getResultObjectLocation(CanonE)); (void)RecordVal; } @@ -928,7 +1131,8 @@ Value *Environment::createValueUnlessSelfReferential( if (Type->isRecordType()) { CreatedValuesCount++; auto &Loc = cast(createStorageLocation(Type)); - initializeFieldsWithValues(Loc, Visited, Depth, CreatedValuesCount); + initializeFieldsWithValues(Loc, Loc.getType(), Visited, Depth, + CreatedValuesCount); return &refreshRecordValue(Loc, *this); } @@ -960,6 +1164,7 @@ Environment::createLocAndMaybeValue(QualType Ty, } void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount) { @@ -967,8 +1172,8 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, if (FieldType->isRecordType()) { auto &FieldRecordLoc = cast(FieldLoc); setValue(FieldRecordLoc, create(FieldRecordLoc)); - initializeFieldsWithValues(FieldRecordLoc, Visited, Depth + 1, - 
CreatedValuesCount); + initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), + Visited, Depth + 1, CreatedValuesCount); } else { if (!Visited.insert(FieldType.getCanonicalType()).second) return; @@ -979,7 +1184,7 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, } }; - for (const auto &[Field, FieldLoc] : Loc.children()) { + for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { assert(Field != nullptr); QualType FieldType = Field->getType(); @@ -988,14 +1193,12 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, CreatedValuesCount)); } else { + StorageLocation *FieldLoc = Loc.getChild(*Field); assert(FieldLoc != nullptr); initField(FieldType, *FieldLoc); } } - for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { - assert(FieldLoc != nullptr); - QualType FieldType = FieldLoc->getType(); - + for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { // Synthetic fields cannot have reference type, so we don't need to deal // with this case. assert(!FieldType->isReferenceType()); @@ -1022,38 +1225,36 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D, return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); } - Value *Val = nullptr; - if (InitExpr) { - // In the (few) cases where an expression is intentionally - // "uninterpreted", `InitExpr` is not associated with a value. There are - // two ways to handle this situation: propagate the status, so that - // uninterpreted initializers result in uninterpreted variables, or - // provide a default value. We choose the latter so that later refinements - // of the variable can be used for reasoning about the surrounding code. - // For this reason, we let this case be handled by the `createValue()` - // call below. - // - // FIXME. If and when we interpret all language cases, change this to - // assert that `InitExpr` is interpreted, rather than supplying a - // default value (assuming we don't update the environment API to return - // references). - Val = getValue(*InitExpr); - - if (!Val && isa(InitExpr) && - InitExpr->getType()->isPointerType()) - Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType()); - } - if (!Val) - Val = createValue(Ty); - - if (Ty->isRecordType()) - return cast(Val)->getLoc(); - StorageLocation &Loc = D ? createStorageLocation(*D) : createStorageLocation(Ty); - if (Val) - setValue(Loc, *Val); + if (Ty->isRecordType()) { + auto &RecordLoc = cast(Loc); + if (!InitExpr) + initializeFieldsWithValues(RecordLoc); + refreshRecordValue(RecordLoc, *this); + } else { + Value *Val = nullptr; + if (InitExpr) + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). 
+ Val = getValue(*InitExpr); + if (!Val) + Val = createValue(Ty); + if (Val) + setValue(Loc, *Val); + } return Loc; } @@ -1072,6 +1273,8 @@ bool Environment::allows(const Formula &F) const { void Environment::dump(raw_ostream &OS) const { llvm::DenseMap LocToName; + if (LocForRecordReturnVal != nullptr) + LocToName[LocForRecordReturnVal] = "(returned record)"; if (ThisPointeeLoc != nullptr) LocToName[ThisPointeeLoc] = "this"; @@ -1102,6 +1305,9 @@ void Environment::dump(raw_ostream &OS) const { if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) OS << " (" << Iter->second << ")"; OS << "\n"; + } else if (Func->getReturnType()->isRecordType() || + isa(Func)) { + OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; } else if (!Func->getReturnType()->isVoidType()) { if (ReturnVal == nullptr) OS << "ReturnVal: nullptr\n"; @@ -1122,6 +1328,22 @@ void Environment::dump() const { dump(llvm::dbgs()); } +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + assert(FuncDecl->doesThisDeclarationHaveABody()); + + PrValueToResultObject Map; + + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + if (const auto *Ctor = dyn_cast(FuncDecl)) + Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); + Visitor.TraverseStmt(FuncDecl->getBody()); + + return Map; +} + RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, const Environment &Env) { Expr *ImplicitObject = MCE.getImplicitObjectArgument(); @@ -1216,24 +1438,11 @@ RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { assert(Expr.getType()->isRecordType()); - if (Expr.isPRValue()) { - if (auto *ExistingVal = Env.get(Expr)) { - auto &NewVal = Env.create(ExistingVal->getLoc()); - Env.setValue(Expr, NewVal); - Env.setValue(NewVal.getLoc(), NewVal); - return NewVal; - } + if (Expr.isPRValue()) + refreshRecordValue(Env.getResultObjectLocation(Expr), Env); - auto &NewVal = *cast(Env.createValue(Expr.getType())); - Env.setValue(Expr, NewVal); - return NewVal; - } - - if (auto *Loc = Env.get(Expr)) { - auto &NewVal = Env.create(*Loc); - Env.setValue(*Loc, NewVal); - return NewVal; - } + if (auto *Loc = Env.get(Expr)) + refreshRecordValue(*Loc, Env); auto &NewVal = *cast(Env.createValue(Expr.getType())); Env.setStorageLocation(Expr, NewVal.getLoc()); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 0a2e8368d541d..88a9c0eccbebc 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -460,11 +460,9 @@ class TransferVisitor : public ConstStmtVisitor { // So make sure we have a value if we didn't propagate one above. if (S->isPRValue() && S->getType()->isRecordType()) { if (Env.getValue(*S) == nullptr) { - Value *Val = Env.createValue(S->getType()); - // We're guaranteed to always be able to create a value for record - // types. 
-        assert(Val != nullptr);
-        Env.setValue(*S, *Val);
+        auto &Loc = Env.getResultObjectLocation(*S);
+        Env.initializeFieldsWithValues(Loc);
+        refreshRecordValue(Loc, Env);
       }
     }
   }
@@ -472,6 +470,13 @@ class TransferVisitor : public ConstStmtVisitor {
   void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) {
     const Expr *InitExpr = S->getExpr();
     assert(InitExpr != nullptr);
+
+    // If this is a prvalue of record type, the handler for `*InitExpr` (if one
+    // exists) will initialize the result object; there is no value to
+    // propagate here.
+    if (S->getType()->isRecordType() && S->isPRValue())
+      return;
+
     propagateValueOrStorageLocation(*InitExpr, *S, Env);
   }

@@ -479,6 +484,17 @@ class TransferVisitor : public ConstStmtVisitor {
     const CXXConstructorDecl *ConstructorDecl = S->getConstructor();
     assert(ConstructorDecl != nullptr);

+    // `CXXConstructExpr` can have array type if default-initializing an array
+    // of records. We don't handle this specifically beyond potentially inlining
+    // the call.
+    if (!S->getType()->isRecordType()) {
+      transferInlineCall(S, ConstructorDecl);
+      return;
+    }
+
+    RecordStorageLocation &Loc = Env.getResultObjectLocation(*S);
+    Env.setValue(*S, refreshRecordValue(Loc, Env));
+
     if (ConstructorDecl->isCopyOrMoveConstructor()) {
       // It is permissible for a copy/move constructor to have additional
       // parameters as long as they have default arguments defined for them.
@@ -491,24 +507,14 @@ class TransferVisitor : public ConstStmtVisitor {
       if (ArgLoc == nullptr)
         return;

-      if (S->isElidable()) {
-        if (Value *Val = Env.getValue(*ArgLoc))
-          Env.setValue(*S, *Val);
-      } else {
-        auto &Val = *cast(Env.createValue(S->getType()));
-        Env.setValue(*S, Val);
-        copyRecord(*ArgLoc, Val.getLoc(), Env);
-      }
+      // Even if the copy/move constructor call is elidable, we choose to copy
+      // the record in all cases (which isn't wrong, just potentially not
+      // optimal).
+      copyRecord(*ArgLoc, Loc, Env);
       return;
     }

-    // `CXXConstructExpr` can have array type if default-initializing an array
-    // of records, and we currently can't create values for arrays. So check if
-    // we've got a record type.
-    if (S->getType()->isRecordType()) {
-      auto &InitialVal = *cast(Env.createValue(S->getType()));
-      Env.setValue(*S, InitialVal);
-    }
+    Env.initializeFieldsWithValues(Loc, S->getType());

     transferInlineCall(S, ConstructorDecl);
   }
@@ -551,19 +557,15 @@ class TransferVisitor : public ConstStmtVisitor {
       if (S->isGLValue()) {
        Env.setStorageLocation(*S, *LocDst);
      } else if (S->getType()->isRecordType()) {
-        // Make sure that we have a `RecordValue` for this expression so that
-        // `Environment::getResultObjectLocation()` is able to return a location
-        // for it.
-        if (Env.getValue(*S) == nullptr)
-          refreshRecordValue(*S, Env);
+        // Assume that the assignment returns the assigned value.
+        copyRecord(*LocDst, Env.getResultObjectLocation(*S), Env);
      }

      return;
    }

-    // CXXOperatorCallExpr can be prvalues. Call `VisitCallExpr`() to create
-    // a `RecordValue` for them so that `Environment::getResultObjectLocation()`
-    // can return a value.
+    // `CXXOperatorCallExpr` can be a prvalue. Call `VisitCallExpr()` to
+    // initialize the prvalue's fields with values.
VisitCallExpr(S); } @@ -580,11 +582,6 @@ class TransferVisitor : public ConstStmtVisitor { } } - void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) { - if (Value *Val = Env.createValue(S->getType())) - Env.setValue(*S, *Val); - } - void VisitCallExpr(const CallExpr *S) { // Of clang's builtins, only `__builtin_expect` is handled explicitly, since // others (like trap, debugtrap, and unreachable) are handled by CFG @@ -612,13 +609,14 @@ class TransferVisitor : public ConstStmtVisitor { } else if (const FunctionDecl *F = S->getDirectCallee()) { transferInlineCall(S, F); - // If this call produces a prvalue of record type, make sure that we have - // a `RecordValue` for it. This is required so that - // `Environment::getResultObjectLocation()` is able to return a location - // for this `CallExpr`. + // If this call produces a prvalue of record type, initialize its fields + // with values. if (S->getType()->isRecordType() && S->isPRValue()) - if (Env.getValue(*S) == nullptr) - refreshRecordValue(*S, Env); + if (Env.getValue(*S) == nullptr) { + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.initializeFieldsWithValues(Loc); + Env.setValue(*S, refreshRecordValue(Loc, Env)); + } } } @@ -666,8 +664,10 @@ class TransferVisitor : public ConstStmtVisitor { // `getLogicOperatorSubExprValue()`. if (S->isGLValue()) Env.setStorageLocation(*S, Env.createObject(S->getType())); - else if (Value *Val = Env.createValue(S->getType())) - Env.setValue(*S, *Val); + else if (!S->getType()->isRecordType()) { + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(*S, *Val); + } } void VisitInitListExpr(const InitListExpr *S) { @@ -688,71 +688,51 @@ class TransferVisitor : public ConstStmtVisitor { return; } - llvm::DenseMap FieldLocs; - RecordInitListHelper InitListHelper(S); + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.setValue(*S, refreshRecordValue(Loc, Env)); - for (auto [Base, Init] : InitListHelper.base_inits()) { - assert(Base->getType().getCanonicalType() == - Init->getType().getCanonicalType()); - auto *BaseVal = Env.get(*Init); - if (!BaseVal) - BaseVal = cast(Env.createValue(Init->getType())); - // Take ownership of the fields of the `RecordValue` for the base class - // and incorporate them into the "flattened" set of fields for the - // derived class. - auto Children = BaseVal->getLoc().children(); - FieldLocs.insert(Children.begin(), Children.end()); - } + // Initialization of base classes and fields of record type happens when we + // visit the nested `CXXConstructExpr` or `InitListExpr` for that base class + // or field. We therefore only need to deal with fields of non-record type + // here. - for (auto [Field, Init] : InitListHelper.field_inits()) { - assert( - // The types are same, or - Field->getType().getCanonicalType().getUnqualifiedType() == - Init->getType().getCanonicalType().getUnqualifiedType() || - // The field's type is T&, and initializer is T - (Field->getType()->isReferenceType() && - Field->getType().getCanonicalType()->getPointeeType() == - Init->getType().getCanonicalType())); - auto& Loc = Env.createObject(Field->getType(), Init); - FieldLocs.insert({Field, &Loc}); - } + RecordInitListHelper InitListHelper(S); - // In the case of a union, we don't in general have initializers for all - // of the fields. Create storage locations for the remaining fields (but - // don't associate them with values). 
- if (Type->isUnionType()) { - for (const FieldDecl *Field : - Env.getDataflowAnalysisContext().getModeledFields(Type)) { - if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted) - it->second = &Env.createStorageLocation(Field->getType()); + for (auto [Field, Init] : InitListHelper.field_inits()) { + if (Field->getType()->isRecordType()) + continue; + if (Field->getType()->isReferenceType()) { + assert(Field->getType().getCanonicalType()->getPointeeType() == + Init->getType().getCanonicalType()); + Loc.setChild(*Field, &Env.createObject(Field->getType(), Init)); + continue; } + assert(Field->getType().getCanonicalType().getUnqualifiedType() == + Init->getType().getCanonicalType().getUnqualifiedType()); + StorageLocation *FieldLoc = Loc.getChild(*Field); + // Locations for non-reference fields must always be non-null. + assert(FieldLoc != nullptr); + Value *Val = Env.getValue(*Init); + if (Val == nullptr && isa(Init) && + Init->getType()->isPointerType()) + Val = + &Env.getOrCreateNullPointerValue(Init->getType()->getPointeeType()); + if (Val == nullptr) + Val = Env.createValue(Field->getType()); + if (Val != nullptr) + Env.setValue(*FieldLoc, *Val); } - // Check that we satisfy the invariant that a `RecordStorageLoation` - // contains exactly the set of modeled fields for that type. - // `ModeledFields` includes fields from all the bases, but only the - // modeled ones. However, if a class type is initialized with an - // `InitListExpr`, all fields in the class, including those from base - // classes, are included in the set of modeled fields. The code above - // should therefore populate exactly the modeled fields. - assert(containsSameFields( - Env.getDataflowAnalysisContext().getModeledFields(Type), FieldLocs)); - - RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs; - for (const auto &Entry : - Env.getDataflowAnalysisContext().getSyntheticFields(Type)) { - SyntheticFieldLocs.insert( - {Entry.getKey(), &Env.createObject(Entry.getValue())}); + for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { + QualType FieldType = FieldLoc->getType(); + if (FieldType->isRecordType()) { + Env.initializeFieldsWithValues(*cast(FieldLoc)); + } else { + if (Value *Val = Env.createValue(FieldType)) + Env.setValue(*FieldLoc, *Val); + } } - auto &Loc = Env.getDataflowAnalysisContext().createRecordStorageLocation( - Type, std::move(FieldLocs), std::move(SyntheticFieldLocs)); - RecordValue &RecordVal = Env.create(Loc); - - Env.setValue(Loc, RecordVal); - - Env.setValue(*S, RecordVal); - // FIXME: Implement array initialization. } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 595f70f819ddb..1b73c5d683016 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -369,17 +369,10 @@ builtinTransferInitializer(const CFGInitializer &Elt, ParentLoc->setChild(*Member, InitExprLoc); } else if (auto *InitExprVal = Env.getValue(*InitExpr)) { assert(MemberLoc != nullptr); - if (Member->getType()->isRecordType()) { - auto *InitValStruct = cast(InitExprVal); - // FIXME: Rather than performing a copy here, we should really be - // initializing the field in place. This would require us to propagate the - // storage location of the field to the AST node that creates the - // `RecordValue`. 
- copyRecord(InitValStruct->getLoc(), - *cast(MemberLoc), Env); - } else { + // Record-type initializers construct themselves directly into the result + // object, so there is no need to handle them here. + if (!Member->getType()->isRecordType()) Env.setValue(*MemberLoc, *InitExprVal); - } } } diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index 465a8e21690c4..cc20623f881ff 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -24,6 +24,7 @@ namespace { using namespace clang; using namespace dataflow; +using ::clang::dataflow::test::findValueDecl; using ::clang::dataflow::test::getFieldValue; using ::testing::Contains; using ::testing::IsNull; @@ -199,6 +200,48 @@ TEST_F(EnvironmentTest, JoinRecords) { } } +TEST_F(EnvironmentTest, DifferentReferenceLocInJoin) { + // This tests the case where the storage location for a reference-type + // variable is different for two states being joined. We used to believe this + // could not happen and therefore had an assertion disallowing this; this test + // exists to demonstrate that we can handle this condition without a failing + // assertion. See also the discussion here: + // https://discourse.llvm.org/t/70086/6 + + using namespace ast_matchers; + + std::string Code = R"cc( + void f(int &ref) {} + )cc"; + + auto Unit = + tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++11"}); + auto &Context = Unit->getASTContext(); + + ASSERT_EQ(Context.getDiagnostics().getClient()->getNumErrors(), 0U); + + const ValueDecl *Ref = findValueDecl(Context, "ref"); + + Environment Env1(DAContext); + StorageLocation &Loc1 = Env1.createStorageLocation(Context.IntTy); + Env1.setStorageLocation(*Ref, Loc1); + + Environment Env2(DAContext); + StorageLocation &Loc2 = Env2.createStorageLocation(Context.IntTy); + Env2.setStorageLocation(*Ref, Loc2); + + EXPECT_NE(&Loc1, &Loc2); + + Environment::ValueModel Model; + Environment EnvJoined = + Environment::join(Env1, Env2, Model, Environment::DiscardExprState); + + // Joining environments with different storage locations for the same + // declaration results in the declaration being removed from the joined + // environment. + EXPECT_EQ(EnvJoined.getStorageLocation(*Ref), nullptr); +} + TEST_F(EnvironmentTest, InitGlobalVarsFun) { using namespace ast_matchers; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index ca055a462a286..00dafb2988c69 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1582,10 +1582,9 @@ TEST(TransferTest, FieldsDontHaveValuesInConstructorWithBaseClass) { [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - // FIXME: The field of the base class should already have been - // initialized with a value by the base constructor. This test documents - // the current buggy behavior. - EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", + // The field of the base class should already have been initialized with + // a value by the base constructor. 
+ EXPECT_NE(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", ASTCtx, Env), nullptr); EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", @@ -2998,8 +2997,12 @@ TEST(TransferTest, ResultObjectLocation) { TEST(TransferTest, ResultObjectLocationForDefaultArgExpr) { std::string Code = R"( - struct S {}; - void funcWithDefaultArg(S s = S()); + struct Inner {}; + struct Outer { + Inner I = {}; + }; + + void funcWithDefaultArg(Outer O = {}); void target() { funcWithDefaultArg(); // [[p]] @@ -3058,13 +3061,7 @@ TEST(TransferTest, ResultObjectLocationForDefaultInitExpr) { RecordStorageLocation &Loc = Env.getResultObjectLocation(*DefaultInit); - // FIXME: The result object location for the `CXXDefaultInitExpr` should - // be the location of the member variable being initialized, but we - // don't do this correctly yet; see also comments in - // `builtinTransferInitializer()`. - // For the time being, we just document the current erroneous behavior - // here (this should be `EXPECT_EQ` when the behavior is fixed). - EXPECT_NE(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); + EXPECT_EQ(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); }); } @@ -3101,6 +3098,79 @@ TEST(TransferTest, ResultObjectLocationForCXXOperatorCallExpr) { }); } +TEST(TransferTest, ResultObjectLocationForStdInitializerListExpr) { + std::string Code = R"( + namespace std { + template + struct initializer_list {}; + } // namespace std + + void target() { + std::initializer_list list = {1}; + // [[p]] + } + )"; + + using ast_matchers::cxxStdInitializerListExpr; + using ast_matchers::match; + using ast_matchers::selectFirst; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *StdInitList = selectFirst( + "std_init_list", + match(cxxStdInitializerListExpr().bind("std_init_list"), ASTCtx)); + ASSERT_NE(StdInitList, nullptr); + + EXPECT_EQ(&Env.getResultObjectLocation(*StdInitList), + &getLocForDecl(ASTCtx, Env, "list")); + }); +} + +TEST(TransferTest, ResultObjectLocationPropagatesThroughConditionalOperator) { + std::string Code = R"( + struct A { + A(int); + }; + + void target(bool b) { + A a = b ? 
A(0) : A(1); + (void)0; // [[p]] + } + )"; + using ast_matchers::cxxConstructExpr; + using ast_matchers::equals; + using ast_matchers::hasArgument; + using ast_matchers::integerLiteral; + using ast_matchers::match; + using ast_matchers::selectFirst; + using ast_matchers::traverse; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *ConstructExpr0 = selectFirst( + "construct", + match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(0)))) + .bind("construct"), + ASTCtx)); + auto *ConstructExpr1 = selectFirst( + "construct", + match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(1)))) + .bind("construct"), + ASTCtx)); + + auto &ALoc = getLocForDecl(ASTCtx, Env, "a"); + EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr0), &ALoc); + EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr1), &ALoc); + }); +} + TEST(TransferTest, StaticCast) { std::string Code = R"( void target(int Foo) { @@ -5886,6 +5956,38 @@ TEST(TransferTest, ContextSensitiveReturnRecord) { {BuiltinOptions{ContextSensitiveOptions{}}}); } +TEST(TransferTest, ContextSensitiveReturnSelfReferentialRecord) { + std::string Code = R"( + struct S { + S() { self = this; } + S *self; + }; + + S makeS() { + // RVO guarantees that this will be constructed directly into `MyS`. + return S(); + } + + void target() { + S MyS = makeS(); + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &MySLoc = getLocForDecl(ASTCtx, Env, "MyS"); + + auto *SelfVal = + cast(getFieldValue(&MySLoc, "self", ASTCtx, Env)); + EXPECT_EQ(&SelfVal->getPointeeLoc(), &MySLoc); + }, + {BuiltinOptions{ContextSensitiveOptions{}}}); +} + TEST(TransferTest, ContextSensitiveMethodLiteral) { std::string Code = R"( class MyClass { @@ -6830,50 +6932,6 @@ TEST(TransferTest, LambdaCaptureThis) { }); } -TEST(TransferTest, DifferentReferenceLocInJoin) { - // This test triggers a case where the storage location for a reference-type - // variable is different for two states being joined. We used to believe this - // could not happen and therefore had an assertion disallowing this; this test - // exists to demonstrate that we can handle this condition without a failing - // assertion. See also the discussion here: - // https://discourse.llvm.org/t/70086/6 - std::string Code = R"( - namespace std { - template struct initializer_list { - const T* begin(); - const T* end(); - }; - } - - void target(char* p, char* end) { - while (p != end) { - if (*p == ' ') { - p++; - continue; - } - - auto && range = {1, 2}; - for (auto b = range.begin(), e = range.end(); b != e; ++b) { - } - (void)0; - // [[p]] - } - } - )"; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { - const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - // Joining environments with different storage locations for the same - // declaration results in the declaration being removed from the joined - // environment. - const ValueDecl *VD = findValueDecl(ASTCtx, "range"); - ASSERT_EQ(Env.getStorageLocation(*VD), nullptr); - }); -} - // This test verifies correct modeling of a relational dependency that goes // through unmodeled functions (the simple `cond()` in this case). TEST(TransferTest, ConditionalRelation) { From b9a3551c905573df456ee52fa1051e49fa956c65 Mon Sep 17 00:00:00 2001 From: "Kevin P. 
Neal" Date: Wed, 10 Apr 2024 11:28:43 -0400 Subject: [PATCH 047/886] [FPEnv][BitcodeReader] Correct strictfp test. Correct a strictfp test to follow the rules documented in the LangRef: https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics This test needed the strictfp attribute added to a function definition. Test changes verified with D146845. --- llvm/unittests/Bitcode/BitReaderTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Bitcode/BitReaderTest.cpp b/llvm/unittests/Bitcode/BitReaderTest.cpp index 3e449f9057789..22cc5e7492803 100644 --- a/llvm/unittests/Bitcode/BitReaderTest.cpp +++ b/llvm/unittests/Bitcode/BitReaderTest.cpp @@ -160,7 +160,7 @@ TEST(BitReaderTest, MaterializeConstrainedFPStrictFP) { LLVMContext Context; std::unique_ptr M = getLazyModuleFromAssembly( Context, Mem, - "define double @foo(double %a) {\n" + "define double @foo(double %a) strictfp {\n" " %result = call double @llvm.experimental.constrained.sqrt.f64(double " "%a, metadata !\"round.tonearest\", metadata !\"fpexcept.strict\") " "strictfp\n" From c1d3f39ae98535777c957aab3611d2abc97b2815 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 21:36:58 -0500 Subject: [PATCH 048/886] [ValueTracking] Add tests for `shufflevector` in `isKnownNonZero` --- .../Transforms/InstSimplify/known-non-zero.ll | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 51f80f62c2f34..9486708369b77 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -177,3 +177,135 @@ define i1 @smax_non_zero(i8 %xx, i8 %y) { %r = icmp eq i8 %v, 0 ret i1 %r } + +define <4 x i1> @shuf_nonzero_both(<4 x i8> %xx, <4 x i8> %yy) { +; CHECK-LABEL: @shuf_nonzero_both( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + %y = add nuw <4 x i8> %yy, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) { +; CHECK-LABEL: @shuf_nonzero_both_fail( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + %y = add nuw <4 x i8> %yy, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_both_fail2(<4 x i8> %xx, <4 x i8> %yy) { +; CHECK-LABEL: @shuf_nonzero_both_fail2( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add <4 x i8> [[YY:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + %y = add <4 x i8> %yy, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %r = icmp eq <4 
x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_lhs(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_lhs( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_lhs2(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_lhs2( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_lhs2_fail(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_lhs2_fail( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_rhs(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_rhs( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> poison, <4 x i8> %x, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_rhs2(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_rhs2( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> poison, <4 x i8> %x, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} + +define <4 x i1> @shuf_nonzero_rhs2_fail(<4 x i8> %xx) { +; CHECK-LABEL: @shuf_nonzero_rhs2_fail( +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %x = add nuw <4 x i8> %xx, + + %shuf = shufflevector <4 x i8> poison, <4 x i8> %x, <4 x i32> + %r = icmp eq <4 x i8> %shuf, zeroinitializer + ret <4 x i1> %r +} From 87528bfefbb50ed6560b9b8482fc7c9f86ca34cd Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 15:33:55 -0500 Subject: [PATCH 049/886] [ValueTracking] Add support for `shufflevector` in `isKnownNonZero` Shuffles don't modify the data, so if all elements that end up in the destination are non-zero the result is non-zero. 
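For example (an illustrative sketch, not one of the new tests; the value
names and the shuffle mask are hypothetical):

  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>   ; every lane non-zero
  %y = add nuw <4 x i8> %yy, <i8 1, i8 1, i8 1, i8 1>   ; every lane non-zero
  %shuf = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %r = icmp eq <4 x i8> %shuf, zeroinitializer          ; folds to all-false

Every element the mask selects comes from a known-non-zero lane of %x or %y,
so %shuf is itself known non-zero. If the mask contains undef elements, the
demanded elements cannot be determined and the analysis conservatively bails.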
Closes #87702 --- llvm/lib/Analysis/ValueTracking.cpp | 15 +++++++++++ .../Transforms/InstSimplify/known-non-zero.ll | 26 ++++--------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5ef1969893b42..b3029f440ca2a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2777,6 +2777,21 @@ static bool isKnownNonZeroFromOperator(const Operator *I, } } break; + case Instruction::ShuffleVector: { + auto *Shuf = dyn_cast(I); + if (!Shuf) + break; + APInt DemandedLHS, DemandedRHS; + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) + break; + // If demanded elements for both vecs are non-zero, the shuffle is non-zero. + return (DemandedRHS.isZero() || + isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Depth, Q)) && + (DemandedLHS.isZero() || + isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Depth, Q)); + } case Instruction::Freeze: return isKnownNonZero(I->getOperand(0), Depth, Q) && isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 9486708369b77..c443dc68d0c13 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -180,11 +180,7 @@ define i1 @smax_non_zero(i8 %xx, i8 %y) { define <4 x i1> @shuf_nonzero_both(<4 x i8> %xx, <4 x i8> %yy) { ; CHECK-LABEL: @shuf_nonzero_both( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer -; CHECK-NEXT: ret <4 x i1> [[R]] +; CHECK-NEXT: ret <4 x i1> zeroinitializer ; %x = add nuw <4 x i8> %xx, %y = add nuw <4 x i8> %yy, @@ -228,10 +224,7 @@ define <4 x i1> @shuf_nonzero_both_fail2(<4 x i8> %xx, <4 x i8> %yy) { define <4 x i1> @shuf_nonzero_lhs(<4 x i8> %xx) { ; CHECK-LABEL: @shuf_nonzero_lhs( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer -; CHECK-NEXT: ret <4 x i1> [[R]] +; CHECK-NEXT: ret <4 x i1> zeroinitializer ; %x = add nuw <4 x i8> %xx, @@ -242,10 +235,7 @@ define <4 x i1> @shuf_nonzero_lhs(<4 x i8> %xx) { define <4 x i1> @shuf_nonzero_lhs2(<4 x i8> %xx) { ; CHECK-LABEL: @shuf_nonzero_lhs2( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer -; CHECK-NEXT: ret <4 x i1> [[R]] +; CHECK-NEXT: ret <4 x i1> zeroinitializer ; %x = add nuw <4 x i8> %xx, @@ -270,10 +260,7 @@ define <4 x i1> @shuf_nonzero_lhs2_fail(<4 x i8> %xx) { define <4 x i1> @shuf_nonzero_rhs(<4 x i8> %xx) { ; CHECK-LABEL: @shuf_nonzero_rhs( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer -; CHECK-NEXT: ret <4 x i1> [[R]] +; CHECK-NEXT: ret <4 x i1> zeroinitializer ; %x = add nuw <4 x i8> %xx, @@ -284,10 +271,7 @@ define <4 x i1> 
@shuf_nonzero_rhs(<4 x i8> %xx) { define <4 x i1> @shuf_nonzero_rhs2(<4 x i8> %xx) { ; CHECK-LABEL: @shuf_nonzero_rhs2( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer -; CHECK-NEXT: ret <4 x i1> [[R]] +; CHECK-NEXT: ret <4 x i1> zeroinitializer ; %x = add nuw <4 x i8> %xx, From 8a28b9b8ec1686426a4b43c8431570eaa1da77d9 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 21:37:14 -0500 Subject: [PATCH 050/886] [ValueTracking] Add tests for `insertelement` in `isKnownNonZero`; NFC --- .../Transforms/InstSimplify/known-non-zero.ll | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index c443dc68d0c13..1417e86ee678c 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -293,3 +293,99 @@ define <4 x i1> @shuf_nonzero_rhs2_fail(<4 x i8> %xx) { %r = icmp eq <4 x i8> %shuf, zeroinitializer ret <4 x i1> %r } + +define <2 x i1> @insert_nonzero0(<2 x i8> %xx, i8 %yy) { +; CHECK-LABEL: @insert_nonzero0( +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 1 +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = add nuw <2 x i8> %xx, + %y = add nuw i8 %yy, 1 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 1 + %r = icmp eq <2 x i8> %ins, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @insert_nonzero1(<2 x i8> %xx, i8 %yy) { +; CHECK-LABEL: @insert_nonzero1( +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 0 +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = add nuw <2 x i8> %xx, + %y = add nuw i8 %yy, 1 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 0 + %r = icmp eq <2 x i8> %ins, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @insert_nonzero_fail(<2 x i8> %xx, i8 %yy) { +; CHECK-LABEL: @insert_nonzero_fail( +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 0 +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = add nuw <2 x i8> %xx, + %y = add nuw i8 %yy, 1 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 0 + %r = icmp eq <2 x i8> %ins, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @insert_nonzero_fail2(<2 x i8> %xx, i8 %yy) { +; CHECK-LABEL: @insert_nonzero_fail2( +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = add i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 0 +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = add nuw <2 x i8> %xx, + %y = add i8 %yy, 1 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 0 + %r = icmp eq <2 x i8> %ins, zeroinitializer + ret <2 x i1> %r +} + +define <2 x i1> @insert_nonzero_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) { +; CHECK-LABEL: @insert_nonzero_any_idx( +; CHECK-NEXT: 
[[X:%.*]] = add nuw <2 x i8> [[XX:%.*]],
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %x = add nuw <2 x i8> %xx,
+  %y = add nuw i8 %yy, 1
+
+  %ins = insertelement <2 x i8> %x, i8 %y, i32 %idx
+  %r = icmp eq <2 x i8> %ins, zeroinitializer
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @insert_nonzero_any_idx_fail(<2 x i8> %xx, i8 %yy, i32 %idx) {
+; CHECK-LABEL: @insert_nonzero_any_idx_fail(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]],
+; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %x = add nuw <2 x i8> %xx,
+  %y = add nuw i8 %yy, 1
+
+  %ins = insertelement <2 x i8> %x, i8 %y, i32 %idx
+  %r = icmp eq <2 x i8> %ins, zeroinitializer
+  ret <2 x i1> %r
+}

From 9c545a14c09051b011358854655c1f466d656e79 Mon Sep 17 00:00:00 2001
From: Noah Goldstein
Date: Wed, 3 Apr 2024 15:34:28 -0500
Subject: [PATCH 051/886] [ValueTracking] Add support for `insertelement` in `isKnownNonZero`

Inserts don't modify the data, so if all elements that end up in the
destination are non-zero the result is non-zero.

Closes #87703
---
 llvm/lib/Analysis/ValueTracking.cpp           | 23 +++++++++++++++++++
 .../Transforms/InstSimplify/known-non-zero.ll | 18 +++------------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index b3029f440ca2a..9f16eaf9e0990 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2763,6 +2763,29 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
       return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
     });
   }
+  case Instruction::InsertElement: {
+    if (isa(I->getType()))
+      break;
+
+    const Value *Vec = I->getOperand(0);
+    const Value *Elt = I->getOperand(1);
+    auto *CIdx = dyn_cast(I->getOperand(2));
+
+    unsigned NumElts = DemandedElts.getBitWidth();
+    APInt DemandedVecElts = DemandedElts;
+    bool SkipElt = false;
+    // If we know the index we are inserting to, clear it from Vec check.
+    if (CIdx && CIdx->getValue().ult(NumElts)) {
+      DemandedVecElts.clearBit(CIdx->getZExtValue());
+      SkipElt = !DemandedElts[CIdx->getZExtValue()];
+    }
+
+    // Result is non-zero if Elt is non-zero and rest of the demanded elts in
+    // Vec are non-zero.
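+    // For example, with DemandedElts == 0b11 and CIdx == 1, the inserted
+    // element must be non-zero and lane 0 of Vec must be non-zero; lane 1 of
+    // Vec is overwritten by the insert, so it is not checked.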
+ return (SkipElt || isKnownNonZero(Elt, Depth, Q)) && + (DemandedVecElts.isZero() || + isKnownNonZero(Vec, DemandedVecElts, Depth, Q)); + } case Instruction::ExtractElement: if (const auto *EEI = dyn_cast(I)) { const Value *Vec = EEI->getVectorOperand(); diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 1417e86ee678c..d9b8f5eed3239 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -296,11 +296,7 @@ define <4 x i1> @shuf_nonzero_rhs2_fail(<4 x i8> %xx) { define <2 x i1> @insert_nonzero0(<2 x i8> %xx, i8 %yy) { ; CHECK-LABEL: @insert_nonzero0( -; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 1 -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %x = add nuw <2 x i8> %xx, %y = add nuw i8 %yy, 1 @@ -312,11 +308,7 @@ define <2 x i1> @insert_nonzero0(<2 x i8> %xx, i8 %yy) { define <2 x i1> @insert_nonzero1(<2 x i8> %xx, i8 %yy) { ; CHECK-LABEL: @insert_nonzero1( -; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 0 -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %x = add nuw <2 x i8> %xx, %y = add nuw i8 %yy, 1 @@ -360,11 +352,7 @@ define <2 x i1> @insert_nonzero_fail2(<2 x i8> %xx, i8 %yy) { define <2 x i1> @insert_nonzero_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-LABEL: @insert_nonzero_any_idx( -; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[INS]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %x = add nuw <2 x i8> %xx, %y = add nuw i8 %yy, 1 From 195d278d502308655edb1e9ff1c6f0c9256d0d15 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 21:41:47 -0500 Subject: [PATCH 052/886] [ValueTracking] Add tests for `xor`/`disjoint or` in `getInvertibleOperands`; NFC --- llvm/test/Transforms/InstSimplify/icmp.ll | 94 ++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll index 3109768bdfe00..a66f7cb879ef9 100644 --- a/llvm/test/Transforms/InstSimplify/icmp.ll +++ b/llvm/test/Transforms/InstSimplify/icmp.ll @@ -270,7 +270,7 @@ define i1 @load_ptr(ptr %p) { define i1 @load_ptr_null_valid(ptr %p) null_pointer_is_valid { ; CHECK-LABEL: @load_ptr_null_valid( -; CHECK-NEXT: [[LOAD_P:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !0 +; CHECK-NEXT: [[LOAD_P:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable [[META0:![0-9]+]] ; CHECK-NEXT: [[R:%.*]] = icmp ne ptr [[LOAD_P]], null ; CHECK-NEXT: ret i1 [[R]] ; @@ -278,3 +278,95 @@ define i1 @load_ptr_null_valid(ptr %p) null_pointer_is_valid { %r = icmp ne ptr %load_p, null ret i1 %r } + +define i1 @non_eq_disjoint_or_common_op(i8 %x, i8 %y, i8 %ww, i8 %a) { +; CHECK-LABEL: @non_eq_disjoint_or_common_op( +; CHECK-NEXT: [[W:%.*]] = add nuw i8 [[WW:%.*]], 1 +; CHECK-NEXT: 
[[Z:%.*]] = add i8 [[Y:%.*]], [[W]] +; CHECK-NEXT: [[XY:%.*]] = or disjoint i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[XZ:%.*]] = or disjoint i8 [[X]], [[Z]] +; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] +; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] +; CHECK-NEXT: ret i1 [[R]] +; + %w = add nuw i8 %ww, 1 + %z = add i8 %y, %w + + %xy = or disjoint i8 %x, %y + %xz = or disjoint i8 %x, %z + + %axy = add i8 %a, %xy + %axz = add i8 %a, %xz + %r = icmp eq i8 %axy, %axz + ret i1 %r +} + +define i1 @non_eq_disjoint_or_common_op_fail(i8 %x, i8 %y, i8 %ww, i8 %a) { +; CHECK-LABEL: @non_eq_disjoint_or_common_op_fail( +; CHECK-NEXT: [[W:%.*]] = add nuw i8 [[WW:%.*]], 1 +; CHECK-NEXT: [[Z:%.*]] = add i8 [[Y:%.*]], [[W]] +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[XZ:%.*]] = or disjoint i8 [[X]], [[Z]] +; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] +; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] +; CHECK-NEXT: ret i1 [[R]] +; + %w = add nuw i8 %ww, 1 + %z = add i8 %y, %w + + %xy = or i8 %x, %y + %xz = or disjoint i8 %x, %z + + %axy = add i8 %a, %xy + %axz = add i8 %a, %xz + %r = icmp eq i8 %axy, %axz + ret i1 %r +} + +define i1 @non_eq_xor_common_op(i8 %x, i8 %y, i8 %ww, i8 %a) { +; CHECK-LABEL: @non_eq_xor_common_op( +; CHECK-NEXT: [[W:%.*]] = add nuw i8 [[WW:%.*]], 1 +; CHECK-NEXT: [[Z:%.*]] = add i8 [[Y:%.*]], [[W]] +; CHECK-NEXT: [[XY:%.*]] = xor i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[XZ:%.*]] = xor i8 [[X]], [[Z]] +; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] +; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] +; CHECK-NEXT: ret i1 [[R]] +; + %w = add nuw i8 %ww, 1 + %z = add i8 %y, %w + + %xy = xor i8 %y, %x + %xz = xor i8 %x, %z + + %axy = add i8 %a, %xy + %axz = add i8 %a, %xz + %r = icmp eq i8 %axy, %axz + ret i1 %r +} + +define i1 @non_eq_xor_common_op_fail(i8 %x, i8 %y, i8 %ww, i8 %a) { +; CHECK-LABEL: @non_eq_xor_common_op_fail( +; CHECK-NEXT: [[W:%.*]] = add nsw i8 [[WW:%.*]], 1 +; CHECK-NEXT: [[Z:%.*]] = add i8 [[Y:%.*]], [[W]] +; CHECK-NEXT: [[XY:%.*]] = xor i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[XZ:%.*]] = xor i8 [[X]], [[Z]] +; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] +; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] +; CHECK-NEXT: ret i1 [[R]] +; + %w = add nsw i8 %ww, 1 + %z = add i8 %y, %w + + %xy = xor i8 %y, %x + %xz = xor i8 %x, %z + + %axy = add i8 %a, %xy + %axz = add i8 %a, %xz + %r = icmp eq i8 %axy, %axz + ret i1 %r +} From 0c57a2e4b4e5a6e5dda78a313fc8d8e3c91797f5 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 17:40:03 -0500 Subject: [PATCH 053/886] [ValueTracking] Add support for `xor`/`disjoint or` in `getInvertibleOperands` This strengthens our `isKnownNonEqual` logic with some fairly trivial cases. 
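For instance (an illustrative sketch; the value names are hypothetical and
not taken from the new tests), once %a != %b is known:

  %lhs = xor i8 %x, %a
  %rhs = xor i8 %x, %b
  %cmp = icmp eq i8 %lhs, %rhs   ; known false

xor with a common operand is invertible: xor'ing both sides with %x recovers
%a and %b, so %lhs == %rhs would force %a == %b. The same argument applies to
`or disjoint`, which behaves like `xor` when the operands share no set bits.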
Proofs: https://alive2.llvm.org/ce/z/4pxRTj Closes #87705 --- llvm/lib/Analysis/ValueTracking.cpp | 15 ++++++++++++++- llvm/test/Transforms/InstSimplify/icmp.ll | 18 ++---------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9f16eaf9e0990..f3ea73b2f0ec4 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3106,7 +3106,20 @@ getInvertibleOperands(const Operator *Op1, switch (Op1->getOpcode()) { default: break; - case Instruction::Add: + case Instruction::Or: + if (!cast(Op1)->isDisjoint() || + !cast(Op2)->isDisjoint()) + break; + [[fallthrough]]; + case Instruction::Xor: + case Instruction::Add: { + Value *Other; + if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other)))) + return std::make_pair(Op1->getOperand(1), Other); + if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other)))) + return std::make_pair(Op1->getOperand(0), Other); + break; + } case Instruction::Sub: if (Op1->getOperand(0) == Op2->getOperand(0)) return getOperands(1); diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll index a66f7cb879ef9..d179909811549 100644 --- a/llvm/test/Transforms/InstSimplify/icmp.ll +++ b/llvm/test/Transforms/InstSimplify/icmp.ll @@ -281,14 +281,7 @@ define i1 @load_ptr_null_valid(ptr %p) null_pointer_is_valid { define i1 @non_eq_disjoint_or_common_op(i8 %x, i8 %y, i8 %ww, i8 %a) { ; CHECK-LABEL: @non_eq_disjoint_or_common_op( -; CHECK-NEXT: [[W:%.*]] = add nuw i8 [[WW:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = add i8 [[Y:%.*]], [[W]] -; CHECK-NEXT: [[XY:%.*]] = or disjoint i8 [[X:%.*]], [[Y]] -; CHECK-NEXT: [[XZ:%.*]] = or disjoint i8 [[X]], [[Z]] -; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] -; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %w = add nuw i8 %ww, 1 %z = add i8 %y, %w @@ -327,14 +320,7 @@ define i1 @non_eq_disjoint_or_common_op_fail(i8 %x, i8 %y, i8 %ww, i8 %a) { define i1 @non_eq_xor_common_op(i8 %x, i8 %y, i8 %ww, i8 %a) { ; CHECK-LABEL: @non_eq_xor_common_op( -; CHECK-NEXT: [[W:%.*]] = add nuw i8 [[WW:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = add i8 [[Y:%.*]], [[W]] -; CHECK-NEXT: [[XY:%.*]] = xor i8 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[XZ:%.*]] = xor i8 [[X]], [[Z]] -; CHECK-NEXT: [[AXY:%.*]] = add i8 [[A:%.*]], [[XY]] -; CHECK-NEXT: [[AXZ:%.*]] = add i8 [[A]], [[XZ]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AXY]], [[AXZ]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %w = add nuw i8 %ww, 1 %z = add i8 %y, %w From 2646790155f73d6cfb28ec0ee472056740e4658e Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 21:41:53 -0500 Subject: [PATCH 054/886] [ValueTracking] Add tests for `xor`/`disjoint or` in `isKnownNonZero`; NFC --- llvm/test/Transforms/InstSimplify/icmp.ll | 68 +++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll index d179909811549..530dc16144eba 100644 --- a/llvm/test/Transforms/InstSimplify/icmp.ll +++ b/llvm/test/Transforms/InstSimplify/icmp.ll @@ -356,3 +356,71 @@ define i1 @non_eq_xor_common_op_fail(i8 %x, i8 %y, i8 %ww, i8 %a) { %r = icmp eq i8 %axy, %axz ret i1 %r } + +define i1 @non_eq_disjoint_or(i8 %x, i8 %yy, i8 %w) { +; CHECK-LABEL: @non_eq_disjoint_or( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 
+; CHECK-NEXT: [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = or disjoint i8 [[Y]], [[W]] +; CHECK-NEXT: [[RHS:%.*]] = add i8 [[X]], [[VAL]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %lhs = add i8 %x, %w + %val = or disjoint i8 %y, %w + %rhs = add i8 %x, %val + %r = icmp eq i8 %lhs, %rhs + ret i1 %r +} + +define i1 @non_eq_or_fail(i8 %x, i8 %yy, i8 %w) { +; CHECK-LABEL: @non_eq_or_fail( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = or i8 [[Y]], [[W]] +; CHECK-NEXT: [[RHS:%.*]] = add i8 [[X]], [[VAL]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %lhs = add i8 %x, %w + %val = or i8 %y, %w + %rhs = add i8 %x, %val + %r = icmp eq i8 %lhs, %rhs + ret i1 %r +} + +define i1 @non_eq_xor(i8 %x, i8 %yy, i8 %w) { +; CHECK-LABEL: @non_eq_xor( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = xor i8 [[Y]], [[W]] +; CHECK-NEXT: [[RHS:%.*]] = add i8 [[X]], [[VAL]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %lhs = add i8 %x, %w + %val = xor i8 %y, %w + %rhs = add i8 %x, %val + %r = icmp eq i8 %lhs, %rhs + ret i1 %r +} + +define i1 @non_eq_xor_fail(i8 %x, i8 %yy, i8 %w) { +; CHECK-LABEL: @non_eq_xor_fail( +; CHECK-NEXT: [[Y:%.*]] = add nsw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = xor i8 [[Y]], [[W]] +; CHECK-NEXT: [[RHS:%.*]] = add i8 [[X]], [[VAL]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]] +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nsw i8 %yy, 1 + %lhs = add i8 %x, %w + %val = xor i8 %y, %w + %rhs = add i8 %x, %val + %r = icmp eq i8 %lhs, %rhs + ret i1 %r +} From 81cdd35c0c8db22bfdd1f06cb2118d17fd99fc07 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 3 Apr 2024 17:41:45 -0500 Subject: [PATCH 055/886] [ValueTracking] Add support for `xor`/`disjoint or` in `isKnownNonZero` Handles cases like `X ^ Y == X` / `X disjoint| Y == X`. Both of these cases have identical logic to the existing `add` case, so just converting the `add` code to a more general helper. Proofs: https://alive2.llvm.org/ce/z/Htm7pe Closes #87706 --- llvm/lib/Analysis/ValueTracking.cpp | 40 +++++++++++++++-------- llvm/test/Transforms/InstSimplify/icmp.ll | 14 ++------ 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index f3ea73b2f0ec4..3a10de72a2756 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3207,20 +3207,33 @@ getInvertibleOperands(const Operator *Op1, return std::nullopt; } -/// Return true if V2 == V1 + X, where X is known non-zero. -static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, - const SimplifyQuery &Q) { +/// Return true if V1 == (binop V2, X), where X is known non-zero. +/// Only handle a small subset of binops where (binop V2, X) with non-zero X +/// implies V2 != V1. 
+static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
+                                      unsigned Depth, const SimplifyQuery &Q) {
   const BinaryOperator *BO = dyn_cast(V1);
-  if (!BO || BO->getOpcode() != Instruction::Add)
+  if (!BO)
     return false;
-  Value *Op = nullptr;
-  if (V2 == BO->getOperand(0))
-    Op = BO->getOperand(1);
-  else if (V2 == BO->getOperand(1))
-    Op = BO->getOperand(0);
-  else
-    return false;
-  return isKnownNonZero(Op, Depth + 1, Q);
+  switch (BO->getOpcode()) {
+  default:
+    break;
+  case Instruction::Or:
+    if (!cast(V1)->isDisjoint())
+      break;
+    [[fallthrough]];
+  case Instruction::Xor:
+  case Instruction::Add:
+    Value *Op = nullptr;
+    if (V2 == BO->getOperand(0))
+      Op = BO->getOperand(1);
+    else if (V2 == BO->getOperand(1))
+      Op = BO->getOperand(0);
+    else
+      return false;
+    return isKnownNonZero(Op, Depth + 1, Q);
+  }
+  return false;
 }
 
 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
@@ -3380,7 +3393,8 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
     };
   }
 
-  if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
+  if (isModifyingBinopOfNonZero(V1, V2, Depth, Q) ||
+      isModifyingBinopOfNonZero(V2, V1, Depth, Q))
     return true;
 
   if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll
index 530dc16144eba..c94922197096f 100644
--- a/llvm/test/Transforms/InstSimplify/icmp.ll
+++ b/llvm/test/Transforms/InstSimplify/icmp.ll
@@ -359,12 +359,7 @@ define i1 @non_eq_xor_common_op_fail(i8 %x, i8 %y, i8 %ww, i8 %a) {
 
 define i1 @non_eq_disjoint_or(i8 %x, i8 %yy, i8 %w) {
 ; CHECK-LABEL: @non_eq_disjoint_or(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT:    [[VAL:%.*]] = or disjoint i8 [[Y]], [[W]]
-; CHECK-NEXT:    [[RHS:%.*]] = add i8 [[X]], [[VAL]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %y = add nuw i8 %yy, 1
   %lhs = add i8 %x, %w
@@ -393,12 +388,7 @@ define i1 @non_eq_or_fail(i8 %x, i8 %yy, i8 %w) {
 
 define i1 @non_eq_xor(i8 %x, i8 %yy, i8 %w) {
 ; CHECK-LABEL: @non_eq_xor(
-; CHECK-NEXT:    [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1
-; CHECK-NEXT:    [[LHS:%.*]] = add i8 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT:    [[VAL:%.*]] = xor i8 [[Y]], [[W]]
-; CHECK-NEXT:    [[RHS:%.*]] = add i8 [[X]], [[VAL]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[LHS]], [[RHS]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %y = add nuw i8 %yy, 1
   %lhs = add i8 %x, %w

From 2b00a73f62605fcaeaedd358ba8b55fad06571aa Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 10 Apr 2024 14:33:56 -0400
Subject: [PATCH 056/886] [SLP]Buildvector for alternate instructions with
 non-profitable gather operands.

If the operands of the potentially alternate node are going to produce
buildvector sequences, which result in more instructions than the
original code, then such instructions should not be vectorized as an
alternate node; it is better to end up with the buildvector node.

Left column - experimental, Right - reference.
Metric: size..text

Program                                                                          size..text
                                                                                 results      results0     diff
test-suite :: SingleSource/Benchmarks/Adobe-C++/loop_unroll.test                   413680.00    416272.00   0.6%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test         12351788.00  12354844.00   0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test               664901.00    664949.00   0.0%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test                664901.00    664949.00   0.0%
test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test            1171371.00   1171355.00  -0.0%
test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test                     1036396.00   1036284.00  -0.0%
test-suite :: MultiSource/Benchmarks/MiBench/consumer-jpeg/consumer-jpeg.test      111280.00    111248.00  -0.0%
test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test          1392113.00   1391361.00  -0.1%
test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test         1392113.00   1391361.00  -0.1%
test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test     281676.00    281452.00  -0.1%
test-suite :: MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes.test                   3025.00      3019.00  -0.2%
test-suite :: MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig.test               6351.00      6335.00  -0.3%

Metric: SLP.NumVectorInstructions

Program                                                                          SLP.NumVectorInstructions
                                                                                 results   results0   diff
test-suite :: MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes.test                  15.00      16.00    6.7%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test               1703.00    1707.00    0.2%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test              1703.00    1707.00    0.2%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test         26241.00   26239.00   -0.0%
test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test           11761.00   11754.00   -0.1%
test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test     824.00     822.00   -0.2%
test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test         5668.00    5654.00   -0.2%
test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test          5668.00    5654.00   -0.2%
test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test                  792.00     790.00   -0.3%
test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test                 792.00     790.00   -0.3%
test-suite :: MultiSource/Benchmarks/FreeBench/pifft/pifft.test                   1389.00    1384.00   -0.4%
test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test                      596.00     590.00   -1.0%
test-suite :: MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig.test               6.00       5.00  -16.7%

Metric: exec_time

Program                                                                          exec_time
                                                                                 results   results0   diff
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test           99.14     100.00    0.9%

Other changes are not significant (less than 0.1 percent, with exec time
under 5 secs).

SingleSource/Benchmarks/Adobe-C++/loop_unroll - same small patterns remain
scalar, smaller code.
External/SPEC/CFP2017rate/526.blender_r/526.blender_r - many small changes,
some extra stores get vectorized.
External/SPEC/CINT2017speed/625.x264_s/625.x264_s
External/SPEC/CINT2017rate/525.x264_r/525.x264_r
x264 has one change in a loop body, in function ssim_end4; some code remains
scalar, resulting in less code size.
External/SPEC/CFP2017rate/511.povray_r/511.povray_r - some extra code gets
vectorized, looks like some other patterns were matched.
MultiSource/Benchmarks/7zip/7zip-benchmark - extra stores were vectorized
(looks like the graphs become profitable)
MultiSource/Benchmarks/MiBench/consumer-jpeg/consumer-jpeg - small changes in
vectorized code (some small parts remain scalar).
External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r
External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s
Many changes are caused by the fact that the code of one function
(ConvertLCHabToRGB) becomes smaller and this function gets inlined after
that.
MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc - some small
changes here and there, some extra code is vectorized, some remains scalar
(2 x vectors)
MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes - emits 2 scalars + 2
insertelems instead of insert, broadcast, alt code (3 instructions, total 5
insts)
MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig - small graph becomes
profitable and gets vectorized.
External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r
External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s
Some small graph becomes profitable and gets vectorized.
MultiSource/Benchmarks/FreeBench/pifft/pifft - no changes in final code.

Reviewers: RKSimon, dtcxzyw

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/84978
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 128 ++++++++++++++++++
 .../AArch64/extractelements-to-shuffle.ll     |  40 +++---
 .../X86/ext-int-reduced-not-operand.ll        |   9 +-
 .../X86/gather-move-out-of-loop.ll            |  10 +-
 ...gathered-delayed-nodes-with-reused-user.ll |  18 ++-
 .../non-scheduled-inst-reused-as-last-inst.ll |  12 +-
 .../X86/reorder_with_external_users.ll        |  16 +--
 .../SLPVectorizer/alternate-non-profitable.ll |  36 +++--
 8 files changed, 192 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6b758f63a7961..4ac719af60295 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2995,6 +2995,15 @@ class BoUpSLP {
     return ScalarToTreeEntry.lookup(V);
   }
 
+  /// Check that the operand nodes of an alternate node do not generate
+  /// buildvector sequences. If they do, then it is probably not worth it to
+  /// build an alternate shuffle, if the number of buildvector operands plus
+  /// the alternate instruction is greater than the number of buildvector
+  /// instructions.
+  /// \param S the instructions state of the analyzed values.
+  /// \param VL list of the instructions with alternate opcodes.
+  bool areAltOperandsProfitable(const InstructionsState &S,
+                                ArrayRef VL) const;
+
   /// Checks if the specified list of the instructions/values can be vectorized
   /// and fills required data before actual scheduling of the instructions.
   TreeEntry::EntryState getScalarsVectorizationState(
@@ -5777,6 +5786,117 @@ static bool isAlternateInstruction(const Instruction *I,
                                    const Instruction *AltOp,
                                    const TargetLibraryInfo &TLI);
 
+bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
+                                       ArrayRef VL) const {
+  unsigned Opcode0 = S.getOpcode();
+  unsigned Opcode1 = S.getAltOpcode();
+  // The opcode mask selects between the two opcodes.
+  SmallBitVector OpcodeMask(VL.size(), false);
+  for (unsigned Lane : seq(0, VL.size()))
+    if (cast(VL[Lane])->getOpcode() == Opcode1)
+      OpcodeMask.set(Lane);
+  // If this pattern is supported by the target then consider it profitable.
+  if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
+                           Opcode0, Opcode1, OpcodeMask))
+    return true;
+  SmallVector Operands;
+  for (unsigned I : seq(0, S.MainOp->getNumOperands())) {
+    Operands.emplace_back();
+    // Prepare the operand vector.
+    for (Value *V : VL)
+      Operands.back().push_back(cast(V)->getOperand(I));
+  }
+  if (Operands.size() == 2) {
+    // Try to find the best operand candidates.
+    for (unsigned I : seq(0, VL.size() - 1)) {
+      SmallVector> Candidates(3);
+      Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
+      Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
+      Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
+      std::optional Res = findBestRootPair(Candidates);
+      switch (Res.value_or(0)) {
+      case 0:
+        break;
+      case 1:
+        std::swap(Operands[0][I + 1], Operands[1][I + 1]);
+        break;
+      case 2:
+        std::swap(Operands[0][I], Operands[1][I]);
+        break;
+      default:
+        llvm_unreachable("Unexpected index.");
+      }
+    }
+  }
+  DenseSet UniqueOpcodes;
+  constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
+  unsigned NonInstCnt = 0;
+  // Estimate the number of instructions required for the vectorized node and
+  // for the buildvector node.
+  unsigned UndefCnt = 0;
+  // Count the number of extra shuffles required for vector nodes.
+  unsigned ExtraShuffleInsts = 0;
+  // Check that the operands do not contain the same values, and create either
+  // a perfect diamond match or a shuffled match.
+  if (Operands.size() == 2) {
+    // Do not count same operands twice.
+    if (Operands.front() == Operands.back()) {
+      Operands.erase(Operands.begin());
+    } else if (!allConstant(Operands.front()) &&
+               all_of(Operands.front(), [&](Value *V) {
+                 return is_contained(Operands.back(), V);
+               })) {
+      Operands.erase(Operands.begin());
+      ++ExtraShuffleInsts;
+    }
+  }
+  const Loop *L = LI->getLoopFor(S.MainOp->getParent());
+  // Vectorize the node if:
+  // 1. at least a single operand is constant or splat.
+  // 2. Operands have many loop invariants (the instructions are not loop
+  // invariants).
+  // 3. At least a single unique operand is supposed to be vectorized.
+  return none_of(Operands,
+                 [&](ArrayRef Op) {
+                   if (allConstant(Op) ||
+                       (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
+                        getSameOpcode(Op, *TLI).MainOp))
+                     return false;
+                   DenseMap Uniques;
+                   for (Value *V : Op) {
+                     if (isa(V) ||
+                         getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
+                       if (isa(V))
+                         ++UndefCnt;
+                       continue;
+                     }
+                     auto Res = Uniques.try_emplace(V, 0);
+                     // Found first duplicate - need to add shuffle.
+                     if (!Res.second && Res.first->second == 1)
+                       ++ExtraShuffleInsts;
+                     ++Res.first->getSecond();
+                     if (auto *I = dyn_cast(V))
+                       UniqueOpcodes.insert(I->getOpcode());
+                     else if (Res.second)
+                       ++NonInstCnt;
+                   }
+                   return none_of(Uniques, [&](const auto &P) {
+                     return P.first->hasNUsesOrMore(P.second + 1) &&
+                            none_of(P.first->users(), [&](User *U) {
+                              return getTreeEntry(U) || Uniques.contains(U);
+                            });
+                   });
+                 }) ||
+         // Do not vectorize the node if the estimated number of vector
+         // instructions is greater than the estimated number of buildvector
+         // instructions. Number of vector operands is number of vector
+         // instructions + number of vector instructions for operands
+         // (buildvectors). Number of buildvector instructions is just
+         // number_of_operands * number_of_scalars.
+ (UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() && + (UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts + + NumAltInsts) < S.MainOp->getNumOperands() * VL.size()); +} + BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( InstructionsState &S, ArrayRef VL, bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder, SmallVectorImpl &PointerOps) const { @@ -6074,6 +6194,14 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return TreeEntry::NeedToGather; } + if (!areAltOperandsProfitable(S, VL)) { + LLVM_DEBUG( + dbgs() + << "SLP: ShuffleVector not vectorized, operands are buildvector and " + "the whole alt sequence is not profitable.\n"); + return TreeEntry::NeedToGather; + } + return TreeEntry::Vectorize; } default: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll index d2711d0546c0a..283cc07dfb9b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll @@ -104,16 +104,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1 ; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1 ; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[AND94]], i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <2 x i32> [[TMP23]], zeroinitializer -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP23]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> [[TMP25]], <4 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> poison, i32 [[AND95]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <2 x i32> [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <2 x i32> [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i1> [[TMP29]], <2 x i1> [[TMP30]], <4 x i32> +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0 +; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0 +; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0 +; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL100]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TOBOOL]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL98]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i1> [[TMP25]], i1 [[TOBOOL103]], i32 1 +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i1> [[TMP27]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32> ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]] @@ -149,16 +149,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1 ; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1 ; CHECK-NEXT: 
[[SHR136]] = lshr i32 [[B_1300]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> poison, i32 [[AND133]], i32 0 -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x i32> [[TMP39]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <2 x i32> [[TMP40]], zeroinitializer -; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <2 x i32> [[TMP40]], zeroinitializer -; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <2 x i1> [[TMP41]], <2 x i1> [[TMP42]], <4 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> poison, i32 [[AND134]], i32 0 -; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <2 x i32> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i32> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i1> [[TMP46]], <2 x i1> [[TMP47]], <4 x i32> +; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0 +; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0 +; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0 +; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL144]], i32 0 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i1> [[TMP40]], i1 [[TOBOOL137]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i1> [[TMP41]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL139]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP42]], i1 [[TOBOOL147]], i32 1 +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP39]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32> ; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll index 05534fa961ee4..b76e26e0fd571 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll @@ -9,13 +9,8 @@ define i64 @wombat() { ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[PHI]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[PHI]] to i64 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]] ; CHECK-NEXT: ret i64 [[OR]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll index 78fc3a60f0514..3c3dea3f1ea88 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll @@ -4,14 +4,12 @@ define void @test(i16 %0) { ; CHECK-LABEL: @test( ; CHECK-NEXT: 
for.body92.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> , i16 [[TMP0:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP5]], <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY92:%.*]] ; CHECK: for.body92: +; CHECK-NEXT: [[CONV177_I:%.*]] = sext i16 0 to i32 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[CONV177_I]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8 ; CHECK-NEXT: br label [[FOR_BODY92]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll index 16ede231c200e..19a8aa9b61815 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll @@ -6,21 +6,19 @@ define i64 @foo() { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ] ; CHECK-NEXT: ret i64 0 ; CHECK: bb3: ; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ] -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> , i64 [[PHI5]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> , <2 x i32> -; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1 -; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0 +; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> , i64 [[ADD]], i32 0 ; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll index 3a9eca2bf2e6b..59cd1c0ccddf8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -4,22 +4,22 @@ define void @foo() { ; 
CHECK-LABEL: define void @foo() { ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 0, i32 0 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0 ; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 ; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) ; CHECK-NEXT: br label [[BB4]] ; CHECK: bb4: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 ; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] ; CHECK: bb5: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ] +; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ] ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll index 09b3d25fd6dc0..93258f2975f34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll @@ -112,10 +112,10 @@ define void @addsub_and_external_users(ptr %A, ptr %ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> @@ -159,10 +159,10 @@ define void @subadd_and_external_users(ptr %A, ptr %ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> 
zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index c6e2cf5543e12..287b623f63690 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -33,11 +33,10 @@ define <2 x float> @replace_through_casts(i16 %inp) { ; CHECK-LABEL: define <2 x float> @replace_through_casts( ; CHECK-SAME: i16 [[INP:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i16> [[TMP2]] to <2 x float> -; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP2]] to <2 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1 ; CHECK-NEXT: ret <2 x float> [[R]] ; %add = add nsw i16 %inp, -10 @@ -118,11 +117,10 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) { ; CHECK-LABEL: define <2 x i32> @replace_through_int_casts( ; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[R]] ; %add = add nsw i16 %inp, -10 @@ -136,11 +134,10 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) { define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) { ; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only( ; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[INP]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[INP]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = 
insertelement <2 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[R]] ; %2 = sext i16 %inp to i32 @@ -174,11 +171,10 @@ define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x ; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags( ; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[ADD]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i8> [[TMP2]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1 ; CHECK-NEXT: ret <2 x i8> [[R]] ; %add = xor i8 %inp, 5 From 0a1317564a6b437760d96f0a227a3c910875428d Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 10 Apr 2024 20:34:58 +0200 Subject: [PATCH 057/886] [libc++] Adds a global private constructor tag. (#87920) This removes the similar tags used in the chrono tzdb implementation. Fixes: https://github.com/llvm/llvm-project/issues/85432 --- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__chrono/leap_second.h | 4 +-- libcxx/include/__chrono/time_zone_link.h | 4 +-- libcxx/include/__locale | 12 ++++---- libcxx/include/__stop_token/stop_callback.h | 9 +++--- .../__utility/private_constructor_tag.h | 28 +++++++++++++++++++ libcxx/include/module.modulemap | 21 +++++++------- libcxx/src/CMakeLists.txt | 2 -- libcxx/src/include/tzdb/leap_second_private.h | 27 ------------------ .../src/include/tzdb/time_zone_link_private.h | 27 ------------------ libcxx/src/locale.cpp | 2 +- libcxx/src/tzdb.cpp | 6 ++-- .../private_constructor_tag.compile.pass.cpp | 19 +++++++++++++ .../time.zone.leap/assign.copy.pass.cpp | 2 -- .../time.zone.leap/cons.copy.pass.cpp | 2 -- .../time.zone.leap/members/date.pass.cpp | 2 -- .../time.zone.leap/members/value.pass.cpp | 2 -- .../nonmembers/comparison.pass.cpp | 2 -- libcxx/test/support/test_chrono_leap_second.h | 9 ++---- libcxx/utils/generate_iwyu_mapping.py | 1 + 20 files changed, 79 insertions(+), 103 deletions(-) create mode 100644 libcxx/include/__utility/private_constructor_tag.h delete mode 100644 libcxx/src/include/tzdb/leap_second_private.h delete mode 100644 libcxx/src/include/tzdb/time_zone_link_private.h create mode 100644 libcxx/test/libcxx/utilities/utility/private_constructor_tag.compile.pass.cpp diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index a4a58a787ee9a..d4e8c196a9a88 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -863,6 +863,7 @@ set(files __utility/pair.h __utility/piecewise_construct.h __utility/priority_tag.h + __utility/private_constructor_tag.h __utility/rel_ops.h __utility/small_buffer.h __utility/swap.h diff --git a/libcxx/include/__chrono/leap_second.h b/libcxx/include/__chrono/leap_second.h index 4e67cc2d65277..557abc15ff184 100644 --- a/libcxx/include/__chrono/leap_second.h +++ b/libcxx/include/__chrono/leap_second.h @@ -22,6 +22,7 @@ # include <__compare/ordering.h> # include <__compare/three_way_comparable.h> # include 
<__config> +# include <__utility/private_constructor_tag.h> # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,9 +36,8 @@ namespace chrono { class leap_second { public: - struct __constructor_tag; [[nodiscard]] - _LIBCPP_HIDE_FROM_ABI explicit constexpr leap_second(__constructor_tag&&, sys_seconds __date, seconds __value) + _LIBCPP_HIDE_FROM_ABI explicit constexpr leap_second(__private_constructor_tag, sys_seconds __date, seconds __value) : __date_(__date), __value_(__value) {} _LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default; diff --git a/libcxx/include/__chrono/time_zone_link.h b/libcxx/include/__chrono/time_zone_link.h index 17e915d2677a8..c76ddeff9f966 100644 --- a/libcxx/include/__chrono/time_zone_link.h +++ b/libcxx/include/__chrono/time_zone_link.h @@ -18,6 +18,7 @@ # include <__compare/strong_order.h> # include <__config> +# include <__utility/private_constructor_tag.h> # include # include @@ -37,9 +38,8 @@ namespace chrono { class time_zone_link { public: - struct __constructor_tag; _LIBCPP_NODISCARD_EXT - _LIBCPP_HIDE_FROM_ABI explicit time_zone_link(__constructor_tag&&, string_view __name, string_view __target) + _LIBCPP_HIDE_FROM_ABI explicit time_zone_link(__private_constructor_tag, string_view __name, string_view __target) : __name_{__name}, __target_{__target} {} _LIBCPP_HIDE_FROM_ABI time_zone_link(time_zone_link&&) = default; diff --git a/libcxx/include/__locale b/libcxx/include/__locale index fab87f0d6a279..36ac099d650e4 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -16,6 +16,7 @@ #include <__mutex/once_flag.h> #include <__type_traits/make_unsigned.h> #include <__utility/no_destroy.h> +#include <__utility/private_constructor_tag.h> #include #include #include @@ -97,8 +98,7 @@ private: template friend struct __no_destroy; - struct __private_tag {}; - _LIBCPP_HIDE_FROM_ABI explicit locale(__private_tag, __imp* __loc) : __locale_(__loc) {} + _LIBCPP_HIDE_FROM_ABI explicit locale(__private_constructor_tag, __imp* __loc) : __locale_(__loc) {} void __install_ctor(const locale&, facet*, long); static locale& __global(); @@ -1248,10 +1248,10 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; #endif -extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS - codecvt_byname; // deprecated in C++20 -extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS - codecvt_byname; // deprecated in C++20 +extern template class _LIBCPP_DEPRECATED_IN_CXX20 +_LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // deprecated in C++20 +extern template class _LIBCPP_DEPRECATED_IN_CXX20 +_LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // deprecated in C++20 #ifndef _LIBCPP_HAS_NO_CHAR8_T extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 diff --git a/libcxx/include/__stop_token/stop_callback.h b/libcxx/include/__stop_token/stop_callback.h index 9e5b0338d4667..7b526820f98a3 100644 --- a/libcxx/include/__stop_token/stop_callback.h +++ b/libcxx/include/__stop_token/stop_callback.h @@ -21,6 +21,7 @@ #include <__type_traits/is_nothrow_constructible.h> #include <__utility/forward.h> #include <__utility/move.h> +#include <__utility/private_constructor_tag.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -49,13 +50,13 @@ class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base { requires 
constructible_from<_Callback, _Cb> _LIBCPP_HIDE_FROM_ABI explicit stop_callback(const stop_token& __st, _Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>) - : stop_callback(__private_tag{}, __st.__state_, std::forward<_Cb>(__cb)) {} + : stop_callback(__private_constructor_tag{}, __st.__state_, std::forward<_Cb>(__cb)) {} template requires constructible_from<_Callback, _Cb> _LIBCPP_HIDE_FROM_ABI explicit stop_callback(stop_token&& __st, _Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>) - : stop_callback(__private_tag{}, std::move(__st.__state_), std::forward<_Cb>(__cb)) {} + : stop_callback(__private_constructor_tag{}, std::move(__st.__state_), std::forward<_Cb>(__cb)) {} _LIBCPP_HIDE_FROM_ABI ~stop_callback() { if (__state_) { @@ -74,10 +75,8 @@ class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base { friend __stop_callback_base; - struct __private_tag {}; - template - _LIBCPP_HIDE_FROM_ABI explicit stop_callback(__private_tag, _StatePtr&& __state, _Cb&& __cb) noexcept( + _LIBCPP_HIDE_FROM_ABI explicit stop_callback(__private_constructor_tag, _StatePtr&& __state, _Cb&& __cb) noexcept( is_nothrow_constructible_v<_Callback, _Cb>) : __stop_callback_base([](__stop_callback_base* __cb_base) noexcept { // stop callback is supposed to only be called once diff --git a/libcxx/include/__utility/private_constructor_tag.h b/libcxx/include/__utility/private_constructor_tag.h new file mode 100644 index 0000000000000..462cab48c9edd --- /dev/null +++ b/libcxx/include/__utility/private_constructor_tag.h @@ -0,0 +1,28 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H +#define _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// This tag allows defining non-standard exposition-only constructors while +// preventing users from being able to use them, since this reserved-name tag +// needs to be used. 
+struct __private_constructor_tag {}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 011a4818ab9d2..9c0f0ddd20c96 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -2090,18 +2090,19 @@ module std_private_utility_pair [system] { export std_private_type_traits_is_nothrow_move_assignable export std_private_utility_pair_fwd } -module std_private_utility_pair_fwd [system] { header "__fwd/pair.h" } -module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" } -module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" } -module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" } -module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" } -module std_private_utility_swap [system] { +module std_private_utility_pair_fwd [system] { header "__fwd/pair.h" } +module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" } +module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" } +module std_private_utility_private_constructor_tag [system] { header "__utility/private_constructor_tag.h" } +module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" } +module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" } +module std_private_utility_swap [system] { header "__utility/swap.h" export std_private_type_traits_is_swappable } -module std_private_utility_to_underlying [system] { header "__utility/to_underlying.h" } -module std_private_utility_unreachable [system] { header "__utility/unreachable.h" } +module std_private_utility_to_underlying [system] { header "__utility/to_underlying.h" } +module std_private_utility_unreachable [system] { header "__utility/unreachable.h" } -module std_private_variant_monostate [system] { header "__variant/monostate.h" } +module std_private_variant_monostate [system] { header "__variant/monostate.h" } -module std_private_vector_fwd [system] { header "__fwd/vector.h" } +module std_private_vector_fwd [system] { header "__fwd/vector.h" } diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 16ccb80ba3326..208500ec14fcd 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -334,8 +334,6 @@ endif() if (LIBCXX_ENABLE_LOCALIZATION AND LIBCXX_ENABLE_FILESYSTEM AND LIBCXX_ENABLE_TIME_ZONE_DATABASE) list(APPEND LIBCXX_EXPERIMENTAL_SOURCES - include/tzdb/leap_second_private.h - include/tzdb/time_zone_link_private.h include/tzdb/time_zone_private.h include/tzdb/types_private.h include/tzdb/tzdb_list_private.h diff --git a/libcxx/src/include/tzdb/leap_second_private.h b/libcxx/src/include/tzdb/leap_second_private.h deleted file mode 100644 index 7a811ab197594..0000000000000 --- a/libcxx/src/include/tzdb/leap_second_private.h +++ /dev/null @@ -1,27 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html - -#ifndef _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H -#define _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H - -#include - -_LIBCPP_BEGIN_NAMESPACE_STD - -namespace chrono { - -struct leap_second::__constructor_tag {}; - -} // namespace chrono - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H diff --git a/libcxx/src/include/tzdb/time_zone_link_private.h b/libcxx/src/include/tzdb/time_zone_link_private.h deleted file mode 100644 index 139237625274d..0000000000000 --- a/libcxx/src/include/tzdb/time_zone_link_private.h +++ /dev/null @@ -1,27 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html - -#ifndef _LIBCPP_SRC_INCLUDE_TZDB_TIME_ZONE_LINK_PRIVATE_H -#define _LIBCPP_SRC_INCLUDE_TZDB_TIME_ZONE_LINK_PRIVATE_H - -#include - -_LIBCPP_BEGIN_NAMESPACE_STD - -namespace chrono { - -struct time_zone_link::__constructor_tag {}; - -} // namespace chrono - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP_SRC_INCLUDE_TZDB_TIME_ZONE_LINK_PRIVATE_H diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index 7fdd5be181ed9..1ca88e30f63ac 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -497,7 +497,7 @@ constinit __no_destroy locale::__imp::classic_locale_imp_(__uninitialized_tag{}); // initialized below in classic() const locale& locale::classic() { - static const __no_destroy classic_locale(__private_tag{}, [] { + static const __no_destroy classic_locale(__private_constructor_tag{}, [] { // executed exactly once on first initialization of `classic_locale` locale::__imp::classic_locale_imp_.__emplace(1u); return &locale::__imp::classic_locale_imp_.__get(); diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/tzdb.cpp index 8909ecd026add..d521d810523ea 100644 --- a/libcxx/src/tzdb.cpp +++ b/libcxx/src/tzdb.cpp @@ -15,8 +15,6 @@ #include #include -#include "include/tzdb/leap_second_private.h" -#include "include/tzdb/time_zone_link_private.h" #include "include/tzdb/time_zone_private.h" #include "include/tzdb/types_private.h" #include "include/tzdb/tzdb_list_private.h" @@ -582,7 +580,7 @@ static void __parse_link(tzdb& __tzdb, istream& __input) { string __name = chrono::__parse_string(__input); chrono::__skip_line(__input); - __tzdb.links.emplace_back(time_zone_link::__constructor_tag{}, std::move(__name), std::move(__target)); + __tzdb.links.emplace_back(std::__private_constructor_tag{}, std::move(__name), std::move(__target)); } static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) { @@ -649,7 +647,7 @@ static void __parse_leap_seconds(vector& __leap_seconds, istream&& seconds __value{chrono::__parse_integral(__input, false)}; chrono::__skip_line(__input); - __leap_seconds.emplace_back(leap_second::__constructor_tag{}, __date, __value); + __leap_seconds.emplace_back(std::__private_constructor_tag{}, __date, __value); } } diff --git 
a/libcxx/test/libcxx/utilities/utility/private_constructor_tag.compile.pass.cpp b/libcxx/test/libcxx/utilities/utility/private_constructor_tag.compile.pass.cpp new file mode 100644 index 0000000000000..097e05f29cebd --- /dev/null +++ b/libcxx/test/libcxx/utilities/utility/private_constructor_tag.compile.pass.cpp @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// struct __private_constructor_tag{}; + +// The private constructor tag is intended to be a trivial type that can easily +// be used to mark a constructor exposition-only. +// +// Tests whether the type is trivial. + +#include <__utility/private_constructor_tag.h> +#include + +static_assert(std::is_trivial::value, ""); diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp index 4d91e73f38e41..6918ed6be5c14 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp @@ -27,8 +27,6 @@ #include #include -// Add the include path required by test_chrono_leap_second.h when using libc++. -// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include #include "test_chrono_leap_second.h" constexpr bool test() { diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp index e2419b7d1f09d..3dad08968d12a 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp @@ -25,8 +25,6 @@ #include #include -// Add the include path required by test_chrono_leap_second.h when using libc++. -// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include #include "test_chrono_leap_second.h" constexpr bool test() { diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp index 23f95eccfdecd..6f9fe1c47d351 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp @@ -23,8 +23,6 @@ #include "test_macros.h" -// Add the include path required by test_chrono_leap_second.h when using libc++. -// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include #include "test_chrono_leap_second.h" constexpr void test(const std::chrono::leap_second leap_second, std::chrono::sys_seconds expected) { diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp index 844c74d002ac5..652e51ef0bf10 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp @@ -23,8 +23,6 @@ #include "test_macros.h" -// Add the include path required by test_chrono_leap_second.h when using libc++. 
-// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include #include "test_chrono_leap_second.h" constexpr void test(const std::chrono::leap_second leap_second, std::chrono::seconds expected) { diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp index ac8b780af854d..bf6855ea63dfc 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp @@ -50,8 +50,6 @@ #include "test_macros.h" #include "test_comparisons.h" -// Add the include path required by test_chrono_leap_second.h when using libc++. -// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include #include "test_chrono_leap_second.h" constexpr void test_comparison(const std::chrono::leap_second lhs, const std::chrono::leap_second rhs) { diff --git a/libcxx/test/support/test_chrono_leap_second.h b/libcxx/test/support/test_chrono_leap_second.h index 485f68d91b1a1..be5ce760bfe98 100644 --- a/libcxx/test/support/test_chrono_leap_second.h +++ b/libcxx/test/support/test_chrono_leap_second.h @@ -32,16 +32,11 @@ #ifdef _LIBCPP_VERSION -// In order to find this include the calling test needs to provide this path in -// the search path. Typically this looks like: -// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include -// where the number of `../` sequences depends on the subdirectory level of the -// test. -# include "tzdb/leap_second_private.h" // Header in the dylib +# include <__utility/private_constructor_tag.h> inline constexpr std::chrono::leap_second test_leap_second_create(const std::chrono::sys_seconds& date, const std::chrono::seconds& value) { - return std::chrono::leap_second{std::chrono::leap_second::__constructor_tag{}, date, value}; + return std::chrono::leap_second{std::__private_constructor_tag{}, date, value}; } #else // _LIBCPP_VERSION diff --git a/libcxx/utils/generate_iwyu_mapping.py b/libcxx/utils/generate_iwyu_mapping.py index 8ab7b86299edc..2265438ab49cc 100644 --- a/libcxx/utils/generate_iwyu_mapping.py +++ b/libcxx/utils/generate_iwyu_mapping.py @@ -11,6 +11,7 @@ def IWYU_mapping(header: str) -> typing.Optional[typing.List[str]]: "__debug_utils/.+", "__fwd/get[.]h", "__support/.+", + "__utility/private_constructor_tag.h", ] if any(re.match(pattern, header) for pattern in ignore): return None From f81879c0f70ee5a1cf1d5b716dfd49d1a271cc2d Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Apr 2024 13:35:04 -0500 Subject: [PATCH 058/886] [Libomptarget] Add RPC-based printf implementation for OpenMP #85638 Summary: Relanding after reverting; only applies to AMDGPU for now. This patch adds an implementation of printf that's provided by the GPU C library runtime. This printf is currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This printf differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the %n option.
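[Editorial sketch, not part of the patch: a minimal host program that would exercise this path, assuming a toolchain built with `LIBOMPTARGET_GPU_LIBC_SUPPORT=ON` and an AMD GPU target; with this patch, a `printf` inside a target region is expected to be serviced synchronously by the host over RPC rather than buffered asynchronously as in CUDA.]

```c
// Illustrative only; build command is an assumption, e.g.:
//   clang -fopenmp --offload-arch=gfx90a printf_demo.c
#include <stdio.h>

int main(void) {
  // Each iteration's printf is forwarded from the device to the
  // host RPC server and completes before the call returns.
#pragma omp target teams distribute parallel for
  for (int i = 0; i < 4; ++i)
    printf("iteration %d squared is %d\n", i, i * i);
  return 0;
}
```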
Depends on #85331 --- openmp/libomptarget/DeviceRTL/CMakeLists.txt | 5 +++++ openmp/libomptarget/DeviceRTL/src/LibC.cpp | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 2509f1276ccee..2e7f28df24d64 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -122,6 +122,11 @@ set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false ) set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports) +# If the user built with the GPU C library enabled we will use that instead. +if(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) + list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC) +endif() + # Prepend -I to each list element set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}") list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I") diff --git a/openmp/libomptarget/DeviceRTL/src/LibC.cpp b/openmp/libomptarget/DeviceRTL/src/LibC.cpp index af675b97256f6..e587c3057f5ba 100644 --- a/openmp/libomptarget/DeviceRTL/src/LibC.cpp +++ b/openmp/libomptarget/DeviceRTL/src/LibC.cpp @@ -25,13 +25,26 @@ int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { } // namespace impl #pragma omp end declare variant -// We do not have a vprintf implementation for AMD GPU yet so we use a stub. #pragma omp begin declare variant match(device = {arch(amdgcn)}) + +#ifdef OMPTARGET_HAS_LIBC +// TODO: Remove this handling once we have varargs support. +extern "C" struct FILE *stdout; +extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t); + +namespace impl { +int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { + return rpc_fprintf(stdout, Format, Arguments, Size); +} +} // namespace impl +#else +// We do not have a vprintf implementation for AMD GPU so we use a stub. namespace impl { int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { return -1; } } // namespace impl +#endif #pragma omp end declare variant extern "C" { From fad14707b73d6387e6276507e1c5726e67f08cd6 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Apr 2024 14:07:18 -0500 Subject: [PATCH 059/886] [libc] Add note to use `LIBC_GPU_BUILD=ON` as another form Summary: This is a shorthand to enable GPU support so it should be listed in the docs. --- libc/docs/gpu/building.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst index 6d94134a407d3..d3e64c6d42431 100644 --- a/libc/docs/gpu/building.rst +++ b/libc/docs/gpu/building.rst @@ -33,7 +33,8 @@ The simplest way to build the GPU libc is to use the existing LLVM runtimes support. This will automatically handle bootstrapping an up-to-date ``clang`` compiler and using it to build the C library. The following CMake invocation will instruct it to build the ``libc`` runtime targeting both AMD and NVIDIA -GPUs. +GPUs. The ``LIBC_GPU_BUILD`` option can also be enabled to add the relevant +arguments automatically. .. code-block:: sh @@ -234,6 +235,10 @@ standard runtime build. This flag controls whether or not the libc build will generate its own headers. This must always be on when targeting the GPU. +**LIBC_GPU_BUILD**:BOOL + Shorthand for enabling GPU support. 
Equivalent to enabling support for both + AMDGPU and NVPTX builds for ``libc``. + +**LIBC_GPU_TEST_ARCHITECTURE**:STRING Sets the architecture used to build the GPU tests for, such as ``gfx90a`` or ``sm_80`` for AMD and NVIDIA GPUs respectively. The default behavior is to From ca6b8469c16edfe1713e9050dca3cd68bd585410 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 10 Apr 2024 12:20:28 -0700 Subject: [PATCH 060/886] [ELF] Avoid unneeded config->isLE and config->wordsize. NFC --- lld/ELF/SyntheticSections.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 550659464a440..d8791e83dc9e5 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2769,7 +2769,8 @@ readPubNamesAndTypes(const LLDDwarfObj &obj, SmallVector ret; for (const LLDDWARFSection *pub : {&pubNames, &pubTypes}) { - DWARFDataExtractor data(obj, *pub, config->isLE, config->wordsize); + DWARFDataExtractor data(obj, *pub, ELFT::Endianness == endianness::little, + ELFT::Is64Bits ? 8 : 4); DWARFDebugPubTable table; table.extract(data, /*GnuStyle=*/true, [&](Error e) { warn(toString(pub->sec) + ": " + toString(std::move(e))); @@ -3744,8 +3745,9 @@ template void elf::writeEhdr(uint8_t *buf, Partition &part) { memcpy(buf, "\177ELF", 4); auto *eHdr = reinterpret_cast(buf); - eHdr->e_ident[EI_CLASS] = config->is64 ? ELFCLASS64 : ELFCLASS32; - eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB; + eHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; + eHdr->e_ident[EI_DATA] = + ELFT::Endianness == endianness::little ? ELFDATA2LSB : ELFDATA2MSB; eHdr->e_ident[EI_VERSION] = EV_CURRENT; eHdr->e_ident[EI_OSABI] = config->osabi; eHdr->e_ident[EI_ABIVERSION] = getAbiVersion(); From e3ef4612c18845876cda9a13c3435e102f74a3aa Mon Sep 17 00:00:00 2001 From: shamithoke <152091883+shamithoke@users.noreply.github.com> Date: Thu, 11 Apr 2024 00:52:44 +0530 Subject: [PATCH 061/886] Perform bitreverse using AVX512 GFNI for i32 and i64. (#81764) Currently, the lowering operation for bitreverse using Intel AVX512 GFNI only supports byte vectors. Extend the operation to i32 and i64.
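[Editorial sketch, not part of the patch: the lowering below decomposes a scalar bit reversal into a per-byte bit reversal (one GF2P8AFFINEQB against the matrix 0x8040201008040201, which appears as 9241421688590303745 in the updated tests) followed by a byte swap. The scalar C equivalent of that decomposition is shown here for intuition; `reverse_byte` and `bitreverse32` are hypothetical names, not code from this patch.]

```c
#include <stdint.h>

// Per-byte bit reversal: the role GF2P8AFFINEQB plays in the lowering.
static uint8_t reverse_byte(uint8_t b) {
  b = (uint8_t)(((b & 0xF0u) >> 4) | ((b & 0x0Fu) << 4));
  b = (uint8_t)(((b & 0xCCu) >> 2) | ((b & 0x33u) << 2));
  b = (uint8_t)(((b & 0xAAu) >> 1) | ((b & 0x55u) << 1));
  return b;
}

// Reverse bits within each byte, then reverse byte order (the BSWAP
// step); the result matches __builtin_bitreverse32.
uint32_t bitreverse32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 4; ++i)
    r |= (uint32_t)reverse_byte((uint8_t)(x >> (8 * i))) << (24 - 8 * i);
  return r;
}
```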
--------- Co-authored-by: shami --- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 ++ llvm/test/CodeGen/X86/bitreverse.ll | 281 ++++----------------- llvm/test/CodeGen/X86/vector-bitreverse.ll | 46 +--- 3 files changed, 84 insertions(+), 265 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 010f9c30ab403..52be35aafb0f5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1496,6 +1496,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom); + if (Subtarget.hasGFNI()) { + setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); + } + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); @@ -31332,6 +31337,23 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, if (VT.is256BitVector() && !Subtarget.hasInt256()) return splitVectorIntUnary(Op, DAG, DL); + // Lower i32/i64 to GFNI as vXi8 BITREVERSE + BSWAP + if (!VT.isVector()) { + + assert((VT.getScalarType() == MVT::i32) || + (VT.getScalarType() == MVT::i64)); + + MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In); + Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8, + DAG.getBitcast(MVT::v16i8, Res)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::BSWAP, DL, VT, Res); + } + + assert(VT.isVector() && VT.getSizeInBits() >= 128); + // Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE. 
if (VT.getScalarType() != MVT::i8) { MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll index 26b1d64874e59..704563ab1bbf7 100644 --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -172,26 +172,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; ; GFNI-LABEL: test_bitreverse_i64: ; GFNI: # %bb.0: -; GFNI-NEXT: bswapq %rdi -; GFNI-NEXT: movq %rdi, %rax -; GFNI-NEXT: shrq $4, %rax -; GFNI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F -; GFNI-NEXT: andq %rcx, %rax -; GFNI-NEXT: andq %rcx, %rdi -; GFNI-NEXT: shlq $4, %rdi -; GFNI-NEXT: orq %rax, %rdi -; GFNI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; GFNI-NEXT: movq %rdi, %rcx -; GFNI-NEXT: andq %rax, %rcx -; GFNI-NEXT: shrq $2, %rdi -; GFNI-NEXT: andq %rax, %rdi -; GFNI-NEXT: leaq (%rdi,%rcx,4), %rax -; GFNI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; GFNI-NEXT: movq %rax, %rdx -; GFNI-NEXT: andq %rcx, %rdx -; GFNI-NEXT: shrq %rax -; GFNI-NEXT: andq %rcx, %rax -; GFNI-NEXT: leaq (%rax,%rdx,2), %rax +; GFNI-NEXT: vmovq %rdi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovq %xmm0, %rax +; GFNI-NEXT: bswapq %rax ; GFNI-NEXT: retq %b = call i64 @llvm.bitreverse.i64(i64 %a) ret i64 %b @@ -253,24 +237,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { ; ; GFNI-LABEL: test_bitreverse_i32: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: bswapl %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; GFNI-NEXT: shll $4, %eax -; GFNI-NEXT: shrl $4, %edi -; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; GFNI-NEXT: orl %eax, %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; GFNI-NEXT: shrl $2, %edi -; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333 -; GFNI-NEXT: leal (%rdi,%rax,4), %eax -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; GFNI-NEXT: shrl %eax -; GFNI-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; GFNI-NEXT: leal (%rax,%rcx,2), %eax +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: bswapl %eax ; GFNI-NEXT: retq %b = call i32 @llvm.bitreverse.i32(i32 %a) ret i32 %b @@ -335,24 +305,10 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind { ; ; GFNI-LABEL: test_bitreverse_i24: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: bswapl %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; GFNI-NEXT: shll $4, %eax -; GFNI-NEXT: shrl $4, %edi -; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; GFNI-NEXT: orl %eax, %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; GFNI-NEXT: shrl $2, %edi -; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333 -; GFNI-NEXT: leal (%rdi,%rax,4), %eax -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andl $1431655680, %ecx # imm = 0x55555500 -; GFNI-NEXT: shrl %eax -; GFNI-NEXT: andl $1431655680, %eax # imm = 0x55555500 -; GFNI-NEXT: leal (%rax,%rcx,2), %eax +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: bswapl %eax ; GFNI-NEXT: shrl $8, %eax ; 
GFNI-NEXT: retq %b = call i24 @llvm.bitreverse.i24(i24 %a) @@ -1412,196 +1368,67 @@ define i528 @large_promotion(i528 %A) nounwind { ; ; GFNI-LABEL: large_promotion: ; GFNI: # %bb.0: -; GFNI-NEXT: pushq %r15 ; GFNI-NEXT: pushq %r14 -; GFNI-NEXT: pushq %r13 -; GFNI-NEXT: pushq %r12 ; GFNI-NEXT: pushq %rbx ; GFNI-NEXT: movq %rdi, %rax -; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r15 -; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rbx -; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9241421688590303745,9241421688590303745] +; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %r10 +; GFNI-NEXT: bswapq %r10 +; GFNI-NEXT: vmovq %r9, %xmm1 +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %rdi ; GFNI-NEXT: bswapq %rdi -; GFNI-NEXT: movq %rdi, %r10 -; GFNI-NEXT: shrq $4, %r10 -; GFNI-NEXT: movabsq $1085102592571150095, %r11 # imm = 0xF0F0F0F0F0F0F0F -; GFNI-NEXT: andq %r11, %r10 -; GFNI-NEXT: andq %r11, %rdi -; GFNI-NEXT: shlq $4, %rdi -; GFNI-NEXT: orq %r10, %rdi -; GFNI-NEXT: movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333 -; GFNI-NEXT: movq %rdi, %r14 -; GFNI-NEXT: andq %r10, %r14 -; GFNI-NEXT: shrq $2, %rdi -; GFNI-NEXT: andq %r10, %rdi -; GFNI-NEXT: leaq (%rdi,%r14,4), %rdi -; GFNI-NEXT: movabsq $6148820866244280320, %r14 # imm = 0x5555000000000000 -; GFNI-NEXT: movq %rdi, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %rdi -; GFNI-NEXT: andq %r14, %rdi -; GFNI-NEXT: leaq (%rdi,%r13,2), %rdi -; GFNI-NEXT: bswapq %rbx -; GFNI-NEXT: movq %rbx, %r14 -; GFNI-NEXT: shrq $4, %r14 -; GFNI-NEXT: andq %r11, %r14 -; GFNI-NEXT: andq %r11, %rbx -; GFNI-NEXT: shlq $4, %rbx -; GFNI-NEXT: orq %r14, %rbx -; GFNI-NEXT: movq %rbx, %r14 -; GFNI-NEXT: andq %r10, %r14 -; GFNI-NEXT: shrq $2, %rbx -; GFNI-NEXT: andq %r10, %rbx -; GFNI-NEXT: leaq (%rbx,%r14,4), %rbx -; GFNI-NEXT: movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555 -; GFNI-NEXT: movq %rbx, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %rbx -; GFNI-NEXT: andq %r14, %rbx -; GFNI-NEXT: leaq (%rbx,%r13,2), %rbx -; GFNI-NEXT: shrdq $48, %rbx, %rdi -; GFNI-NEXT: bswapq %r15 -; GFNI-NEXT: movq %r15, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %r15 -; GFNI-NEXT: shlq $4, %r15 -; GFNI-NEXT: orq %r13, %r15 -; GFNI-NEXT: movq %r15, %r13 -; GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %r15 -; GFNI-NEXT: andq %r10, %r15 -; GFNI-NEXT: leaq (%r15,%r13,4), %r15 -; GFNI-NEXT: movq %r15, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %r15 -; GFNI-NEXT: andq %r14, %r15 -; GFNI-NEXT: leaq (%r15,%r13,2), %r15 -; GFNI-NEXT: shrdq $48, %r15, %rbx -; GFNI-NEXT: bswapq %r12 -; GFNI-NEXT: movq %r12, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %r12 -; GFNI-NEXT: shlq $4, %r12 -; GFNI-NEXT: orq %r13, %r12 -; GFNI-NEXT: movq %r12, %r13 -; GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %r12 -; GFNI-NEXT: andq %r10, %r12 -; GFNI-NEXT: leaq (%r12,%r13,4), %r12 -; GFNI-NEXT: movq %r12, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %r12 -; GFNI-NEXT: andq %r14, %r12 -; GFNI-NEXT: leaq (%r12,%r13,2), %r12 -; GFNI-NEXT: shrdq $48, %r12, %r15 -; GFNI-NEXT: bswapq %r9 -; GFNI-NEXT: movq %r9, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %r9 -; GFNI-NEXT: shlq $4, %r9 -; GFNI-NEXT: orq %r13, %r9 -; GFNI-NEXT: movq %r9, %r13 -; 
GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %r9 -; GFNI-NEXT: andq %r10, %r9 -; GFNI-NEXT: leaq (%r9,%r13,4), %r9 -; GFNI-NEXT: movq %r9, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %r9 -; GFNI-NEXT: andq %r14, %r9 -; GFNI-NEXT: leaq (%r9,%r13,2), %r9 -; GFNI-NEXT: shrdq $48, %r9, %r12 +; GFNI-NEXT: vmovq %r8, %xmm1 +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %r8 ; GFNI-NEXT: bswapq %r8 -; GFNI-NEXT: movq %r8, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %r8 -; GFNI-NEXT: shlq $4, %r8 -; GFNI-NEXT: orq %r13, %r8 -; GFNI-NEXT: movq %r8, %r13 -; GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %r8 -; GFNI-NEXT: andq %r10, %r8 -; GFNI-NEXT: leaq (%r8,%r13,4), %r8 -; GFNI-NEXT: movq %r8, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %r8 -; GFNI-NEXT: andq %r14, %r8 -; GFNI-NEXT: leaq (%r8,%r13,2), %r8 -; GFNI-NEXT: shrdq $48, %r8, %r9 +; GFNI-NEXT: movq %r8, %r9 +; GFNI-NEXT: shldq $16, %rdi, %r9 +; GFNI-NEXT: shldq $16, %r10, %rdi +; GFNI-NEXT: vmovq %rcx, %xmm1 +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %rcx ; GFNI-NEXT: bswapq %rcx -; GFNI-NEXT: movq %rcx, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %rcx -; GFNI-NEXT: shlq $4, %rcx -; GFNI-NEXT: orq %r13, %rcx -; GFNI-NEXT: movq %rcx, %r13 -; GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %rcx -; GFNI-NEXT: andq %r10, %rcx -; GFNI-NEXT: leaq (%rcx,%r13,4), %rcx -; GFNI-NEXT: movq %rcx, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %rcx -; GFNI-NEXT: andq %r14, %rcx -; GFNI-NEXT: leaq (%rcx,%r13,2), %rcx ; GFNI-NEXT: shrdq $48, %rcx, %r8 +; GFNI-NEXT: vmovq %rdx, %xmm1 +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %rdx ; GFNI-NEXT: bswapq %rdx -; GFNI-NEXT: movq %rdx, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %rdx -; GFNI-NEXT: shlq $4, %rdx -; GFNI-NEXT: orq %r13, %rdx -; GFNI-NEXT: movq %rdx, %r13 -; GFNI-NEXT: andq %r10, %r13 -; GFNI-NEXT: shrq $2, %rdx -; GFNI-NEXT: andq %r10, %rdx -; GFNI-NEXT: leaq (%rdx,%r13,4), %rdx -; GFNI-NEXT: movq %rdx, %r13 -; GFNI-NEXT: andq %r14, %r13 -; GFNI-NEXT: shrq %rdx -; GFNI-NEXT: andq %r14, %rdx -; GFNI-NEXT: leaq (%rdx,%r13,2), %rdx ; GFNI-NEXT: shrdq $48, %rdx, %rcx +; GFNI-NEXT: vmovq %rsi, %xmm1 +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %rsi ; GFNI-NEXT: bswapq %rsi -; GFNI-NEXT: movq %rsi, %r13 -; GFNI-NEXT: shrq $4, %r13 -; GFNI-NEXT: andq %r11, %r13 -; GFNI-NEXT: andq %r11, %rsi -; GFNI-NEXT: shlq $4, %rsi -; GFNI-NEXT: orq %r13, %rsi -; GFNI-NEXT: movq %rsi, %r11 -; GFNI-NEXT: andq %r10, %r11 -; GFNI-NEXT: shrq $2, %rsi -; GFNI-NEXT: andq %r10, %rsi -; GFNI-NEXT: leaq (%rsi,%r11,4), %rsi -; GFNI-NEXT: movq %rsi, %r10 -; GFNI-NEXT: andq %r14, %r10 -; GFNI-NEXT: shrq %rsi -; GFNI-NEXT: andq %r14, %rsi -; GFNI-NEXT: leaq (%rsi,%r10,2), %rsi ; GFNI-NEXT: shrdq $48, %rsi, %rdx +; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %r11 +; GFNI-NEXT: bswapq %r11 +; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 +; GFNI-NEXT: vmovq %xmm1, %rbx +; GFNI-NEXT: bswapq %rbx +; GFNI-NEXT: shrdq $48, %rbx, %r11 +; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0 +; GFNI-NEXT: vmovq %xmm0, %r14 +; GFNI-NEXT: bswapq %r14 +; 
GFNI-NEXT: shrdq $48, %r14, %rbx +; GFNI-NEXT: shrdq $48, %r10, %r14 ; GFNI-NEXT: shrq $48, %rsi +; GFNI-NEXT: movq %r14, 16(%rax) +; GFNI-NEXT: movq %rbx, 8(%rax) +; GFNI-NEXT: movq %r11, (%rax) ; GFNI-NEXT: movq %rdx, 56(%rax) ; GFNI-NEXT: movq %rcx, 48(%rax) ; GFNI-NEXT: movq %r8, 40(%rax) ; GFNI-NEXT: movq %r9, 32(%rax) -; GFNI-NEXT: movq %r12, 24(%rax) -; GFNI-NEXT: movq %r15, 16(%rax) -; GFNI-NEXT: movq %rbx, 8(%rax) -; GFNI-NEXT: movq %rdi, (%rax) +; GFNI-NEXT: movq %rdi, 24(%rax) ; GFNI-NEXT: movw %si, 64(%rax) ; GFNI-NEXT: popq %rbx -; GFNI-NEXT: popq %r12 -; GFNI-NEXT: popq %r13 ; GFNI-NEXT: popq %r14 -; GFNI-NEXT: popq %r15 ; GFNI-NEXT: retq %Z = call i528 @llvm.bitreverse.i528(i528 %A) ret i528 %Z diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index d3f357cd17952..1c5326d35bb00 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -276,24 +276,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { ; ; GFNIAVX-LABEL: test_bitreverse_i32: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi -; GFNIAVX-NEXT: bswapl %edi -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; GFNIAVX-NEXT: shll $4, %eax -; GFNIAVX-NEXT: shrl $4, %edi -; GFNIAVX-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; GFNIAVX-NEXT: orl %eax, %edi -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $858993459, %eax # imm = 0x33333333 -; GFNIAVX-NEXT: shrl $2, %edi -; GFNIAVX-NEXT: andl $858993459, %edi # imm = 0x33333333 -; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax -; GFNIAVX-NEXT: movl %eax, %ecx -; GFNIAVX-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; GFNIAVX-NEXT: shrl %eax -; GFNIAVX-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX-NEXT: vmovd %edi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovd %xmm0, %eax +; GFNIAVX-NEXT: bswapl %eax ; GFNIAVX-NEXT: retq %b = call i32 @llvm.bitreverse.i32(i32 %a) ret i32 %b @@ -381,26 +367,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; ; GFNIAVX-LABEL: test_bitreverse_i64: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: bswapq %rdi -; GFNIAVX-NEXT: movq %rdi, %rax -; GFNIAVX-NEXT: shrq $4, %rax -; GFNIAVX-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F -; GFNIAVX-NEXT: andq %rcx, %rax -; GFNIAVX-NEXT: andq %rcx, %rdi -; GFNIAVX-NEXT: shlq $4, %rdi -; GFNIAVX-NEXT: orq %rax, %rdi -; GFNIAVX-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; GFNIAVX-NEXT: movq %rdi, %rcx -; GFNIAVX-NEXT: andq %rax, %rcx -; GFNIAVX-NEXT: shrq $2, %rdi -; GFNIAVX-NEXT: andq %rax, %rdi -; GFNIAVX-NEXT: leaq (%rdi,%rcx,4), %rax -; GFNIAVX-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; GFNIAVX-NEXT: movq %rax, %rdx -; GFNIAVX-NEXT: andq %rcx, %rdx -; GFNIAVX-NEXT: shrq %rax -; GFNIAVX-NEXT: andq %rcx, %rax -; GFNIAVX-NEXT: leaq (%rax,%rdx,2), %rax +; GFNIAVX-NEXT: vmovq %rdi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovq %xmm0, %rax +; GFNIAVX-NEXT: bswapq %rax ; GFNIAVX-NEXT: retq %b = call i64 @llvm.bitreverse.i64(i64 %a) ret i64 %b From 7549b45825a05fc24fcdbacf006461165aa042cb Mon Sep 17 00:00:00 2001 From: martinboehme Date: Wed, 10 Apr 2024 21:27:10 +0200 Subject: [PATCH 062/886] Revert "[clang][dataflow] Propagate locations from result objects to initializers." 
(#88315) Reverts llvm/llvm-project#87320 This is causing buildbots to fail because `isOriginalRecordConstructor()` is now unused. --- .../FlowSensitive/DataflowEnvironment.h | 64 +-- .../FlowSensitive/DataflowEnvironment.cpp | 405 +++++------------- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 176 ++++---- .../TypeErasedDataflowAnalysis.cpp | 13 +- .../FlowSensitive/DataflowEnvironmentTest.cpp | 43 -- .../Analysis/FlowSensitive/TransferTest.cpp | 172 +++----- 6 files changed, 283 insertions(+), 590 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 706664d7db1c2..9a65f76cdf56b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -30,7 +30,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include #include #include @@ -345,6 +344,17 @@ class Environment { /// location of the result object to pass in `this`, even though prvalues are /// otherwise not associated with storage locations. /// + /// FIXME: Currently, this simply returns a stable storage location for `E`, + /// but this doesn't do the right thing in scenarios like the following: + /// ``` + /// MyClass c = some_condition()? MyClass(foo) : MyClass(bar); + /// ``` + /// Here, `MyClass(foo)` and `MyClass(bar)` will have two different storage + /// locations, when in fact their storage locations should be the same. + /// Eventually, we want to propagate storage locations from result objects + /// down to the prvalues that initialize them, similar to the way that this is + /// done in Clang's CodeGen. + /// /// Requirements: /// `E` must be a prvalue of record type. RecordStorageLocation & @@ -452,13 +462,7 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values. - /// If `Type` is provided, initializes only those fields that are modeled for - /// `Type`; this is intended for use in cases where `Loc` is a derived type - /// and we only want to initialize the fields of a base type. - void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type); - void initializeFieldsWithValues(RecordStorageLocation &Loc) { - initializeFieldsWithValues(Loc, Loc.getType()); - } + void initializeFieldsWithValues(RecordStorageLocation &Loc); /// Assigns `Val` as the value of `Loc` in the environment. void setValue(const StorageLocation &Loc, Value &Val); @@ -649,9 +653,6 @@ class Environment { LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; private: - using PrValueToResultObject = - llvm::DenseMap; - // The copy-constructor is for use in fork() only. Environment(const Environment &) = default; @@ -681,10 +682,8 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values (controlled by `Visited`, - /// `Depth`, and `CreatedValuesCount`). If `Type` is different from - /// `Loc.getType()`, initializes only those fields that are modeled for - /// `Type`. - void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type, + /// `Depth`, and `CreatedValuesCount`). 
+ void initializeFieldsWithValues(RecordStorageLocation &Loc, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount); @@ -703,45 +702,22 @@ class Environment { /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body. void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl); - static PrValueToResultObject - buildResultObjectMap(DataflowAnalysisContext *DACtx, - const FunctionDecl *FuncDecl, - RecordStorageLocation *ThisPointeeLoc, - RecordStorageLocation *LocForRecordReturnVal); - // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; - // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`, - // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object, - // shared between environments in the same call. + // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and + // `ThisPointeeLoc` into a separate call-context object, shared between + // environments in the same call. // https://github.com/llvm/llvm-project/issues/59005 // `DeclContext` of the block being analysed if provided. std::vector CallStack; - // Maps from prvalues of record type to their result objects. Shared between - // all environments for the same function. - // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr` - // here, though the cost is acceptable: The overhead of a `shared_ptr` is - // incurred when it is copied, and this happens only relatively rarely (when - // we fork the environment). The need for a `shared_ptr` will go away once we - // introduce a shared call-context object (see above). - std::shared_ptr ResultObjectMap; - - // The following three member variables handle various different types of - // return values. - // - If the return type is not a reference and not a record: Value returned - // by the function. + // Value returned by the function (if it has non-reference return type). Value *ReturnVal = nullptr; - // - If the return type is a reference: Storage location of the reference - // returned by the function. + // Storage location of the reference returned by the function (if it has + // reference return type). StorageLocation *ReturnLoc = nullptr; - // - If the return type is a record or the function being analyzed is a - // constructor: Storage location into which the return value should be - // constructed. - RecordStorageLocation *LocForRecordReturnVal = nullptr; - // The storage location of the `this` pointee. Should only be null if the // function being analyzed is only a function and not a method. 
RecordStorageLocation *ThisPointeeLoc = nullptr; diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 6c796b4ad923e..1bfa7ebcfd50c 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -15,7 +15,6 @@ #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/Value.h" @@ -27,8 +26,6 @@ #include #include -#define DEBUG_TYPE "dataflow" - namespace clang { namespace dataflow { @@ -357,8 +354,6 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, for (auto *Child : S.children()) if (Child != nullptr) getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); - if (const auto *DefaultArg = dyn_cast(&S)) - getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs); if (const auto *DefaultInit = dyn_cast(&S)) getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); @@ -391,186 +386,6 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, } } -namespace { - -// Visitor that builds a map from record prvalues to result objects. -// This traverses the body of the function to be analyzed; for each result -// object that it encounters, it propagates the storage location of the result -// object to all record prvalues that can initialize it. -class ResultObjectVisitor : public RecursiveASTVisitor { -public: - // `ResultObjectMap` will be filled with a map from record prvalues to result - // object. If the function being analyzed returns a record by value, - // `LocForRecordReturnVal` is the location to which this record should be - // written; otherwise, it is null. - explicit ResultObjectVisitor( - llvm::DenseMap &ResultObjectMap, - RecordStorageLocation *LocForRecordReturnVal, - DataflowAnalysisContext &DACtx) - : ResultObjectMap(ResultObjectMap), - LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} - - bool shouldVisitImplicitCode() { return true; } - - bool shouldVisitLambdaBody() const { return false; } - - // Traverse all member and base initializers of `Ctor`. This function is not - // called by `RecursiveASTVisitor`; it should be called manually if we are - // analyzing a constructor. `ThisPointeeLoc` is the storage location that - // `this` points to. - void TraverseConstructorInits(const CXXConstructorDecl *Ctor, - RecordStorageLocation *ThisPointeeLoc) { - assert(ThisPointeeLoc != nullptr); - for (const CXXCtorInitializer *Init : Ctor->inits()) { - Expr *InitExpr = Init->getInit(); - if (FieldDecl *Field = Init->getMember(); - Field != nullptr && Field->getType()->isRecordType()) { - PropagateResultObject(InitExpr, cast( - ThisPointeeLoc->getChild(*Field))); - } else if (Init->getBaseClass()) { - PropagateResultObject(InitExpr, ThisPointeeLoc); - } - - // Ensure that any result objects within `InitExpr` (e.g. temporaries) - // are also propagated to the prvalues that initialize them. - TraverseStmt(InitExpr); - - // If this is a `CXXDefaultInitExpr`, also propagate any result objects - // within the default expression. 
- if (auto *DefaultInit = dyn_cast(InitExpr)) - TraverseStmt(DefaultInit->getExpr()); - } - } - - bool TraverseBindingDecl(BindingDecl *BD) { - // `RecursiveASTVisitor` doesn't traverse holding variables for - // `BindingDecl`s by itself, so we need to tell it to. - if (VarDecl *HoldingVar = BD->getHoldingVar()) - TraverseDecl(HoldingVar); - return RecursiveASTVisitor::TraverseBindingDecl(BD); - } - - bool VisitVarDecl(VarDecl *VD) { - if (VD->getType()->isRecordType() && VD->hasInit()) - PropagateResultObject( - VD->getInit(), - &cast(DACtx.getStableStorageLocation(*VD))); - return true; - } - - bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { - if (MTE->getType()->isRecordType()) - PropagateResultObject( - MTE->getSubExpr(), - &cast(DACtx.getStableStorageLocation(*MTE))); - return true; - } - - bool VisitReturnStmt(ReturnStmt *Return) { - Expr *RetValue = Return->getRetValue(); - if (RetValue != nullptr && RetValue->getType()->isRecordType() && - RetValue->isPRValue()) - PropagateResultObject(RetValue, LocForRecordReturnVal); - return true; - } - - bool VisitExpr(Expr *E) { - // Clang's AST can have record-type prvalues without a result object -- for - // example as full-expressions contained in a compound statement or as - // arguments of call expressions. We notice this if we get here and a - // storage location has not yet been associated with `E`. In this case, - // treat this as if it was a `MaterializeTemporaryExpr`. - if (E->isPRValue() && E->getType()->isRecordType() && - !ResultObjectMap.contains(E)) - PropagateResultObject( - E, &cast(DACtx.getStableStorageLocation(*E))); - return true; - } - - // Assigns `Loc` as the result object location of `E`, then propagates the - // location to all lower-level prvalues that initialize the same object as - // `E` (or one of its base classes or member variables). - void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { - if (!E->isPRValue() || !E->getType()->isRecordType()) { - assert(false); - // Ensure we don't propagate the result object if we hit this in a - // release build. - return; - } - - ResultObjectMap[E] = Loc; - - // The following AST node kinds are "original initializers": They are the - // lowest-level AST node that initializes a given object, and nothing - // below them can initialize the same object (or part of it). - if (isa(E) || isa(E) || isa(E) || - isa(E) || isa(E) || - isa(E)) { - return; - } - - if (auto *InitList = dyn_cast(E)) { - if (!InitList->isSemanticForm()) - return; - if (InitList->isTransparent()) { - PropagateResultObject(InitList->getInit(0), Loc); - return; - } - - RecordInitListHelper InitListHelper(InitList); - - for (auto [Base, Init] : InitListHelper.base_inits()) { - assert(Base->getType().getCanonicalType() == - Init->getType().getCanonicalType()); - - // Storage location for the base class is the same as that of the - // derived class because we "flatten" the object hierarchy and put all - // fields in `RecordStorageLocation` of the derived class. - PropagateResultObject(Init, Loc); - } - - for (auto [Field, Init] : InitListHelper.field_inits()) { - // Fields of non-record type are handled in - // `TransferVisitor::VisitInitListExpr()`. 
- if (!Field->getType()->isRecordType()) - continue; - PropagateResultObject( - Init, cast(Loc->getChild(*Field))); - } - return; - } - - if (auto *Op = dyn_cast(E); Op && Op->isCommaOp()) { - PropagateResultObject(Op->getRHS(), Loc); - return; - } - - if (auto *Cond = dyn_cast(E)) { - PropagateResultObject(Cond->getTrueExpr(), Loc); - PropagateResultObject(Cond->getFalseExpr(), Loc); - return; - } - - // All other expression nodes that propagate a record prvalue should have - // exactly one child. - SmallVector Children(E->child_begin(), E->child_end()); - LLVM_DEBUG({ - if (Children.size() != 1) - E->dump(); - }); - assert(Children.size() == 1); - for (Stmt *S : Children) - PropagateResultObject(cast(S), Loc); - } - -private: - llvm::DenseMap &ResultObjectMap; - RecordStorageLocation *LocForRecordReturnVal; - DataflowAnalysisContext &DACtx; -}; - -} // namespace - Environment::Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx), FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} @@ -586,23 +401,17 @@ void Environment::initialize() { if (DeclCtx == nullptr) return; - const auto *FuncDecl = dyn_cast(DeclCtx); - if (FuncDecl == nullptr) - return; - - assert(FuncDecl->doesThisDeclarationHaveABody()); + if (const auto *FuncDecl = dyn_cast(DeclCtx)) { + assert(FuncDecl->doesThisDeclarationHaveABody()); - initFieldsGlobalsAndFuncs(FuncDecl); + initFieldsGlobalsAndFuncs(FuncDecl); - for (const auto *ParamDecl : FuncDecl->parameters()) { - assert(ParamDecl != nullptr); - setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); + for (const auto *ParamDecl : FuncDecl->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); + } } - if (FuncDecl->getReturnType()->isRecordType()) - LocForRecordReturnVal = &cast( - createStorageLocation(FuncDecl->getReturnType())); - if (const auto *MethodDecl = dyn_cast(DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); @@ -635,12 +444,6 @@ void Environment::initialize() { initializeFieldsWithValues(ThisLoc); } } - - // We do this below the handling of `CXXMethodDecl` above so that we can - // be sure that the storage location for `this` has been set. - ResultObjectMap = std::make_shared( - buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), - LocForRecordReturnVal)); } // FIXME: Add support for resetting globals after function calls to enable @@ -681,18 +484,13 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { if (getStorageLocation(*D) != nullptr) continue; - // We don't run transfer functions on the initializers of global variables, - // so they won't be associated with a value or storage location. We - // therefore intentionally don't pass an initializer to `createObject()`; - // in particular, this ensures that `createObject()` will initialize the - // fields of record-type variables with values. 
- setStorageLocation(*D, createObject(*D, nullptr)); + setStorageLocation(*D, createObject(*D)); } for (const FunctionDecl *FD : Funcs) { if (getStorageLocation(*FD) != nullptr) continue; - auto &Loc = createStorageLocation(*FD); + auto &Loc = createStorageLocation(FD->getType()); setStorageLocation(*FD, Loc); } } @@ -721,9 +519,6 @@ Environment Environment::pushCall(const CallExpr *Call) const { } } - if (Call->getType()->isRecordType() && Call->isPRValue()) - Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); - Env.pushCallInternal(Call->getDirectCallee(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -734,7 +529,6 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); - Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -763,10 +557,6 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, const VarDecl *Param = *ParamIt; setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); } - - ResultObjectMap = std::make_shared( - buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), - LocForRecordReturnVal)); } void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { @@ -810,9 +600,6 @@ bool Environment::equivalentTo(const Environment &Other, if (ReturnLoc != Other.ReturnLoc) return false; - if (LocForRecordReturnVal != Other.LocForRecordReturnVal) - return false; - if (ThisPointeeLoc != Other.ThisPointeeLoc) return false; @@ -836,10 +623,8 @@ LatticeEffect Environment::widen(const Environment &PrevEnv, assert(DACtx == PrevEnv.DACtx); assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); - assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); - assert(ResultObjectMap == PrevEnv.ResultObjectMap); auto Effect = LatticeEffect::Unchanged; @@ -871,16 +656,12 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, Environment::ValueModel &Model, ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); - assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); - assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); Environment JoinedEnv(*EnvA.DACtx); JoinedEnv.CallStack = EnvA.CallStack; - JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; - JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { @@ -949,12 +730,6 @@ StorageLocation &Environment::createStorageLocation(const Expr &E) { void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { assert(!DeclToLoc.contains(&D)); - // The only kinds of declarations that may have a "variable" storage location - // are declarations of reference type and `BindingDecl`. For all other - // declaration, the storage location should be the stable storage location - // returned by `createStorageLocation()`. 
- assert(D.getType()->isReferenceType() || isa(D) || - &Loc == &createStorageLocation(D)); DeclToLoc[&D] = &Loc; } @@ -1016,29 +791,50 @@ Environment::getResultObjectLocation(const Expr &RecordPRValue) const { assert(RecordPRValue.getType()->isRecordType()); assert(RecordPRValue.isPRValue()); - assert(ResultObjectMap != nullptr); - RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue); - assert(Loc != nullptr); - // In release builds, use the "stable" storage location if the map lookup - // failed. - if (Loc == nullptr) + // Returns a storage location that we can use if assertions fail. + auto FallbackForAssertFailure = + [this, &RecordPRValue]() -> RecordStorageLocation & { return cast( DACtx->getStableStorageLocation(RecordPRValue)); - return *Loc; + }; + + if (isOriginalRecordConstructor(RecordPRValue)) { + auto *Val = cast_or_null(getValue(RecordPRValue)); + // The builtin transfer function should have created a `RecordValue` for all + // original record constructors. + assert(Val); + if (!Val) + return FallbackForAssertFailure(); + return Val->getLoc(); + } + + if (auto *Op = dyn_cast(&RecordPRValue); + Op && Op->isCommaOp()) { + return getResultObjectLocation(*Op->getRHS()); + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. + llvm::SmallVector children(RecordPRValue.child_begin(), + RecordPRValue.child_end()); + assert(children.size() == 1); + if (children.empty()) + return FallbackForAssertFailure(); + + return getResultObjectLocation(*cast(children[0])); } PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } -void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, - QualType Type) { +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc) { llvm::DenseSet Visited; int CreatedValuesCount = 0; - initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); + initializeFieldsWithValues(Loc, Visited, 0, CreatedValuesCount); if (CreatedValuesCount > MaxCompositeValueSize) { - llvm::errs() << "Attempting to initialize a huge value of type: " << Type - << '\n'; + llvm::errs() << "Attempting to initialize a huge value of type: " + << Loc.getType() << '\n'; } } @@ -1052,7 +848,8 @@ void Environment::setValue(const Expr &E, Value &Val) { const Expr &CanonE = ignoreCFGOmittedNodes(E); if (auto *RecordVal = dyn_cast(&Val)) { - assert(&RecordVal->getLoc() == &getResultObjectLocation(CanonE)); + assert(isOriginalRecordConstructor(CanonE) || + &RecordVal->getLoc() == &getResultObjectLocation(CanonE)); (void)RecordVal; } @@ -1131,8 +928,7 @@ Value *Environment::createValueUnlessSelfReferential( if (Type->isRecordType()) { CreatedValuesCount++; auto &Loc = cast(createStorageLocation(Type)); - initializeFieldsWithValues(Loc, Loc.getType(), Visited, Depth, - CreatedValuesCount); + initializeFieldsWithValues(Loc, Visited, Depth, CreatedValuesCount); return &refreshRecordValue(Loc, *this); } @@ -1164,7 +960,6 @@ Environment::createLocAndMaybeValue(QualType Ty, } void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, - QualType Type, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount) { @@ -1172,8 +967,8 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, if (FieldType->isRecordType()) { auto &FieldRecordLoc = cast(FieldLoc); setValue(FieldRecordLoc, create(FieldRecordLoc)); - initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), - Visited, 
Depth + 1, CreatedValuesCount); + initializeFieldsWithValues(FieldRecordLoc, Visited, Depth + 1, + CreatedValuesCount); } else { if (!Visited.insert(FieldType.getCanonicalType()).second) return; @@ -1184,7 +979,7 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, } }; - for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { + for (const auto &[Field, FieldLoc] : Loc.children()) { assert(Field != nullptr); QualType FieldType = Field->getType(); @@ -1193,12 +988,14 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, CreatedValuesCount)); } else { - StorageLocation *FieldLoc = Loc.getChild(*Field); assert(FieldLoc != nullptr); initField(FieldType, *FieldLoc); } } - for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { + for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { + assert(FieldLoc != nullptr); + QualType FieldType = FieldLoc->getType(); + // Synthetic fields cannot have reference type, so we don't need to deal // with this case. assert(!FieldType->isReferenceType()); @@ -1225,36 +1022,38 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D, return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); } + Value *Val = nullptr; + if (InitExpr) { + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). + Val = getValue(*InitExpr); + + if (!Val && isa(InitExpr) && + InitExpr->getType()->isPointerType()) + Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType()); + } + if (!Val) + Val = createValue(Ty); + + if (Ty->isRecordType()) + return cast(Val)->getLoc(); + StorageLocation &Loc = D ? createStorageLocation(*D) : createStorageLocation(Ty); - if (Ty->isRecordType()) { - auto &RecordLoc = cast(Loc); - if (!InitExpr) - initializeFieldsWithValues(RecordLoc); - refreshRecordValue(RecordLoc, *this); - } else { - Value *Val = nullptr; - if (InitExpr) - // In the (few) cases where an expression is intentionally - // "uninterpreted", `InitExpr` is not associated with a value. There are - // two ways to handle this situation: propagate the status, so that - // uninterpreted initializers result in uninterpreted variables, or - // provide a default value. We choose the latter so that later refinements - // of the variable can be used for reasoning about the surrounding code. - // For this reason, we let this case be handled by the `createValue()` - // call below. - // - // FIXME. If and when we interpret all language cases, change this to - // assert that `InitExpr` is interpreted, rather than supplying a - // default value (assuming we don't update the environment API to return - // references). 
- Val = getValue(*InitExpr); - if (!Val) - Val = createValue(Ty); - if (Val) - setValue(Loc, *Val); - } + if (Val) + setValue(Loc, *Val); return Loc; } @@ -1273,8 +1072,6 @@ bool Environment::allows(const Formula &F) const { void Environment::dump(raw_ostream &OS) const { llvm::DenseMap LocToName; - if (LocForRecordReturnVal != nullptr) - LocToName[LocForRecordReturnVal] = "(returned record)"; if (ThisPointeeLoc != nullptr) LocToName[ThisPointeeLoc] = "this"; @@ -1305,9 +1102,6 @@ void Environment::dump(raw_ostream &OS) const { if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) OS << " (" << Iter->second << ")"; OS << "\n"; - } else if (Func->getReturnType()->isRecordType() || - isa(Func)) { - OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; } else if (!Func->getReturnType()->isVoidType()) { if (ReturnVal == nullptr) OS << "ReturnVal: nullptr\n"; @@ -1328,22 +1122,6 @@ void Environment::dump() const { dump(llvm::dbgs()); } -Environment::PrValueToResultObject Environment::buildResultObjectMap( - DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, - RecordStorageLocation *ThisPointeeLoc, - RecordStorageLocation *LocForRecordReturnVal) { - assert(FuncDecl->doesThisDeclarationHaveABody()); - - PrValueToResultObject Map; - - ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); - if (const auto *Ctor = dyn_cast(FuncDecl)) - Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); - Visitor.TraverseStmt(FuncDecl->getBody()); - - return Map; -} - RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, const Environment &Env) { Expr *ImplicitObject = MCE.getImplicitObjectArgument(); @@ -1438,11 +1216,24 @@ RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { assert(Expr.getType()->isRecordType()); - if (Expr.isPRValue()) - refreshRecordValue(Env.getResultObjectLocation(Expr), Env); + if (Expr.isPRValue()) { + if (auto *ExistingVal = Env.get(Expr)) { + auto &NewVal = Env.create(ExistingVal->getLoc()); + Env.setValue(Expr, NewVal); + Env.setValue(NewVal.getLoc(), NewVal); + return NewVal; + } - if (auto *Loc = Env.get(Expr)) - refreshRecordValue(*Loc, Env); + auto &NewVal = *cast(Env.createValue(Expr.getType())); + Env.setValue(Expr, NewVal); + return NewVal; + } + + if (auto *Loc = Env.get(Expr)) { + auto &NewVal = Env.create(*Loc); + Env.setValue(*Loc, NewVal); + return NewVal; + } auto &NewVal = *cast(Env.createValue(Expr.getType())); Env.setStorageLocation(Expr, NewVal.getLoc()); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 88a9c0eccbebc..0a2e8368d541d 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -460,9 +460,11 @@ class TransferVisitor : public ConstStmtVisitor { // So make sure we have a value if we didn't propagate one above. if (S->isPRValue() && S->getType()->isRecordType()) { if (Env.getValue(*S) == nullptr) { - auto &Loc = Env.getResultObjectLocation(*S); - Env.initializeFieldsWithValues(Loc); - refreshRecordValue(Loc, Env); + Value *Val = Env.createValue(S->getType()); + // We're guaranteed to always be able to create a value for record + // types. 
+ assert(Val != nullptr); + Env.setValue(*S, *Val); } } } @@ -470,13 +472,6 @@ class TransferVisitor : public ConstStmtVisitor { void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) { const Expr *InitExpr = S->getExpr(); assert(InitExpr != nullptr); - - // If this is a prvalue of record type, the handler for `*InitExpr` (if one - // exists) will initialize the result object; there is no value to propgate - // here. - if (S->getType()->isRecordType() && S->isPRValue()) - return; - propagateValueOrStorageLocation(*InitExpr, *S, Env); } @@ -484,17 +479,6 @@ class TransferVisitor : public ConstStmtVisitor { const CXXConstructorDecl *ConstructorDecl = S->getConstructor(); assert(ConstructorDecl != nullptr); - // `CXXConstructExpr` can have array type if default-initializing an array - // of records. We don't handle this specifically beyond potentially inlining - // the call. - if (!S->getType()->isRecordType()) { - transferInlineCall(S, ConstructorDecl); - return; - } - - RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); - Env.setValue(*S, refreshRecordValue(Loc, Env)); - if (ConstructorDecl->isCopyOrMoveConstructor()) { // It is permissible for a copy/move constructor to have additional // parameters as long as they have default arguments defined for them. @@ -507,14 +491,24 @@ class TransferVisitor : public ConstStmtVisitor { if (ArgLoc == nullptr) return; - // Even if the copy/move constructor call is elidable, we choose to copy - // the record in all cases (which isn't wrong, just potentially not - // optimal). - copyRecord(*ArgLoc, Loc, Env); + if (S->isElidable()) { + if (Value *Val = Env.getValue(*ArgLoc)) + Env.setValue(*S, *Val); + } else { + auto &Val = *cast(Env.createValue(S->getType())); + Env.setValue(*S, Val); + copyRecord(*ArgLoc, Val.getLoc(), Env); + } return; } - Env.initializeFieldsWithValues(Loc, S->getType()); + // `CXXConstructExpr` can have array type if default-initializing an array + // of records, and we currently can't create values for arrays. So check if + // we've got a record type. + if (S->getType()->isRecordType()) { + auto &InitialVal = *cast(Env.createValue(S->getType())); + Env.setValue(*S, InitialVal); + } transferInlineCall(S, ConstructorDecl); } @@ -557,15 +551,19 @@ class TransferVisitor : public ConstStmtVisitor { if (S->isGLValue()) { Env.setStorageLocation(*S, *LocDst); } else if (S->getType()->isRecordType()) { - // Assume that the assignment returns the assigned value. - copyRecord(*LocDst, Env.getResultObjectLocation(*S), Env); + // Make sure that we have a `RecordValue` for this expression so that + // `Environment::getResultObjectLocation()` is able to return a location + // for it. + if (Env.getValue(*S) == nullptr) + refreshRecordValue(*S, Env); } return; } - // `CXXOperatorCallExpr` can be a prvalue. Call `VisitCallExpr`() to - // initialize the prvalue's fields with values. + // CXXOperatorCallExpr can be prvalues. Call `VisitCallExpr`() to create + // a `RecordValue` for them so that `Environment::getResultObjectLocation()` + // can return a value. 
    VisitCallExpr(S);
  }
 
@@ -582,6 +580,11 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     }
   }
 
+  void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) {
+    if (Value *Val = Env.createValue(S->getType()))
+      Env.setValue(*S, *Val);
+  }
+
   void VisitCallExpr(const CallExpr *S) {
     // Of clang's builtins, only `__builtin_expect` is handled explicitly, since
     // others (like trap, debugtrap, and unreachable) are handled by CFG
@@ -609,14 +612,13 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     } else if (const FunctionDecl *F = S->getDirectCallee()) {
       transferInlineCall(S, F);
 
-      // If this call produces a prvalue of record type, initialize its fields
-      // with values.
+      // If this call produces a prvalue of record type, make sure that we have
+      // a `RecordValue` for it. This is required so that
+      // `Environment::getResultObjectLocation()` is able to return a location
+      // for this `CallExpr`.
       if (S->getType()->isRecordType() && S->isPRValue())
-        if (Env.getValue(*S) == nullptr) {
-          RecordStorageLocation &Loc = Env.getResultObjectLocation(*S);
-          Env.initializeFieldsWithValues(Loc);
-          Env.setValue(*S, refreshRecordValue(Loc, Env));
-        }
+        if (Env.getValue(*S) == nullptr)
+          refreshRecordValue(*S, Env);
     }
   }
 
@@ -664,10 +666,8 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     // `getLogicOperatorSubExprValue()`.
     if (S->isGLValue())
       Env.setStorageLocation(*S, Env.createObject(S->getType()));
-    else if (!S->getType()->isRecordType()) {
-      if (Value *Val = Env.createValue(S->getType()))
-        Env.setValue(*S, *Val);
-    }
+    else if (Value *Val = Env.createValue(S->getType()))
+      Env.setValue(*S, *Val);
   }
 
   void VisitInitListExpr(const InitListExpr *S) {
@@ -688,51 +688,71 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       return;
     }
 
-    RecordStorageLocation &Loc = Env.getResultObjectLocation(*S);
-    Env.setValue(*S, refreshRecordValue(Loc, Env));
-
-    // Initialization of base classes and fields of record type happens when we
-    // visit the nested `CXXConstructExpr` or `InitListExpr` for that base class
-    // or field. We therefore only need to deal with fields of non-record type
-    // here.
-
+    llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
     RecordInitListHelper InitListHelper(S);
 
+    for (auto [Base, Init] : InitListHelper.base_inits()) {
+      assert(Base->getType().getCanonicalType() ==
+             Init->getType().getCanonicalType());
+      auto *BaseVal = Env.get<RecordValue>(*Init);
+      if (!BaseVal)
+        BaseVal = cast<RecordValue>(Env.createValue(Init->getType()));
+      // Take ownership of the fields of the `RecordValue` for the base class
+      // and incorporate them into the "flattened" set of fields for the
+      // derived class.
+      auto Children = BaseVal->getLoc().children();
+      FieldLocs.insert(Children.begin(), Children.end());
+    }
+
     for (auto [Field, Init] : InitListHelper.field_inits()) {
-      if (Field->getType()->isRecordType())
-        continue;
-      if (Field->getType()->isReferenceType()) {
-        assert(Field->getType().getCanonicalType()->getPointeeType() ==
-               Init->getType().getCanonicalType());
-        Loc.setChild(*Field, &Env.createObject(Field->getType(), Init));
-        continue;
-      }
-      assert(Field->getType().getCanonicalType().getUnqualifiedType() ==
-             Init->getType().getCanonicalType().getUnqualifiedType());
-      StorageLocation *FieldLoc = Loc.getChild(*Field);
-      // Locations for non-reference fields must always be non-null.
-      assert(FieldLoc != nullptr);
-      Value *Val = Env.getValue(*Init);
-      if (Val == nullptr && isa<ImplicitValueInitExpr>(Init) &&
-          Init->getType()->isPointerType())
-        Val =
-            &Env.getOrCreateNullPointerValue(Init->getType()->getPointeeType());
-      if (Val == nullptr)
-        Val = Env.createValue(Field->getType());
-      if (Val != nullptr)
-        Env.setValue(*FieldLoc, *Val);
+      assert(
+          // The types are the same, or
+          Field->getType().getCanonicalType().getUnqualifiedType() ==
+              Init->getType().getCanonicalType().getUnqualifiedType() ||
+          // the field's type is T&, and the initializer is T
+          (Field->getType()->isReferenceType() &&
+           Field->getType().getCanonicalType()->getPointeeType() ==
+               Init->getType().getCanonicalType()));
+      auto &Loc = Env.createObject(Field->getType(), Init);
+      FieldLocs.insert({Field, &Loc});
     }
 
-    for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) {
-      QualType FieldType = FieldLoc->getType();
-      if (FieldType->isRecordType()) {
-        Env.initializeFieldsWithValues(*cast<RecordStorageLocation>(FieldLoc));
-      } else {
-        if (Value *Val = Env.createValue(FieldType))
-          Env.setValue(*FieldLoc, *Val);
+    // In the case of a union, we don't in general have initializers for all
+    // of the fields. Create storage locations for the remaining fields (but
+    // don't associate them with values).
+    if (Type->isUnionType()) {
+      for (const FieldDecl *Field :
+           Env.getDataflowAnalysisContext().getModeledFields(Type)) {
+        if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted)
+          it->second = &Env.createStorageLocation(Field->getType());
       }
     }
 
+    // Check that we satisfy the invariant that a `RecordStorageLocation`
+    // contains exactly the set of modeled fields for that type.
+    // `ModeledFields` includes fields from all the bases, but only the
+    // modeled ones. However, if a class type is initialized with an
+    // `InitListExpr`, all fields in the class, including those from base
+    // classes, are included in the set of modeled fields. The code above
+    // should therefore populate exactly the modeled fields.
+    assert(containsSameFields(
+        Env.getDataflowAnalysisContext().getModeledFields(Type), FieldLocs));
+
+    RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs;
+    for (const auto &Entry :
+         Env.getDataflowAnalysisContext().getSyntheticFields(Type)) {
+      SyntheticFieldLocs.insert(
+          {Entry.getKey(), &Env.createObject(Entry.getValue())});
+    }
+
+    auto &Loc = Env.getDataflowAnalysisContext().createRecordStorageLocation(
+        Type, std::move(FieldLocs), std::move(SyntheticFieldLocs));
+    RecordValue &RecordVal = Env.create<RecordValue>(Loc);
+
+    Env.setValue(Loc, RecordVal);
+
+    Env.setValue(*S, RecordVal);
+
     // FIXME: Implement array initialization.
   }
 
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 1b73c5d683016..595f70f819ddb 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -369,10 +369,17 @@ builtinTransferInitializer(const CFGInitializer &Elt,
       ParentLoc->setChild(*Member, InitExprLoc);
   } else if (auto *InitExprVal = Env.getValue(*InitExpr)) {
     assert(MemberLoc != nullptr);
-    // Record-type initializers construct themselves directly into the result
-    // object, so there is no need to handle them here.
-    if (!Member->getType()->isRecordType())
+    if (Member->getType()->isRecordType()) {
+      auto *InitValStruct = cast<RecordValue>(InitExprVal);
+      // FIXME: Rather than performing a copy here, we should really be
+      // initializing the field in place. This would require us to propagate the
+      // storage location of the field to the AST node that creates the
+      // `RecordValue`.
+      copyRecord(InitValStruct->getLoc(),
+                 *cast<RecordStorageLocation>(MemberLoc), Env);
+    } else {
       Env.setValue(*MemberLoc, *InitExprVal);
+    }
   }
 }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
index cc20623f881ff..465a8e21690c4 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
@@ -24,7 +24,6 @@ namespace {
 
 using namespace clang;
 using namespace dataflow;
-using ::clang::dataflow::test::findValueDecl;
 using ::clang::dataflow::test::getFieldValue;
 using ::testing::Contains;
 using ::testing::IsNull;
@@ -200,48 +199,6 @@ TEST_F(EnvironmentTest, JoinRecords) {
   }
 }
 
-TEST_F(EnvironmentTest, DifferentReferenceLocInJoin) {
-  // This tests the case where the storage location for a reference-type
-  // variable is different for two states being joined. We used to believe this
-  // could not happen and therefore had an assertion disallowing this; this test
-  // exists to demonstrate that we can handle this condition without a failing
-  // assertion. See also the discussion here:
-  // https://discourse.llvm.org/t/70086/6
-
-  using namespace ast_matchers;
-
-  std::string Code = R"cc(
-    void f(int &ref) {}
-  )cc";
-
-  auto Unit =
-      tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++11"});
-  auto &Context = Unit->getASTContext();
-
-  ASSERT_EQ(Context.getDiagnostics().getClient()->getNumErrors(), 0U);
-
-  const ValueDecl *Ref = findValueDecl(Context, "ref");
-
-  Environment Env1(DAContext);
-  StorageLocation &Loc1 = Env1.createStorageLocation(Context.IntTy);
-  Env1.setStorageLocation(*Ref, Loc1);
-
-  Environment Env2(DAContext);
-  StorageLocation &Loc2 = Env2.createStorageLocation(Context.IntTy);
-  Env2.setStorageLocation(*Ref, Loc2);
-
-  EXPECT_NE(&Loc1, &Loc2);
-
-  Environment::ValueModel Model;
-  Environment EnvJoined =
-      Environment::join(Env1, Env2, Model, Environment::DiscardExprState);
-
-  // Joining environments with different storage locations for the same
-  // declaration results in the declaration being removed from the joined
-  // environment.
-  EXPECT_EQ(EnvJoined.getStorageLocation(*Ref), nullptr);
-}
-
 TEST_F(EnvironmentTest, InitGlobalVarsFun) {
   using namespace ast_matchers;
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 00dafb2988c69..ca055a462a286 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1582,9 +1582,10 @@ TEST(TransferTest, FieldsDontHaveValuesInConstructorWithBaseClass) {
       [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
         ASTContext &ASTCtx) {
        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
-        // The field of the base class should already have been initialized with
-        // a value by the base constructor.
-        EXPECT_NE(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal",
+        // FIXME: The field of the base class should already have been
+        // initialized with a value by the base constructor. This test documents
+        // the current buggy behavior.
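+        // (When the underlying initialization bug is fixed, this check should
+        // revert to EXPECT_NE.)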
+ EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", ASTCtx, Env), nullptr); EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", @@ -2997,12 +2998,8 @@ TEST(TransferTest, ResultObjectLocation) { TEST(TransferTest, ResultObjectLocationForDefaultArgExpr) { std::string Code = R"( - struct Inner {}; - struct Outer { - Inner I = {}; - }; - - void funcWithDefaultArg(Outer O = {}); + struct S {}; + void funcWithDefaultArg(S s = S()); void target() { funcWithDefaultArg(); // [[p]] @@ -3061,7 +3058,13 @@ TEST(TransferTest, ResultObjectLocationForDefaultInitExpr) { RecordStorageLocation &Loc = Env.getResultObjectLocation(*DefaultInit); - EXPECT_EQ(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); + // FIXME: The result object location for the `CXXDefaultInitExpr` should + // be the location of the member variable being initialized, but we + // don't do this correctly yet; see also comments in + // `builtinTransferInitializer()`. + // For the time being, we just document the current erroneous behavior + // here (this should be `EXPECT_EQ` when the behavior is fixed). + EXPECT_NE(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); }); } @@ -3098,79 +3101,6 @@ TEST(TransferTest, ResultObjectLocationForCXXOperatorCallExpr) { }); } -TEST(TransferTest, ResultObjectLocationForStdInitializerListExpr) { - std::string Code = R"( - namespace std { - template - struct initializer_list {}; - } // namespace std - - void target() { - std::initializer_list list = {1}; - // [[p]] - } - )"; - - using ast_matchers::cxxStdInitializerListExpr; - using ast_matchers::match; - using ast_matchers::selectFirst; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { - const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - auto *StdInitList = selectFirst( - "std_init_list", - match(cxxStdInitializerListExpr().bind("std_init_list"), ASTCtx)); - ASSERT_NE(StdInitList, nullptr); - - EXPECT_EQ(&Env.getResultObjectLocation(*StdInitList), - &getLocForDecl(ASTCtx, Env, "list")); - }); -} - -TEST(TransferTest, ResultObjectLocationPropagatesThroughConditionalOperator) { - std::string Code = R"( - struct A { - A(int); - }; - - void target(bool b) { - A a = b ? 
A(0) : A(1); - (void)0; // [[p]] - } - )"; - using ast_matchers::cxxConstructExpr; - using ast_matchers::equals; - using ast_matchers::hasArgument; - using ast_matchers::integerLiteral; - using ast_matchers::match; - using ast_matchers::selectFirst; - using ast_matchers::traverse; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { - const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - auto *ConstructExpr0 = selectFirst( - "construct", - match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(0)))) - .bind("construct"), - ASTCtx)); - auto *ConstructExpr1 = selectFirst( - "construct", - match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(1)))) - .bind("construct"), - ASTCtx)); - - auto &ALoc = getLocForDecl(ASTCtx, Env, "a"); - EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr0), &ALoc); - EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr1), &ALoc); - }); -} - TEST(TransferTest, StaticCast) { std::string Code = R"( void target(int Foo) { @@ -5956,38 +5886,6 @@ TEST(TransferTest, ContextSensitiveReturnRecord) { {BuiltinOptions{ContextSensitiveOptions{}}}); } -TEST(TransferTest, ContextSensitiveReturnSelfReferentialRecord) { - std::string Code = R"( - struct S { - S() { self = this; } - S *self; - }; - - S makeS() { - // RVO guarantees that this will be constructed directly into `MyS`. - return S(); - } - - void target() { - S MyS = makeS(); - // [[p]] - } - )"; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { - const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - auto &MySLoc = getLocForDecl(ASTCtx, Env, "MyS"); - - auto *SelfVal = - cast(getFieldValue(&MySLoc, "self", ASTCtx, Env)); - EXPECT_EQ(&SelfVal->getPointeeLoc(), &MySLoc); - }, - {BuiltinOptions{ContextSensitiveOptions{}}}); -} - TEST(TransferTest, ContextSensitiveMethodLiteral) { std::string Code = R"( class MyClass { @@ -6932,6 +6830,50 @@ TEST(TransferTest, LambdaCaptureThis) { }); } +TEST(TransferTest, DifferentReferenceLocInJoin) { + // This test triggers a case where the storage location for a reference-type + // variable is different for two states being joined. We used to believe this + // could not happen and therefore had an assertion disallowing this; this test + // exists to demonstrate that we can handle this condition without a failing + // assertion. See also the discussion here: + // https://discourse.llvm.org/t/70086/6 + std::string Code = R"( + namespace std { + template struct initializer_list { + const T* begin(); + const T* end(); + }; + } + + void target(char* p, char* end) { + while (p != end) { + if (*p == ' ') { + p++; + continue; + } + + auto && range = {1, 2}; + for (auto b = range.begin(), e = range.end(); b != e; ++b) { + } + (void)0; + // [[p]] + } + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + // Joining environments with different storage locations for the same + // declaration results in the declaration being removed from the joined + // environment. + const ValueDecl *VD = findValueDecl(ASTCtx, "range"); + ASSERT_EQ(Env.getStorageLocation(*VD), nullptr); + }); +} + // This test verifies correct modeling of a relational dependency that goes // through unmodeled functions (the simple `cond()` in this case). 
TEST(TransferTest, ConditionalRelation) { From a6d1366b736cad85b3bb9fbdda340e07488d6cde Mon Sep 17 00:00:00 2001 From: erichkeane Date: Wed, 10 Apr 2024 12:41:26 -0700 Subject: [PATCH 063/886] [NFC] Remove a pair of incorrect comments from ParseOpenACC We attempt to continue parsing, but the comment says the opposite. Just remove the inaccurate comments in this patch. --- clang/lib/Parse/ParseOpenACC.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 59a4a5f534676..b487a1968d1ec 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -843,8 +843,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( } case OpenACCClauseKind::If: { ExprResult CondExpr = ParseOpenACCConditionalExpr(*this); - // An invalid expression can be just about anything, so just give up on - // this clause list. + if (CondExpr.isInvalid()) { Parens.skipToEnd(); return OpenACCCanContinue(); @@ -966,8 +965,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::Self: { assert(DirKind != OpenACCDirectiveKind::Update); ExprResult CondExpr = ParseOpenACCConditionalExpr(*this); - // An invalid expression can be just about anything, so just give up on - // this clause list. + if (CondExpr.isInvalid()) { Parens.skipToEnd(); return OpenACCCanContinue(); From b3792ae42a4adda5cb51d53f3d6a4b9b025b11fd Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Wed, 10 Apr 2024 16:06:31 -0400 Subject: [PATCH 064/886] [OpenMP][AIX] Fix test config for AIX (#88272) This patch fixes the test config so that it works for `tasking/omp50_taskdep_depobj.c` which uses different flags to test with compiler's `omp.h`. * set test environment variable `OBJECT_MODE` to `64` if it is set explicitly to `64` in the AIX environment. `OBJECT_MODE` is default to `32` and is recognized by AIX compilers and toolchain. In this way, we don't need to set `-m64` for all compiler flags for 64-bit mode * add option `-Wl,-bmaxdata` to 32-bit `test_openmp_flags` used by `tasking/omp50_taskdep_depobj.c` --- openmp/runtime/test/lit.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index e27e52bb4289b..e8f7f3470580e 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -112,13 +112,15 @@ if config.operating_system == 'AIX': config.available_features.add("aix") object_mode = os.environ.get('OBJECT_MODE', '32') if object_mode == '64': - config.test_flags += " -m64" + # Set OBJECT_MODE to 64 for LIT test if it is explicitly set. + config.environment['OBJECT_MODE'] = os.environ['OBJECT_MODE'] elif object_mode == '32': # Set user data area to 2GB since the default size 256MB in 32-bit mode # is not sufficient to run LIT tests on systems that have a lot of # CPUs when creating one worker thread for each CPU and each worker # thread uses 4MB stack size. config.test_flags += " -Wl,-bmaxdata:0x80000000" + config.test_openmp_flags += " -Wl,-bmaxdata:0x80000000" if 'Linux' in config.operating_system: config.available_features.add("linux") From a12836647e08c4ad203b9834ac55892fa0b9f2d3 Mon Sep 17 00:00:00 2001 From: David Pagan Date: Wed, 10 Apr 2024 13:09:17 -0700 Subject: [PATCH 065/886] [OpenMP][CodeGen] Improved codegen for combined loop directives (#87278) IR for 'target teams loop' is now dependent on suitability of associated loop-nest. 
If a loop-nest:
- contains no function calls, or every call is to an OpenMP API routine, or
  the -fopenmp-assume-no-nested-parallelism flag has been specified, AND
- does not contain nested 'loop bind(parallel)' directives,

then it can be emitted as 'target teams distribute parallel for', which is
the current default. Otherwise, it is emitted as 'target teams distribute'.
(A short sketch of both cases follows the first header hunk below.)

Added debug output indicating how 'target teams loop' was emitted. The flag
is -mllvm -debug-only=target-teams-loop-codegen.

Added LIT tests explicitly verifying 'target teams loop' emitted as a
parallel loop and a distribute loop.

Updated other 'loop'-related tests as needed to reflect the change in IR.
- These updates account for most of the changed files and
  additions/deletions.
---
 clang/include/clang/AST/StmtOpenMP.h          |   11 +-
 clang/lib/AST/StmtOpenMP.cpp                  |    3 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |   15 +-
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp      |    7 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   87 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |   86 +-
 clang/lib/Serialization/ASTReaderStmt.cpp     |    1 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |    1 +
 ...vptx_target_teams_generic_loop_codegen.cpp |   48 +-
 ...eams_generic_loop_generic_mode_codegen.cpp |  397 +-
 .../target_teams_generic_loop_codegen.cpp     | 1132 +----
 ...ams_generic_loop_codegen_as_distribute.cpp |  587 +++
 ...s_generic_loop_codegen_as_parallel_for.cpp | 3998 +++++++++++++++++
 .../target_teams_generic_loop_if_codegen.cpp  |  710 +--
 ...get_teams_generic_loop_private_codegen.cpp | 1438 +-----
 .../OpenMP/teams_generic_loop_codegen-1.cpp   | 1360 +-----
 .../OpenMP/teams_generic_loop_codegen.cpp     |  630 +--
 .../teams_generic_loop_collapse_codegen.cpp   |  808 +---
 .../teams_generic_loop_private_codegen.cpp    |  685 +--
 .../teams_generic_loop_reduction_codegen.cpp  |  785 +---
 20 files changed, 5886 insertions(+), 6903 deletions(-)
 create mode 100644 clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
 create mode 100644 clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 3cb3c1014d73b..f735fa5643aec 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6109,6 +6109,8 @@ class OMPTeamsGenericLoopDirective final : public OMPLoopDirective {
 class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
+  /// true if loop directive's associated loop can be a parallel for.
+  bool CanBeParallelFor = false;
   /// Build directive with the given start and end location.
   ///
   /// \param StartLoc Starting location of the directive kind.
@@ -6131,6 +6133,9 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
                          llvm::omp::OMPD_target_teams_loop, SourceLocation(),
                          SourceLocation(), CollapsedNum) {}
 
+  /// Set whether associated loop can be a parallel for.
+  void setCanBeParallelFor(bool ParFor) { CanBeParallelFor = ParFor; }
+
 public:
   /// Creates directive with a list of \p Clauses.
   ///
@@ -6145,7 +6150,7 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
   static OMPTargetTeamsGenericLoopDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool CanBeParallelFor);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
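A minimal sketch of the two cases described in the commit message, using
hypothetical functions saxpy, scale, and helper that do not appear in this
patch:

    // Lowered as 'target teams distribute parallel for': the loop-nest
    // contains no calls and no nested 'loop bind(parallel)' directive.
    void saxpy(int n, float a, const float *x, float *y) {
    #pragma omp target teams loop map(to: x[0:n]) map(tofrom: y[0:n])
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
    }

    #pragma omp declare target
    float helper(float v);
    #pragma omp end declare target

    // Lowered as 'target teams distribute': the call to helper() could hide
    // nested parallelism. (An omp_* API call, or compiling with
    // -fopenmp-assume-no-nested-parallelism, would keep the parallel-for
    // lowering.)
    void scale(int n, float *x) {
    #pragma omp target teams loop map(tofrom: x[0:n])
      for (int i = 0; i < n; ++i)
        x[i] = helper(x[i]);
    }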
@@ -6159,6 +6164,10 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective { unsigned CollapsedNum, EmptyShell); + /// Return true if current loop directive's associated loop can be a + /// parallel for. + bool canBeParallelFor() const { return CanBeParallelFor; } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPTargetTeamsGenericLoopDirectiveClass; } diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 426b35848cb5c..d8519b2071e6d 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -2431,7 +2431,7 @@ OMPTeamsGenericLoopDirective::CreateEmpty(const ASTContext &C, OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs) { + const HelperExprs &Exprs, bool CanBeParallelFor) { auto *Dir = createDirective( C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_target_teams_loop), StartLoc, EndLoc, @@ -2473,6 +2473,7 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create( Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setCanBeParallelFor(CanBeParallelFor); return Dir; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8eb10584699fa..2ae11e129c75e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2656,11 +2656,12 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, - isOpenMPDistributeDirective(DKind) + isOpenMPDistributeDirective(DKind) || + (DKind == OMPD_target_teams_loop) ? OMP_IDENT_WORK_DISTRIBUTE - : isOpenMPLoopDirective(DKind) - ? OMP_IDENT_WORK_LOOP - : OMP_IDENT_WORK_SECTIONS), + : isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); if (isOpenMPDistributeDirective(DKind) && @@ -8885,7 +8886,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - // For now, just treat 'target teams loop' as if it's distributed. + // For now, treat 'target' with nested 'teams loop' as if it's + // distributed (target teams distribute). if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) return NestedDir; if (DKind == OMPD_teams) { @@ -9369,7 +9371,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; - // Get nested teams distribute kind directive, if any. + // Get nested teams distribute kind directive, if any. For now, treat + // 'target_teams_loop' as if it's really a target_teams_distribute. 
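+  // (This affects only the iteration-count value computed here for the
+  // kernel launch; the actual lowering is chosen later in CGStmtOpenMP.)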
   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 5baac8f0e3e26..59ba03c6b8625 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -646,7 +646,6 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_target:
   case OMPD_target_teams:
     return hasNestedSPMDDirective(Ctx, D);
-  case OMPD_target_teams_loop:
   case OMPD_target_parallel_loop:
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
@@ -658,6 +657,12 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
     return true;
   case OMPD_target_teams_distribute:
     return false;
+  case OMPD_target_teams_loop:
+    // Whether this is true or not depends on how the directive will
+    // eventually be emitted.
+    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
+      return TTLD->canBeParallelFor();
+    return false;
   case OMPD_parallel:
   case OMPD_for:
   case OMPD_parallel_for:
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index e6d504bcdeca5..3bf99366b69ce 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -24,6 +24,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/PrettyStackTrace.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -34,11 +35,14 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include <optional>
 
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
 
+#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
+
 static const VarDecl *getBaseDecl(const Expr *Ref);
 
 namespace {
@@ -1432,9 +1436,12 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
         *this, D.getBeginLoc(),
         isOpenMPWorksharingDirective(D.getDirectiveKind()));
   }
+  bool TeamsLoopCanBeParallel = false;
+  if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
+    TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
   bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                     isOpenMPParallelDirective(D.getDirectiveKind()) ||
-                    ReductionKind == OMPD_simd;
+                    TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
   bool SimpleReduction = ReductionKind == OMPD_simd;
   // Emit nowait reduction if nowait clause is present or directive is a
   // parallel directive (it always has implicit barrier).
@@ -7928,11 +7935,9 @@ void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
     const OMPTeamsGenericLoopDirective &S) {
   // To be consistent with current behavior of 'target teams loop', emit
-  // 'teams loop' as if its constituent constructs are 'distribute,
-  // 'parallel, and 'for'.
+  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
-                              S.getDistInc());
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
 
   // Emit teams region as a standalone region.
@@ -7946,15 +7951,33 @@ void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; - emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); emitPostUpdateForReductionClause(*this, S, [](CodeGenFunction &) { return nullptr; }); } -static void -emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, - const OMPTargetTeamsGenericLoopDirective &S, - PrePostActionTy &Action) { +static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, + std::string StatusMsg, + const OMPExecutableDirective &D) { +#ifndef NDEBUG + bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice; + if (IsDevice) + StatusMsg += ": DEVICE"; + else + StatusMsg += ": HOST"; + SourceLocation L = D.getBeginLoc(); + auto &SM = CGF.getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(L); + const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr; + unsigned LineNo = + PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); + llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n"; +#endif +} + +static void emitTargetTeamsGenericLoopRegionAsParallel( + CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsGenericLoopDirective &S) { Action.Enter(CGF); // Emit 'teams loop' as if its constituent constructs are 'distribute, // 'parallel, and 'for'. @@ -7974,19 +7997,50 @@ emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; - + DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, + emitTargetTeamsLoopCodegenStatus( + CGF, TTL_CODEGEN_TYPE " as parallel for", S)); emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, CodeGenTeams); emitPostUpdateForReductionClause(CGF, S, [](CodeGenFunction &) { return nullptr; }); } -/// Emit combined directive 'target teams loop' as if its constituent -/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. +static void emitTargetTeamsGenericLoopRegionAsDistribute( + CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsGenericLoopDirective &S) { + Action.Enter(CGF); + // Emit 'teams loop' as if its constituent construct is 'distribute'. + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. 
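+  // (On this path each team's initial thread executes its chunk of the
+  // distributed loop; no 'parallel for' region is created.)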
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
+                  emitTargetTeamsLoopCodegenStatus(
+                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
     const OMPTargetTeamsGenericLoopDirective &S) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+    if (S.canBeParallelFor())
+      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+    else
+      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
   };
   emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
@@ -7996,7 +8050,10 @@ void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
     const OMPTargetTeamsGenericLoopDirective &S) {
   // Emit SPMD target parallel loop region as a standalone region.
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+    if (S.canBeParallelFor())
+      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+    else
+      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
   };
   llvm::Function *Fn;
   llvm::Constant *Addr;
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0ba54a3a9cae3..c814535ad6bdb 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4478,6 +4478,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
                              Params);
     break;
   }
+  // For 'target teams loop', collect all captured regions so codegen can
+  // later decide the best IR to emit given the associated loop-nest.
   case OMPD_target_teams_loop:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd: {
@@ -6135,6 +6137,79 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack,
   }
 }
 
+namespace {
+/// A 'teams loop' with a nested 'loop bind(parallel)' or generic function
+/// call in the associated loop-nest cannot be a 'parallel for'.
+class TeamsLoopChecker final : public ConstStmtVisitor<TeamsLoopChecker> {
+  Sema &SemaRef;
+
+public:
+  bool teamsLoopCanBeParallelFor() const { return TeamsLoopCanBeParallelFor; }
+
+  // Is there a nested OpenMP loop bind(parallel)?
+  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
+    if (D->getDirectiveKind() == llvm::omp::Directive::OMPD_loop) {
+      if (const auto *C = D->getSingleClause<OMPBindClause>())
+        if (C->getBindKind() == OMPC_BIND_parallel) {
+          TeamsLoopCanBeParallelFor = false;
+          // No need to continue visiting any more.
+          return;
+        }
+    }
+    for (const Stmt *Child : D->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitCallExpr(const CallExpr *C) {
+    // Function calls inhibit parallel loop translation of 'target teams loop'
+    // unless the assume-no-nested-parallelism flag has been specified.
+    // OpenMP API runtime library calls do not inhibit parallel loop
+    // translation, regardless of the assume-no-nested-parallelism flag.
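+    // (For example, a call to omp_get_num_threads() does not inhibit the
+    // translation, while a call to an arbitrary user function does.)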
+    if (C) {
+      bool IsOpenMPAPI = false;
+      auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl());
+      if (FD) {
+        std::string Name = FD->getNameInfo().getAsString();
+        IsOpenMPAPI = Name.find("omp_") == 0;
+      }
+      TeamsLoopCanBeParallelFor =
+          IsOpenMPAPI || SemaRef.getLangOpts().OpenMPNoNestedParallelism;
+      if (!TeamsLoopCanBeParallelFor)
+        return;
+    }
+    for (const Stmt *Child : C->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitCapturedStmt(const CapturedStmt *S) {
+    if (!S)
+      return;
+    Visit(S->getCapturedDecl()->getBody());
+  }
+
+  void VisitStmt(const Stmt *S) {
+    if (!S)
+      return;
+    for (const Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
+  }
+  explicit TeamsLoopChecker(Sema &SemaRef)
+      : SemaRef(SemaRef), TeamsLoopCanBeParallelFor(true) {}
+
+private:
+  bool TeamsLoopCanBeParallelFor;
+};
+} // namespace
+
+static bool teamsLoopCanBeParallelFor(Stmt *AStmt, Sema &SemaRef) {
+  TeamsLoopChecker Checker(SemaRef);
+  Checker.Visit(AStmt);
+  return Checker.teamsLoopCanBeParallelFor();
+}
+
 bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
                             ArrayRef<OMPClause *> Clauses,
                             OpenMPBindClauseKind &BindKind,
@@ -10895,7 +10970,8 @@ StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective(
   setFunctionHasBranchProtectedScope();
 
   return OMPTargetTeamsGenericLoopDirective::Create(
-      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      teamsLoopCanBeParallelFor(AStmt, *this));
 }
 
 StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective(
@@ -15645,6 +15721,12 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
       CaptureRegion = OMPD_target;
     break;
+  case OMPD_teams_loop:
+  case OMPD_target_teams_loop:
+    // For [target] teams loop, assume capture region is 'teams' so it's
+    // available for codegen later to use if/when necessary.
+    CaptureRegion = OMPD_teams;
+    break;
   case OMPD_target_teams_distribute_parallel_for_simd:
     if (OpenMPVersion >= 50 &&
         (NameModifier == OMPD_unknown || NameModifier == OMPD_simd)) {
      CaptureRegion = OMPD_parallel;
      break;
    }
    [[fallthrough]];
-  case OMPD_target_teams_loop:
   case OMPD_target_teams_distribute_parallel_for:
     // If this clause applies to the nested 'parallel' region, capture within
     // the 'teams' region, otherwise do not capture.
@@ -15775,7 +15856,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_loop: - case OMPD_teams_loop: case OMPD_teams: case OMPD_tile: case OMPD_unroll: diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 3ddb15b10f48b..ca0460800898b 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2769,6 +2769,7 @@ void ASTStmtReader::VisitOMPTeamsGenericLoopDirective( void ASTStmtReader::VisitOMPTargetTeamsGenericLoopDirective( OMPTargetTeamsGenericLoopDirective *D) { VisitOMPLoopDirective(D); + D->setCanBeParallelFor(Record.readBool()); } void ASTStmtReader::VisitOMPParallelGenericLoopDirective( diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index ddee5db2b69ef..e3816181e2b2b 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2821,6 +2821,7 @@ void ASTStmtWriter::VisitOMPTeamsGenericLoopDirective( void ASTStmtWriter::VisitOMPTargetTeamsGenericLoopDirective( OMPTargetTeamsGenericLoopDirective *D) { VisitOMPLoopDirective(D); + Record.writeBool(D->canBeParallelFor()); Code = serialization::STMT_OMP_TARGET_TEAMS_GENERIC_LOOP_DIRECTIVE; } diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp index 5226b7498e4ce..c89c6eb65706a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp @@ -320,23 +320,31 @@ int bar(int n){ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -344,11 +352,12 @@ int bar(int n){ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -360,6 +369,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 @@ -1566,23 +1576,31 @@ int bar(int n){ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 -// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1590,11 +1608,12 @@ int bar(int n){ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1606,6 +1625,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 @@ -2801,23 +2821,31 @@ int bar(int n){ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 -// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr 
[[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]]) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2825,11 +2853,12 @@ int bar(int n){ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -2841,6 +2870,7 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp index ca2670f0cd643..f0effa760dcdb 100644 --- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp 
@@ -30,17 +30,20 @@ int main(int argc, char **argv) { #endif // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]]) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 @@ -50,9 +53,14 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -60,56 +68,55 @@ int main(int argc, char **argv) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 @@ -122,177 +129,54 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END12:%.*]] -// CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END12]] -// CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store 
i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) 
#[[ATTR4:[0-9]+]] +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]] // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24 -// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]]) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 @@ -302,9 +186,14 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 +// 
CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -312,56 +201,55 @@ int main(int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 @@ -374,155 +262,34 @@ int main(int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 
4 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END12:%.*]] -// CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END12]] -// CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] -// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: 
omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] -// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK2-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]]) +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) // CHECK2-NEXT: br label 
[[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp index 22cf534bf0ba2..3f752ac663f41 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -28,1118 +28,6 @@ int foo() { return 0; } #endif -// IR-PCH-HOST-LABEL: define {{[^@]+}}@_Z3foov -// IR-PCH-HOST-SAME: () #[[ATTR0:[0-9]+]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] -// IR-PCH-HOST-NEXT: ret i32 0 -// IR-PCH-HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 -// IR-PCH-HOST-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP0]]) -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined. 
-// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J4:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR-PCH-HOST: omp.arrayinit.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR-PCH-HOST: omp.arrayinit.done: -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// 
IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH-HOST: cond.true: -// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] -// IR-PCH-HOST: cond.false: -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: br label [[COND_END]] -// IR-PCH-HOST: cond.end: -// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH-HOST: omp.inner.for.cond: -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH-HOST: omp.inner.for.body: -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-PCH-HOST: omp.inner.for.inc: -// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]] -// IR-PCH-HOST: omp.inner.for.end: -// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-PCH-HOST: omp.loop.exit: -// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) -// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// IR-PCH-HOST-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR-PCH-HOST: .omp.lastprivate.then: -// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 -// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR-PCH-HOST: .omp.lastprivate.done: -// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 -// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-PCH-HOST-NEXT: 
[[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-PCH-HOST-NEXT: ] -// IR-PCH-HOST: .omp.reduction.case1: -// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH-HOST: omp.arraycpy.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// IR-PCH-HOST-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH-HOST: omp.arraycpy.done10: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.case2: -// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// IR-PCH-HOST: omp.arraycpy.body12: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 -// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label 
[[OMP_ARRAYCPY_BODY12]] -// IR-PCH-HOST: omp.arraycpy.done18: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.default: -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined..1 -// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J5:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// IR-PCH-HOST-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR-PCH-HOST: 
omp.arrayinit.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR-PCH-HOST: omp.arrayinit.done: -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 -// IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH-HOST: cond.true: -// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] -// IR-PCH-HOST: cond.false: -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: br label [[COND_END]] -// IR-PCH-HOST: cond.end: -// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] -// IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH-HOST: omp.inner.for.cond: -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// IR-PCH-HOST-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH-HOST: omp.inner.for.body: -// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 -// IR-PCH-HOST-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 -// IR-PCH-HOST-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// IR-PCH-HOST-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] -// IR-PCH-HOST-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// IR-PCH-HOST-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// IR-PCH-HOST-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] 
to i64
-// IR-PCH-HOST-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
-// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-HOST-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
-// IR-PCH-HOST-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-HOST-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]]
-// IR-PCH-HOST-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-HOST-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// IR-PCH-HOST: omp.body.continue:
-// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// IR-PCH-HOST: omp.inner.for.inc:
-// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-HOST-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1
-// IR-PCH-HOST-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
-// IR-PCH-HOST: omp.inner.for.end:
-// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// IR-PCH-HOST: omp.loop.exit:
-// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
-// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-PCH-HOST-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8
-// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-HOST-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// IR-PCH-HOST-NEXT: ]
-// IR-PCH-HOST: .omp.reduction.case1:
-// IR-PCH-HOST-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH-HOST: omp.arraycpy.body:
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
-// IR-PCH-HOST-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH-HOST: omp.arraycpy.done19:
-// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH-HOST: .omp.reduction.case2:
-// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
-// IR-PCH-HOST: omp.arraycpy.body21:
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
-// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
-// IR-PCH-HOST: omp.arraycpy.done27:
-// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH-HOST: .omp.reduction.default:
-// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-HOST-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// IR-PCH-HOST-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// IR-PCH-HOST: .omp.lastprivate.then:
-// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-PCH-HOST-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4
-// IR-PCH-HOST-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4
-// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// IR-PCH-HOST: .omp.lastprivate.done:
-// IR-PCH-HOST-NEXT: ret void
-// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
-// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// IR-PCH-HOST-NEXT: entry:
-// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH-HOST: omp.arraycpy.body:
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH-HOST: omp.arraycpy.done2:
-// IR-PCH-HOST-NEXT: ret void
-// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
-// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
-// IR-PCH-HOST-NEXT: entry:
-// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH-HOST: omp.arraycpy.body:
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
-// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH-HOST: omp.arraycpy.done2:
-// IR-PCH-HOST-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l23
-// CHECK-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
-// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
-// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr
-// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
-// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
-// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
-// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
-// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK: user_code.entry:
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
-// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
-// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
-// CHECK-NEXT: ret void
-// CHECK: worker.exit:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
-// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
-// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
-// CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr
-// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
-// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
-// CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
-// CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
-// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
-// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
-// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
-// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
-// CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr
-// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
-// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
-// CHECK: omp.arrayinit.body:
-// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
-// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
-// CHECK: omp.arrayinit.done:
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
-// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK: cond.true:
-// CHECK-NEXT: br label [[COND_END:%.*]]
-// CHECK: cond.false:
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: br label [[COND_END]]
-// CHECK: cond.end:
-// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK: omp.inner.for.cond:
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100
-// CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK: omp.inner.for.body:
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
-// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
-// CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr
-// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
-// CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr
-// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
-// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
-// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK: omp.inner.for.inc:
-// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
-// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
-// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
-// CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99
-// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
-// CHECK: cond.true9:
-// CHECK-NEXT: br label [[COND_END11:%.*]]
-// CHECK: cond.false10:
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: br label [[COND_END11]]
-// CHECK: cond.end11:
-// CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ]
-// CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK: omp.inner.for.end:
-// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK: omp.loop.exit:
-// CHECK-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
-// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]])
-// CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
-// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// CHECK: .omp.lastprivate.then:
-// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4
-// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// CHECK: .omp.lastprivate.done:
-// CHECK-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4
-// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8
-// CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8
-// CHECK-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.3, ptr @_omp_reduction_inter_warp_copy_func.4, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
-// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1
-// CHECK-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
-// CHECK: .omp.reduction.then:
-// CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
-// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// CHECK: omp.arraycpy.body:
-// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
-// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]]
-// CHECK-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
-// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
-// CHECK: omp.arraycpy.done17:
-// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
-// CHECK: .omp.reduction.done:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__.1
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
-// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
-// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
-// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
-// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
-// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
-// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
-// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
-// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
-// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
-// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
-// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
-// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
-// CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr
-// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
-// CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
-// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
-// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
-// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
-// CHECK: omp.arrayinit.body:
-// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
-// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
-// CHECK: omp.arrayinit.done:
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
-// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK: omp.inner.for.cond:
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
-// CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]]
-// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK: omp.inner.for.body:
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10
-// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10
-// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
-// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]]
-// CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]]
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]]
-// CHECK-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK: omp.body.continue:
-// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK: omp.inner.for.inc:
-// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
-// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK: omp.inner.for.end:
-// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK: omp.loop.exit:
-// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
-// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]])
-// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8
-// CHECK-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
-// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1
-// CHECK-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
-// CHECK: .omp.reduction.then:
-// CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
-// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// CHECK: omp.arraycpy.body:
-// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
-// CHECK-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
-// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
-// CHECK: omp.arraycpy.done19:
-// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
-// CHECK: .omp.reduction.done:
-// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
-// CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// CHECK: .omp.lastprivate.then:
-// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4
-// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// CHECK: .omp.lastprivate.done:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
-// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
-// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
-// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1
-// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]]
-// CHECK: .shuffle.pre_cond:
-// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ]
-// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ]
-// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64
-// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64
-// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]]
-// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7
-// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]]
-// CHECK: .shuffle.then:
-// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
-// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]])
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4
-// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1
-// CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1
-// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]]
-// CHECK: .shuffle.exit:
-// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0
-// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1
-// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
-// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]]
-// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2
-// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1
-// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0
-// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]]
-// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0
-// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]]
-// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]]
-// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]]
-// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK: then:
-// CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[IFCONT:%.*]]
-// CHECK: else:
-// CHECK-NEXT: br label [[IFCONT]]
-// CHECK: ifcont:
-// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1
-// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
-// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
-// CHECK: then4:
-// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false)
-// CHECK-NEXT: br label [[IFCONT6:%.*]]
-// CHECK: else5:
-// CHECK-NEXT: br label [[IFCONT6]]
-// CHECK: ifcont6:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63
-// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[PRECOND:%.*]]
-// CHECK: precond:
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100
-// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
-// CHECK: body:
-// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]])
-// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK: then:
-// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
-// CHECK-NEXT: br label [[IFCONT:%.*]]
-// CHECK: else:
-// CHECK-NEXT: br label [[IFCONT]]
-// CHECK: ifcont:
-// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
-// CHECK: then2:
-// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK-NEXT: br label [[IFCONT4:%.*]]
-// CHECK: else3:
-// CHECK-NEXT: br label [[IFCONT4]]
-// CHECK: ifcont4:
-// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[PRECOND]]
-// CHECK: exit:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.3
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
-// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
-// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
-// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1
-// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]]
-// CHECK: .shuffle.pre_cond:
-// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ]
-// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ]
-// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64
-// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64
-// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]]
-// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7
-// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]]
-// CHECK: .shuffle.then:
-// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4
-// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
-// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]])
-// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4
-// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1
-// CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1
-// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]]
-// CHECK: .shuffle.exit:
-// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
-// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0
-// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1
-// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
-// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]]
-// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2
-// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1
-// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0
-// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]]
-// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0
-// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]]
-// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]]
-// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]]
-// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK: then:
-// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]]
-// CHECK-NEXT: br label [[IFCONT:%.*]]
-// CHECK: else:
-// CHECK-NEXT: br label [[IFCONT]]
-// CHECK: ifcont:
-// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1
-// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
-// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
-// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
-// CHECK: then4:
-// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
-// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
-// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false)
-// CHECK-NEXT: br label [[IFCONT6:%.*]]
-// CHECK: else5:
-// CHECK-NEXT: br label [[IFCONT6]]
-// CHECK: ifcont6:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.4
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63
-// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6
-// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[PRECOND:%.*]]
-// CHECK: precond:
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100
-// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
-// CHECK: body:
-// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
-// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
-// CHECK: then:
-// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
-// CHECK-NEXT: br label [[IFCONT:%.*]]
-// CHECK: else:
-// CHECK-NEXT: br label [[IFCONT]]
-// CHECK: ifcont:
-// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
-// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
-// CHECK: then2:
-// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK-NEXT: br label [[IFCONT4:%.*]]
-// CHECK: else3:
-// CHECK-NEXT: br label [[IFCONT4]]
-// CHECK: ifcont4:
-// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
-// CHECK-NEXT: br label [[PRECOND]]
-// CHECK: exit:
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false)
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]]
-// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]]
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0
-// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false)
-// CHECK-NEXT: ret void
-// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
-// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
-// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]]
-// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8
-// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
-// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]]
-// CHECK-NEXT: ret void
 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22
 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
 // IR-GPU-NEXT: entry:
@@ -2015,7 +903,7 @@ int foo() {
 // IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8
 // IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2036,7 +924,7 @@ int foo() {
 // IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]]
 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
 // IR: omp.arraycpy.done10:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR: .omp.reduction.case2:
 // IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2052,7 +940,6 @@ int foo() {
 // IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]]
 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
 // IR: omp.arraycpy.done18:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR: .omp.reduction.default:
 // IR-NEXT: ret void
@@ -2171,7 +1058,7 @@ int foo() {
 // IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8
 // IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2192,7 +1079,7 @@ int foo() {
 // IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
 // IR: omp.arraycpy.done19:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR: .omp.reduction.case2:
 // IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2208,7 +1095,6 @@ int foo() {
 // IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
 // IR: omp.arraycpy.done27:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR: .omp.reduction.default:
 // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -2412,7 +1298,7 @@ int foo() {
 // IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8
 // IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2433,7 +1319,7 @@ int foo() {
 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]]
 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
 // IR-PCH: omp.arraycpy.done10:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.case2:
 // IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2449,7 +1335,6 @@ int foo() {
 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]]
 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
 // IR-PCH: omp.arraycpy.done18:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.default:
 // IR-PCH-NEXT: ret void
@@ -2568,7 +1453,7 @@ int foo() {
 // IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8
 // IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2589,7 +1474,7 @@ int foo() {
 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
 // IR-PCH: omp.arraycpy.done19:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void
@__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.case2:
 // IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2605,7 +1490,6 @@ int foo() {
 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
 // IR-PCH: omp.arraycpy.done27:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.default:
 // IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
new file mode 100644
index 0000000000000..f3bbbc6229abd
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
@@ -0,0 +1,587 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
+extern int foo(int i);
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+int N = 100000;
+int main()
+{
+  int i;
+  int a[N];
+  int b[N];
+
+  // Presence of call. Cannot use 'parallel for', must use 'distribute' when
+  // assume-no-nested-parallelism isn't specified.
+ #pragma omp target teams loop + for (i=0; i < N; i++) { + for (int j=0; j < N; j++) { + a[i] = b[i] * N + foo(j); + } + } + return 0; +} +#endif +// IR-GPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr 
[[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND:%.*]] +// IR-GPU: for.cond: +// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-GPU-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-GPU: for.body: +// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) #[[ATTR4:[0-9]+]] +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-GPU-NEXT: br label [[FOR_INC:%.*]] +// IR-GPU: for.inc: +// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// IR-GPU: for.end: +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// IR-GPU-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP28]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: 
omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-LABEL: define dso_local noundef i32 @main +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-NEXT: ret i32 [[TMP8]] +// +// +// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 +// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define dso_local noundef i32 @main +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-PCH-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: ret i32 [[TMP8]] +// +// +// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, 
ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: br label [[FOR_COND:%.*]] +// IR-PCH: for.cond: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-PCH-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-PCH: for.body: +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) +// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 +// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-PCH-NEXT: br label [[FOR_INC:%.*]] +// IR-PCH: for.inc: +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-PCH-NEXT: br label 
[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// IR-PCH: for.end: +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp new file mode 100644 index 0000000000000..7c7cdc53fa2d2 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp @@ -0,0 +1,3998 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU + +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-assume-no-nested-parallelism -DNESTED -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU-NESTED + +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-NESTED + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH-NESTED + +// expected-no-diagnostics + +#ifndef NESTED +extern int omp_get_num_teams(void); +#endif + +#ifndef HEADER +#define HEADER +extern int foo(int i); + +int N = 100000; +int main() +{ + int 
a[N]; + int b[N]; + +#ifndef NESTED + // Should be transformed into 'target teams distribute parallel for' + #pragma omp target teams loop + for (int j = 0; j != N; j++) + a[j]=b[j]; + + // Should be transformed into 'target teams distribute parallel for' + #pragma omp target teams loop collapse(2) + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + a[i] = b[i] * N + j; + } + } + + int nt = 0; + // Should be transformed into 'target teams distribute parallel for' + #pragma omp target teams loop num_teams(32) + for (int i=0; i < N; i++) { + if (!nt) nt = omp_get_num_teams(); + for (int j=0; j < N; j++) + a[j] = b[j] * N + nt; + } +#else + // Should be transformed into 'target teams distribute parallel for' + // even with function call because of assume-no-nested-parallelism. + #pragma omp target teams loop collapse(2) + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + a[i] = b[i] * N + foo(j); + } + } +#endif + return 0; +} +#endif +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 +// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: 
[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NEXT: [[TMP8:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr +// IR-GPU-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr +// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr +// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP0]] to ptr +// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4 +// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8 +// IR-GPU-NEXT: [[TMP32:%.*]] = 
getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5 +// IR-GPU-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// IR-GPU-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6 +// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8 +// IR-GPU-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7) +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// IR-GPU: cond.true12: +// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14:%.*]] +// IR-GPU: cond.false13: +// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14]] +// IR-GPU: cond.end14: +// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE12]] ], [ [[TMP46]], [[COND_FALSE13]] ] +// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 
noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J6:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J6_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[J6]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]] +// 
IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[J6_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[J6_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J6_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +// IR-GPU-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX10]], align 4 +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP22]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46 +// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: 
[[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5) +// 
IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr +// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 
8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: land.lhs.true: +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// IR-GPU-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]]) +// IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-GPU-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// 
IR-GPU-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1 +// IR-GPU-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]] +// IR-GPU-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr +// IR-GPU-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// IR-GPU-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr +// IR-GPU-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// IR-GPU-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr +// IR-GPU-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// IR-GPU-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr +// IR-GPU-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4 +// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8 +// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5 +// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 +// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6 +// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8 +// IR-GPU-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7) +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// IR-GPU-NEXT: store i64 [[ADD16]], ptr 
[[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] +// IR-GPU-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] +// IR-GPU-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]] +// IR-GPU-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// IR-GPU: cond.true20: +// IR-GPU-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[COND_END22:%.*]] +// IR-GPU: cond.false21: +// IR-GPU-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[COND_END22]] +// IR-GPU: cond.end22: +// IR-GPU-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ] +// IR-GPU-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, 
addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr +// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: land.lhs.true: +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1) +// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: 
[[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// IR-GPU-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0 +// IR-GPU-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// IR-GPU-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] +// IR-GPU-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64 +// IR-GPU-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]] +// IR-GPU-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]] +// IR-GPU-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32 +// IR-GPU-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0 +// IR-GPU-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// IR-GPU-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]] +// IR-GPU-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64 +// IR-GPU-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]] +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-GPU-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1 +// IR-GPU-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]] +// IR-GPU-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64 +// IR-GPU-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]] +// IR-GPU-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]] +// IR-GPU-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1 +// IR-GPU-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]] +// IR-GPU-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32 +// IR-GPU-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]] +// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[TMP27]] +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]] +// IR-GPU-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4 +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]] +// IR-GPU-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NEXT: br label 
[[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55 +// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: 
[[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP8]], ptr [[NT_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] 
to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP18]] to ptr +// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// 
IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to ptr +// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to ptr +// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to ptr +// IR-GPU-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8 +// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4 +// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP0]] to ptr +// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 +// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5 +// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP35]], align 8 +// IR-GPU-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6 +// IR-GPU-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 8 +// IR-GPU-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 7 +// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP38]], align 8 +// IR-GPU-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP40]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 8) +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP47]], [[TMP48]] +// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// IR-GPU: cond.true12: +// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14:%.*]] +// IR-GPU: cond.false13: +// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14]] +// IR-GPU: cond.end14: +// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP49]], 
[[COND_TRUE12]] ], [ [[TMP50]], [[COND_FALSE13]] ] +// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP53]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr +// IR-GPU-NEXT: 
[[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], 
align 8 +// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]] +// IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// IR-GPU-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// IR-GPU: if.then: +// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() #[[ATTR6:[0-9]+]] +// IR-GPU-NEXT: store i32 [[CALL]], ptr [[NT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[IF_END]] +// IR-GPU: if.end: +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND:%.*]] +// IR-GPU: for.cond: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// IR-GPU-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-GPU: for.body: +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP22]] +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP23]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]] +// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], 
align 4 +// IR-GPU-NEXT: br label [[FOR_INC:%.*]] +// IR-GPU: for.inc: +// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 +// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// IR-GPU: for.end: +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP28]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@main +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-NEXT: store i32 0, ptr [[NT]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4 +// IR-NEXT: store i32 [[TMP11]], ptr 
[[NT_CASTED]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]]) +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-NEXT: ret i32 [[TMP14]] +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J6:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[J6]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4 +// IR-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64 +// IR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +// IR-NEXT: store i32 [[TMP21]], ptr 
[[ARRAYIDX10]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: land.lhs.true: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR: omp.precond.then: +// IR-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// 
IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: land.lhs.true: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR: omp.precond.then: +// IR-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]] +// IR-NEXT: br i1 [[CMP14]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]] +// IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// IR-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// IR-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0 +// IR-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// IR-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// IR-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// IR-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0 +// IR-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// IR-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// IR-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// IR-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// IR-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]] +// IR-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// IR-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// IR-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// IR-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]] +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]] +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64 +// IR-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]] +// IR-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1 +// IR-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef 
[[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0) +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I6:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// IR: if.then: +// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() +// IR-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4 +// 
IR-NEXT: br label [[IF_END]] +// IR: if.end: +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// IR-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]] +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// IR-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]] +// IR-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4 +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 +// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@main +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-PCH-NEXT: store i64 
[[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-PCH-NEXT: store i32 0, ptr [[NT]], align 4 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4 +// IR-PCH-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]]) +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: ret i32 [[TMP14]] +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J6:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// 
IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[J6]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] 
to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4 +// IR-PCH-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-PCH-NEXT: store i64 
[[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: land.lhs.true: +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-PCH-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-PCH-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], 
align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: land.lhs.true: +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// 
IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]] +// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]] +// IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// IR-PCH-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// IR-PCH-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0 +// IR-PCH-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// IR-PCH-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// IR-PCH-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// IR-PCH-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]] +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0 +// IR-PCH-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// IR-PCH-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// IR-PCH-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// IR-PCH-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// IR-PCH-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]] +// IR-PCH-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// IR-PCH-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// IR-PCH-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// IR-PCH-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]] +// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]] +// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4 +// IR-PCH-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]] +// IR-PCH-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: [[ADD40:%.*]] = add nsw i64 
[[TMP33]], 1 +// IR-PCH-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0) +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I6:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// 
IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = 
add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-PCH-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-PCH-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// IR-PCH: if.then: +// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() +// IR-PCH-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4 +// IR-PCH-NEXT: br label [[IF_END]] +// IR-PCH: if.end: +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: br label [[FOR_COND:%.*]] +// IR-PCH: for.cond: +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// IR-PCH-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-PCH: for.body: +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]] +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]] +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]] +// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4 +// IR-PCH-NEXT: br label [[FOR_INC:%.*]] +// IR-PCH: for.inc: +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1 +// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// IR-PCH: for.end: +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 +// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// IR-PCH-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH: omp.precond.end: +// IR-PCH-NEXT: ret void +// +// +// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NESTED-NEXT: entry: +// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, 
addrspace(5) +// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NESTED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NESTED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU-NESTED: user_code.entry: +// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NESTED-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NESTED-NEXT: ret void +// IR-GPU-NESTED: worker.exit: +// IR-GPU-NESTED-NEXT: ret void 
+// +// +// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined +// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NESTED-NEXT: entry: +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr +// 
IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr +// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr +// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr +// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU-NESTED: land.lhs.true: +// 
IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.then: +// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NESTED-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]]) +// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU-NESTED: cond.true: +// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-GPU-NESTED: cond.false: +// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[COND_END]] +// IR-GPU-NESTED: cond.end: +// IR-GPU-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-GPU-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU-NESTED: omp.inner.for.cond: +// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1 +// IR-GPU-NESTED-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU-NESTED: omp.inner.for.body: +// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = 
inttoptr i64 [[TMP20]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4 +// IR-GPU-NESTED-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5 +// IR-GPU-NESTED-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6 +// IR-GPU-NESTED-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7) +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU-NESTED: omp.inner.for.inc: +// IR-GPU-NESTED-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// IR-GPU-NESTED-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] +// IR-GPU-NESTED-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] +// IR-GPU-NESTED-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// IR-GPU-NESTED: cond.true20: +// IR-GPU-NESTED-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// 
IR-GPU-NESTED-NEXT: br label [[COND_END22:%.*]] +// IR-GPU-NESTED: cond.false21: +// IR-GPU-NESTED-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[COND_END22]] +// IR-GPU-NESTED: cond.end22: +// IR-GPU-NESTED-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ] +// IR-GPU-NESTED-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU-NESTED: omp.inner.for.end: +// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU-NESTED: omp.loop.exit: +// IR-GPU-NESTED-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]]) +// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.end: +// IR-GPU-NESTED-NEXT: ret void +// +// +// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined +// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NESTED-NEXT: entry: +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J12:%.*]] = 
alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr +// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr +// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// 
IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU-NESTED: land.lhs.true: +// IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.then: +// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1) +// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU-NESTED: omp.inner.for.cond: +// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP13]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU-NESTED: omp.inner.for.body: +// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0 +// IR-GPU-NESTED-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// IR-GPU-NESTED-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] +// IR-GPU-NESTED-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64 +// IR-GPU-NESTED-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]] +// IR-GPU-NESTED-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1 +// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]] +// IR-GPU-NESTED-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32 +// IR-GPU-NESTED-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0 +// IR-GPU-NESTED-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// IR-GPU-NESTED-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]] +// IR-GPU-NESTED-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64 +// IR-GPU-NESTED-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]] +// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-GPU-NESTED-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1 +// IR-GPU-NESTED-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]] +// IR-GPU-NESTED-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64 +// IR-GPU-NESTED-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]] +// IR-GPU-NESTED-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]] +// IR-GPU-NESTED-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1 +// IR-GPU-NESTED-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]] +// IR-GPU-NESTED-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32 +// IR-GPU-NESTED-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// IR-GPU-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]] +// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP27]]) #[[ATTR5:[0-9]+]] +// IR-GPU-NESTED-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[CALL]] +// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64 +// IR-GPU-NESTED-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]] +// IR-GPU-NESTED-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4 +// IR-GPU-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU-NESTED: omp.body.continue: +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU-NESTED: omp.inner.for.inc: +// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = load i64, ptr 
[[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]] +// IR-GPU-NESTED-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU-NESTED: omp.inner.for.end: +// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU-NESTED: omp.loop.exit: +// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]]) +// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.end: +// IR-GPU-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@main +// IR-NESTED-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: ret i32 [[TMP8]] +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// IR-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 +// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined +// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP4]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-NESTED: land.lhs.true: +// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.then: +// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-NESTED: cond.true: +// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-NESTED: cond.false: +// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: br label [[COND_END]] +// IR-NESTED: cond.end: +// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-NESTED: omp.inner.for.cond: +// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-NESTED-NEXT: br i1 [[CMP14]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-NESTED: omp.inner.for.body: +// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-NESTED: omp.inner.for.inc: +// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-NESTED: omp.inner.for.end: +// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-NESTED: omp.loop.exit: +// IR-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.end: +// IR-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined +// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, 
align 4
+// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-NESTED: land.lhs.true:
+// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.then:
+// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-NESTED: cond.true:
+// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-NESTED: cond.false:
+// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: br label [[COND_END]]
+// IR-NESTED: cond.end:
+// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-NESTED: omp.inner.for.cond:
+// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-NESTED: omp.inner.for.body:
+// IR-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
+// IR-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
+// IR-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-NESTED: omp.body.continue:
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-NESTED: omp.inner.for.inc:
+// IR-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-NESTED: omp.inner.for.end:
+// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-NESTED: omp.loop.exit:
+// IR-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
+// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.end:
+// IR-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@main
+// IR-PCH-NESTED-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-PCH-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-PCH-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-PCH-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-PCH-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-PCH-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-PCH-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]])
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: ret i32 [[TMP8]]
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
+// IR-PCH-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined
+// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH-NESTED: land.lhs.true:
+// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.then:
+// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH-NESTED: cond.true:
+// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-PCH-NESTED: cond.false:
+// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END]]
+// IR-PCH-NESTED: cond.end:
+// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH-NESTED: omp.inner.for.cond:
+// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH-NESTED: omp.inner.for.body:
+// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH-NESTED: omp.inner.for.inc:
+// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-PCH-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH-NESTED: omp.inner.for.end:
+// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH-NESTED: omp.loop.exit:
+// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.end:
+// IR-PCH-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined
+// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH-NESTED: land.lhs.true:
+// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.then:
+// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH-NESTED: cond.true:
+// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-PCH-NESTED: cond.false:
+// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END]]
+// IR-PCH-NESTED: cond.end:
+// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH-NESTED: omp.inner.for.cond:
+// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH-NESTED: omp.inner.for.body:
+// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-PCH-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-PCH-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-PCH-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-PCH-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-PCH-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-PCH-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-PCH-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-PCH-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-PCH-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-PCH-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-PCH-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-PCH-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-PCH-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-PCH-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-PCH-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-PCH-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-PCH-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-PCH-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-PCH-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-PCH-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-PCH-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-PCH-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
+// IR-PCH-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
+// IR-PCH-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-PCH-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-PCH-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-PCH-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH-NESTED: omp.body.continue:
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH-NESTED: omp.inner.for.inc:
+// IR-PCH-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH-NESTED: omp.inner.for.end:
+// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH-NESTED: omp.loop.exit:
+// IR-PCH-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.end:
+// IR-PCH-NESTED-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
index 1edcbfe2d7779..e1a6aad65b796 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
@@ -332,78 +332,8 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z9gtid_testv()
@@ -411,14 +341,14 @@ int main() {
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -586,78 +516,8 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z3fn4v()
@@ -665,14 +525,14 @@ int main() {
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -695,7 +555,6 @@ int main() {
 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
@@ -725,82 +584,8 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z3fn5v()
@@ -808,14 +593,14 @@ int main() {
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -847,7 +632,6 @@ int main() {
 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
@@ -878,30 +662,18 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
-// CHECK1: omp_if.then:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
-// CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_IF_END]]
-// CHECK1: omp_if.end:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn6v()
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -910,85 +682,19 @@ int main() {
 // CHECK1-NEXT: ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
+// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_
+// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat {
 // CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: call void @_Z3fn6v()
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_
-// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT: store i32 3, ptr [[TMP0]], align 4
@@ -1026,44 +732,61 @@ int main() {
 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]]
 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1: omp_if.then:
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
-// CHECK1-NEXT: store i32 3, ptr [[TMP16]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
-// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 3
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 8
-// CHECK1-NEXT: store i64 100, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 9
-// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 10
-// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 11
-// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 12
-// CHECK1-NEXT: store i32 0, ptr [[TMP28]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS2]])
-// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
-// CHECK1: omp_offload.failed3:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]]
-// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT4]]
-// CHECK1: omp_offload.cont4:
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP24]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP36]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1: omp_offload.failed6:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK1: omp_offload.cont7:
 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
 // CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
 // CHECK1-NEXT: br label [[OMP_IF_END]]
 // CHECK1: omp_if.end:
 // CHECK1-NEXT: ret i32 0
@@ -1117,78 +840,8 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT:
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1196,14 +849,14 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -1226,7 +879,6 @@ int main() { // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 @@ -1256,82 +908,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: 
omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1339,29 +917,38 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68 -// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined) +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -1371,6 +958,7 @@ int main() { // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1398,78 +986,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1477,13 +995,13 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp index 99416f76e409c..9b3d77f5b0adc 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp @@ -326,7 +326,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -340,7 +340,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK1-NEXT: ret void // // @@ -403,149 +403,48 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done3: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -596,7 +495,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -645,7 +544,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK1-NEXT: ret void // // @@ -711,149 +610,45 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done5: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: // CHECK1-NEXT: ret void // // @@ -1059,7 +854,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1073,7 +868,7 @@ int main() { // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK3-NEXT: ret void // // @@ -1136,138 +931,41 @@ int main() { // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 -// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done3: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK3: omp.inner.for.cond.cleanup: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 
x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1323,7 +1021,7 @@ int 
main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1372,7 +1070,7 @@ int main() { // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK3-NEXT: ret void // // @@ -1438,138 +1136,38 @@ int main() { // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 -// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// 
CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done5: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK3: omp.inner.for.cond.cleanup: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1760,7 +1358,7 @@ int main() { // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR4:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) // CHECK5-NEXT: ret void // // @@ -1781,6 +1379,7 @@ int main() { // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -1812,17 +1411,29 @@ int main() { // CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK5-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1831,98 +1442,8 @@ int main() { // CHECK5-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef 
nonnull align 8 dereferenceable(24) [[REF_TMP]]) -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) -// CHECK5-NEXT: ret void -// -// -// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp -// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp +// CHECK5-SAME: () #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: call void @__cxx_global_var_init() // CHECK5-NEXT: call void @__cxx_global_var_init.1() @@ -1935,7 +1456,7 @@ int main() { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK13-NEXT: ret void // // @@ -1998,35 +1519,48 @@ int main() { // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK13-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] -// CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done3: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -2040,120 +1574,6 @@ int main() { // CHECK13-NEXT: ret void // // -// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK13-NEXT: entry: -// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// 
CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK13: cond.true: -// CHECK13-NEXT: br label [[COND_END:%.*]] -// CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END]] -// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK13: omp.inner.for.cond.cleanup: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 -// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK13: omp.body.continue: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK13: omp.inner.for.end: -// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: -// CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat { // CHECK13-NEXT: entry: @@ -2169,7 +1589,7 @@ int main() { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK13-NEXT: ret void // // @@ -2235,17 +1655,27 @@ int main() { // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2254,16 +1684,16 @@ int main() { // CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done5: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done8: // CHECK13-NEXT: ret void // // @@ -2277,120 +1707,6 @@ int main() { // CHECK13-NEXT: ret void // // -// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK13-NEXT: entry: -// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK13: cond.true: -// CHECK13-NEXT: br label [[COND_END:%.*]] -// CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END]] -// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK13: omp.inner.for.cond.cleanup: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) -// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK13: omp.body.continue: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK13: omp.inner.for.end: -// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done9: -// CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat { // CHECK13-NEXT: entry: @@ -2449,7 +1765,7 @@ int main() { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK15-NEXT: ret void // // @@ -2512,148 +1828,41 @@ int main() { // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] -// CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 -// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done3: -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev -// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// 
CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK15: omp.inner.for.cond.cleanup: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// 
CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -2662,6 +1871,16 @@ int main() { // CHECK15-NEXT: ret void // // +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: ret void +// +// // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: @@ -2677,7 +1896,7 @@ int main() { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK15-NEXT: ret void // // @@ -2743,148 +1962,38 @@ int main() { // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 -// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done5: -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// 
CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK15: omp.inner.for.cond.cleanup: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2893,6 +2002,16 @@ int main() { // CHECK15-NEXT: ret void // // +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: ret void +// +// // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: @@ -2951,7 +2070,7 @@ int main() { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) // CHECK17-NEXT: ret void // // @@ -2972,6 +2091,7 @@ int main() { // CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -3003,111 +2123,33 @@ int main() { // CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK17: omp.inner.for.end: -// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: ret void -// -// -// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK17-NEXT: entry: -// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK17: cond.true: -// CHECK17-NEXT: br label [[COND_END:%.*]] -// CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END]] -// CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] 
= add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp index d4a98f07fe24d..23c5c9db9c700 100644 --- a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp +++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp @@ -278,7 +278,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -338,7 +338,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP69]], align 4 // CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) +// CHECK1-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) // CHECK1-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 // CHECK1-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]] // CHECK1: omp_offload.failed16: @@ -356,7 +356,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -364,8 +364,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -434,126 +434,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp 
sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -567,7 +469,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -636,126 +538,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -865,7 +669,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -925,7 +729,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP69]], align 4 // CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP70]], align 4 -// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) +// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) // CHECK3-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 // CHECK3-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]] // CHECK3: omp_offload.failed16: @@ -943,7 +747,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -951,8 +755,8 @@ int main (int argc, 
char **argv) { // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) // CHECK3-NEXT: ret void // // @@ -1021,121 +825,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca 
i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1149,7 +859,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1218,121 +928,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1424,7 +1040,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK9-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) // CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK9-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: @@ -1449,7 +1065,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) // CHECK9-NEXT: ret void // // @@ -1521,129 +1137,28 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: br label [[OMP_PRECOND_END]] -// CHECK9: omp.precond.end: -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK9-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK9: omp.precond.then: -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] // CHECK9-NEXT: 
store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1735,7 +1250,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK11-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) // CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK11-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: @@ -1760,7 +1275,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK11-NEXT: ret void // // @@ -1832,124 +1347,27 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP4]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2009,7 +1427,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK17-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK17-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) // CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK17-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK17: omp_offload.failed: @@ -2028,7 +1446,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) // CHECK17-NEXT: ret void // // @@ -2041,135 +1459,62 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 -// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK17: cond.true: -// CHECK17-NEXT: br label [[COND_END:%.*]] -// CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END]] -// CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK17: omp.inner.for.end: -// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: ret void -// -// -// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK17-NEXT: entry: -// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// 
CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: ret void // // @@ -2227,7 +1572,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK19-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK19-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) // CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK19-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK19: omp_offload.failed: @@ -2246,7 +1591,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) // CHECK19-NEXT: ret void // // @@ -2294,95 +1639,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK19: omp.inner.for.end: -// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: ret void -// -// -// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK19-NEXT: entry: -// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 -// CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK19: cond.true: -// CHECK19-NEXT: br label 
[[COND_END:%.*]] -// CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: br label [[COND_END]] -// CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK19: omp.inner.for.cond: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: ret void // // @@ -2478,7 +1754,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK25-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK25-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK25-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) // CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK25-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK25: omp_offload.failed: @@ -2505,7 +1781,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = 
load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) // CHECK25-NEXT: ret void // // @@ -2577,129 +1853,28 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) -// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK25: omp.inner.for.end: -// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK25-NEXT: br label [[OMP_PRECOND_END]] -// CHECK25: omp.precond.end: -// CHECK25-NEXT: ret void -// -// -// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK25-NEXT: entry: -// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 -// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK25: omp.precond.then: -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK25: cond.true: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: br label [[COND_END:%.*]] -// CHECK25: cond.false: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: br label [[COND_END]] -// CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: 
store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK25: omp.inner.for.cond: // CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2778,7 +1953,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 // CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK25-NEXT: store i32 0, ptr [[TMP31]], align 4 -// CHECK25-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) // CHECK25-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 // CHECK25-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK25: omp_offload.failed: @@ -2794,15 +1969,15 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 
// CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) // CHECK25-NEXT: ret void // // @@ -2850,99 +2025,26 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK25: omp.inner.for.end: -// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK25-NEXT: ret void -// -// -// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { -// CHECK25-NEXT: entry: -// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 -// CHECK25-NEXT: 
br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK25: cond.true: -// CHECK25-NEXT: br label [[COND_END:%.*]] -// CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: br label [[COND_END]] -// CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK25: omp.inner.for.cond: // CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: ret void // // @@ -3038,7 +2140,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK27-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK27-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK27-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) // CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK27-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK27: omp_offload.failed: @@ -3065,7 +2167,7 @@ int main (int argc, 
char **argv) { // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK27-NEXT: ret void // // @@ -3137,124 +2239,27 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK27: omp.inner.for.end: -// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK27-NEXT: br label [[OMP_PRECOND_END]] -// CHECK27: omp.precond.end: -// CHECK27-NEXT: ret void -// -// -// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK27-NEXT: entry: -// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// 
CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK27: omp.precond.then: -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK27: cond.true: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: br label [[COND_END:%.*]] -// CHECK27: cond.false: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: br label [[COND_END]] -// CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK27: 
omp.inner.for.cond: // CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK27-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -3333,7 +2338,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 // CHECK27-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK27-NEXT: store i32 0, ptr [[TMP31]], align 4 -// CHECK27-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) // CHECK27-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 // CHECK27-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK27: omp_offload.failed: @@ -3349,15 +2354,15 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB2]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) // CHECK27-NEXT: ret void // // @@ -3405,93 +2410,24 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK27: omp.inner.for.end: -// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK27-NEXT: ret void -// -// -// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { -// CHECK27-NEXT: entry: -// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 -// CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK27: cond.true: -// CHECK27-NEXT: br label [[COND_END:%.*]] -// CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: br label [[COND_END]] -// CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK27: omp.inner.for.cond: // CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // 
CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK27-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_generic_loop_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_codegen.cpp index 2499fbb6811c9..85dcae26970bc 100644 --- a/clang/test/OpenMP/teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_codegen.cpp @@ -29,7 +29,7 @@ int foo() { // IR-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) // IR-NEXT: ret i32 0 // // @@ -96,277 +96,100 @@ int foo() { // IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // IR: omp.inner.for.body: -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]] +// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// IR-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]] +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 +// IR-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]] +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 +// IR-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]] +// IR-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // IR: omp.inner.for.inc: -// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// IR-NEXT: store i32 [[ADD]], ptr 
[[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 +// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // IR-NEXT: br label [[OMP_INNER_FOR_COND]] // IR: omp.inner.for.end: // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // IR: omp.loop.exit: -// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // IR: .omp.lastprivate.then: // IR-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 -// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // IR: .omp.lastprivate.done: -// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 -// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8 +// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-NEXT: ] // IR: .omp.reduction.case1: -// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // IR: 
omp.arraycpy.body: // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// IR-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done10: -// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR: .omp.reduction.case2: -// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// IR: omp.arraycpy.body12: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 -// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] // IR: omp.arraycpy.done18: -// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR: .omp.reduction.default: -// IR-NEXT: ret void -// -// -// IR-LABEL: define 
{{[^@]+}}@_Z3foov.omp_outlined.omp_outlined -// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-NEXT: entry: -// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-NEXT: [[J5:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 -// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 -// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 -// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] -// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR: omp.arrayinit.body: -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR: omp.arrayinit.done: -// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 -// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR: cond.true: -// IR-NEXT: br label [[COND_END:%.*]] -// IR: cond.false: -// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: br label [[COND_END]] -// IR: cond.end: -// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR: omp.inner.for.cond: -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR: omp.inner.for.body: -// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10 -// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10 -// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]] -// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] -// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 -// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] -// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]] -// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR: omp.body.continue: -// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR: omp.inner.for.inc: -// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1 -// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP3]] -// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] -// IR: omp.inner.for.end: -// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR: omp.loop.exit: -// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) -// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8 -// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-NEXT: ] -// IR: .omp.reduction.case1: -// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR: omp.arraycpy.body: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done19: -// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.case2: -// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// IR: omp.arraycpy.body21: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-NEXT: 
[[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 -// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] -// IR: omp.arraycpy.done27: +// IR-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// IR: omp.arraycpy.body20: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4 +// IR-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] +// IR: omp.arraycpy.done26: // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.default: -// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR: .omp.lastprivate.then: -// IR-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4 -// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4 -// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR: .omp.lastprivate.done: -// IR-NEXT: ret void -// -// -// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func -// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// IR-NEXT: entry: -// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] -// IR-NEXT: br i1 
[[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR: omp.arraycpy.body: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done2: // IR-NEXT: ret void // // // IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func -// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // IR-NEXT: entry: // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 @@ -402,7 +225,7 @@ int foo() { // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) // IR-PCH-NEXT: ret i32 0 // // @@ -469,277 +292,100 @@ int foo() { // IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // IR-PCH: omp.inner.for.body: -// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-PCH-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]] +// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]] +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 +// IR-PCH-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]] +// IR-PCH-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // IR-PCH: omp.inner.for.inc: -// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 +// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] // IR-PCH: omp.inner.for.end: // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // IR-PCH: omp.loop.exit: -// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-PCH-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // IR-PCH: .omp.lastprivate.then: // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-NEXT: store i32 
[[TMP20]], ptr [[TMP0]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // IR-PCH: .omp.lastprivate.done: -// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 -// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8 +// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-PCH-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-PCH-NEXT: ] // IR-PCH: .omp.reduction.case1: -// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // IR-PCH: omp.arraycpy.body: // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// IR-PCH-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], 
align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH: omp.arraycpy.done10: -// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH: .omp.reduction.case2: -// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// IR-PCH: omp.arraycpy.body12: -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 -// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] // IR-PCH: omp.arraycpy.done18: -// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH: .omp.reduction.default: -// IR-PCH-NEXT: ret void -// -// -// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined -// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-PCH-NEXT: entry: -// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: 
[[J3:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 -// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 -// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 -// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR-PCH: omp.arrayinit.body: -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR-PCH: omp.arrayinit.done: -// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 -// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH: cond.true: -// IR-PCH-NEXT: br label [[COND_END:%.*]] -// IR-PCH: cond.false: -// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-NEXT: br label [[COND_END]] -// IR-PCH: cond.end: -// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-NEXT: store i32 
[[TMP9]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH: omp.inner.for.cond: -// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH: omp.inner.for.body: -// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10 -// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10 -// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]] -// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] -// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 -// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] -// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]] -// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR-PCH: omp.body.continue: -// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-PCH: omp.inner.for.inc: -// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1 -// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] -// IR-PCH: omp.inner.for.end: -// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-PCH: omp.loop.exit: -// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) -// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8 -// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// 
IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-PCH-NEXT: ] -// IR-PCH: .omp.reduction.case1: -// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH: omp.arraycpy.body: -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH: omp.arraycpy.done19: -// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR-PCH: .omp.reduction.case2: -// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// IR-PCH: omp.arraycpy.body21: -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 -// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], 
label [[OMP_ARRAYCPY_BODY21]] -// IR-PCH: omp.arraycpy.done27: +// IR-PCH-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// IR-PCH: omp.arraycpy.body20: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4 +// IR-PCH-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] +// IR-PCH: omp.arraycpy.done26: // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR-PCH: .omp.reduction.default: -// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR-PCH: .omp.lastprivate.then: -// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4 -// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR-PCH: .omp.lastprivate.done: -// IR-PCH-NEXT: ret void -// -// -// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func -// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// IR-PCH-NEXT: entry: -// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH: omp.arraycpy.body: -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH: omp.arraycpy.done2: // IR-PCH-NEXT: ret void // // // IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func -// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // IR-PCH-NEXT: entry: // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp index c0c04986f147e..901b6552a22b6 100644 --- a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp @@ -157,7 +157,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -177,7 +177,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -227,114 +227,39 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 
[[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -393,7 +318,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -413,7 +338,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -463,108 +388,37 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -693,7 +547,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 // CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK9-NEXT: store i32 0, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) // CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 // CHECK9-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: @@ -725,7 +579,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) // CHECK9-NEXT: ret void // // @@ -820,178 +674,58 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: br label [[OMP_PRECOND_END]] -// CHECK9: omp.precond.end: -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK9: omp.precond.then: -// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ 
[[TMP19]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] // CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] // CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] // CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 // CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 // CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] // CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 // CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] // CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 // CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] // CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 // CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: 
[[TMP31:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 // CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1043,7 +777,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK9-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) // CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: @@ -1059,7 +793,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) // CHECK9-NEXT: ret void // // @@ -1109,113 +843,38 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 -// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 -// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] -// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: 
[[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // @@ -1343,7 +1002,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4 // CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK11-NEXT: store i32 0, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) // CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK11-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: @@ -1375,7 +1034,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) // CHECK11-NEXT: ret void // // @@ -1470,180 +1129,56 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = 
alloca i64, align 8 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], 
align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1695,7 +1230,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK11-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) // CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK11-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: @@ -1711,7 +1246,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) // CHECK11-NEXT: ret void // // @@ -1761,106 +1296,35 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 -// CHECK11-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], 
i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp index 303bce8c648c2..e955db129d1da 100644 --- a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp @@ -288,7 +288,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -302,7 +302,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) // CHECK1-NEXT: ret void // // @@ -365,149 +365,48 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done3: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] 
= phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -558,7 +457,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -607,7 +506,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) // CHECK1-NEXT: ret void // // @@ -673,149 +572,45 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done5: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: // CHECK1-NEXT: ret void // // @@ -1021,7 +816,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1035,7 +830,7 @@ int main() { // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) // CHECK3-NEXT: ret void // // @@ -1098,138 +893,41 @@ int main() { // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 -// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done3: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK3: omp.inner.for.cond.cleanup: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 
x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1285,7 +983,7 @@ int 
main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1334,7 +1032,7 @@ int main() { // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) // CHECK3-NEXT: ret void // // @@ -1400,138 +1098,38 @@ int main() { // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 -// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// 
CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done5: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK3: omp.inner.for.cond.cleanup: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1726,7 +1324,7 @@ int main() { // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined) // CHECK9-NEXT: ret void // // @@ -1747,6 +1345,7 @@ int main() { // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -1778,112 +1377,34 @@ int main() { // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp index d80c003c01099..f0067cb93d9b0 100644 --- a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp @@ -141,7 +141,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -157,7 +157,7 @@ int main() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) // CHECK1-NEXT: ret void // // @@ -208,165 +208,50 @@ int main() { // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK1-NEXT: ] -// CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK1: .omp.reduction.default: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// 
CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 @@ -435,7 +320,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -450,7 +335,7 @@ int main() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) // CHECK1-NEXT: ret void // // @@ -501,163 +386,48 @@ int main() { // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK1-NEXT: ] -// CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK1: .omp.reduction.default: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: @@ -726,7 +496,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -742,7 +512,7 @@ int main() { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) // CHECK3-NEXT: ret void // // @@ -793,161 +563,50 @@ int main() { // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK3-NEXT: ] -// CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.default: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 @@ -1016,7 +675,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1031,7 +690,7 @@ int main() { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) // CHECK3-NEXT: ret void // // @@ -1082,159 +741,48 @@ int main() { // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK3-NEXT: ] -// CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.default: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, 
ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func // CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: @@ -1270,7 +818,7 @@ int main() { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined, ptr [[SIVAR_ADDR]]) // CHECK9-NEXT: ret void // // @@ -1288,6 +836,7 @@ int main() { // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 @@ -1321,169 +870,53 @@ int main() { // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK9-NEXT: ] -// CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK9: .omp.reduction.default: -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// 
CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] +// 
CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// 
CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func -// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 From d347235bddbeba2a72d94ebe9d8f98dc675c3776 Mon Sep 17 00:00:00 2001 From: Christopher Di Bella Date: Wed, 10 Apr 2024 13:15:22 -0700 Subject: [PATCH 066/886] [Flang] responds to Clang Tidy feedback (#87847) Line 267: performance-unnecessary-copy-initialization Line 592: readability-container-size-empty --- clang/lib/Driver/ToolChains/Flang.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9699443603d36..b00068c8098b9 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -264,7 +264,7 @@ static void addVSDefines(const ToolChain &TC, const ArgList &Args, CmdArgs.push_back(Args.MakeArgString("-D_MSC_FULL_VER=" + Twine(ver))); CmdArgs.push_back(Args.MakeArgString("-D_WIN32")); - llvm::Triple triple = TC.getTriple(); + const llvm::Triple &triple = TC.getTriple(); if (triple.isAArch64()) { CmdArgs.push_back("-D_M_ARM64=1"); } else if (triple.isX86() && triple.isArch32Bit()) { @@ -589,7 +589,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath && ApproxFunc && !SignedZeros && - (FPContract == "fast" || FPContract == "")) { + (FPContract == "fast" || FPContract.empty())) { CmdArgs.push_back("-ffast-math"); return; } From 05093e243859a371f96ffa1c320a4b51579c3da7 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Wed, 10 Apr 2024 16:27:44 -0400 Subject: [PATCH 067/886] [Spirv][HLSL] Add OpAll lowering and float vec support (#87952) The main point of this change was to add support for HLSL's 
`all` intrinsic. In the process of doing that I found a few issues around
creating an `OpConstantComposite` via `buildZerosVal`.

First, the current code didn't support floats, so adding `buildZerosValF`
meant I needed a float version of `getOrCreateIntConstVector`. After doing so
I renamed both versions to `getOrCreateConstVector`. That in turn meant
creating a float version of `getOrCreateIntCompositeOrNull`. Luckily this
function needed little type information, so I was able to split it out into a
helper and rename `getOrCreateIntCompositeOrNull` to
`getOrCreateCompositeOrNull`. Aside from differences in type handling and in
the null vs. zero constant opcodes, these functions should be identical.

To handle scalar floats I could not use `buildConstantFP` the way this PR
did:
https://github.com/llvm/llvm-project/commit/0a2aaab5aba46#diff-733a189c5a8c3211f3a04fd6e719952a3fa231eadd8a7f11e6ecf1e584d57411R1603,
because that would create too many superfluous registers (which causes
problems in the validator). Instead I had to create a float version of
`getOrCreateConstInt`, which I called `getOrCreateConstFP`. Doing it like
this had similar problems:
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp#L1540.

`buildZerosValF` also uses a function `getZeroFP`. This is because half,
float, and double scalar values of 0 would collide in the
`SPIRVDuplicatesTracker CT` if you used `APFloat(0.0f)`.

`getOrCreateConstFP` needed its own version of `getOrCreateConstIntReg`,
which I called `getOrCreateConstFloatReg`. The one difference is that
`getOrCreateConstFloatReg` returns the bit width, so we don't have to call
`getScalarOrVectorBitWidth` twice, i.e., when it is used again in
`getOrCreateConstFP` for the `OpConstantF` `addNumImm`.
`getOrCreateConstFloatReg` needed an `assignFloatTypeToVReg` helper, which
calls a `getOrCreateSPIRVFloatType` helper. There is no `IntegerType::get`
equivalent for floats, so I handled this with a switch statement on bit
widths to get the right LLVM float type.

Finally, there is the use of `bool ZeroAsNull = STI.isOpenCLEnv();`. This is
partly a cosmetic change: when zeros are treated as nulls, we don't create
`OpConstantComposite` vectors, which is something we do in DXC's SPIRV
backend. The DXC SPIRV backend also does not use `OpConstantNull`. This also
gave me a means to test the behavior of the `OpConstantNull` and
`OpConstantComposite` changes via the same tests.
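For illustration, here is a minimal sketch of that bit-width switch. The
function name and error handling below are hypothetical placeholders, not
the committed code; the actual logic lives in `getOrCreateSPIRVFloatType`:

```cpp
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Hypothetical helper: map a bit width to the corresponding LLVM float
// type, since there is no IntegerType::get-style factory for floats.
static Type *getLLVMFloatTypeForBitWidth(LLVMContext &Ctx,
                                         unsigned BitWidth) {
  switch (BitWidth) {
  case 16:
    return Type::getHalfTy(Ctx); // lowered to OpTypeFloat 16
  case 32:
    return Type::getFloatTy(Ctx); // lowered to OpTypeFloat 32
  case 64:
    return Type::getDoubleTy(Ctx); // lowered to OpTypeFloat 64
  default:
    report_fatal_error("Unsupported floating-point bit width");
  }
}
```

`getZeroFP` exists for a related reason: a plain `APFloat(0.0f)` always
carries 32-bit float semantics, so a width-aware zero is needed to keep
half, float, and double zeros from colliding in the duplicates tracker.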
--- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 218 +++++++++++++++--- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 42 +++- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 98 +++++++- .../test/CodeGen/SPIRV/hlsl-intrinsics/all.ll | 187 +++++++++++++++ 4 files changed, 506 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 9592f3e81b402..70197e948c658 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -20,7 +20,12 @@ #include "SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Type.h" #include "llvm/IR/TypedPointerType.h" +#include "llvm/Support/Casting.h" +#include using namespace llvm; SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) @@ -35,6 +40,15 @@ SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth, return SpirvType; } +SPIRVType * +SPIRVGlobalRegistry::assignFloatTypeToVReg(unsigned BitWidth, Register VReg, + MachineInstr &I, + const SPIRVInstrInfo &TII) { + SPIRVType *SpirvType = getOrCreateSPIRVFloatType(BitWidth, I, TII); + assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF); + return SpirvType; +} + SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg( SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I, const SPIRVInstrInfo &TII) { @@ -151,6 +165,8 @@ SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType, Register Res = DT.find(CI, CurMF); if (!Res.isValid()) { unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; + // TODO: handle cases where the type is not 32bit wide + // TODO: https://github.com/llvm/llvm-project/issues/88129 LLT LLTy = LLT::scalar(32); Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass); @@ -164,9 +180,83 @@ SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType, return std::make_tuple(Res, CI, NewInstr); } +std::tuple +SPIRVGlobalRegistry::getOrCreateConstFloatReg(APFloat Val, SPIRVType *SpvType, + MachineIRBuilder *MIRBuilder, + MachineInstr *I, + const SPIRVInstrInfo *TII) { + const Type *LLVMFloatTy; + LLVMContext &Ctx = CurMF->getFunction().getContext(); + unsigned BitWidth = 32; + if (SpvType) + LLVMFloatTy = getTypeForSPIRVType(SpvType); + else { + LLVMFloatTy = Type::getFloatTy(Ctx); + if (MIRBuilder) + SpvType = getOrCreateSPIRVType(LLVMFloatTy, *MIRBuilder); + } + bool NewInstr = false; + // Find a constant in DT or build a new one. 
+ auto *const CI = ConstantFP::get(Ctx, Val); + Register Res = DT.find(CI, CurMF); + if (!Res.isValid()) { + if (SpvType) + BitWidth = getScalarOrVectorBitWidth(SpvType); + // TODO: handle cases where the type is not 32bit wide + // TODO: https://github.com/llvm/llvm-project/issues/88129 + LLT LLTy = LLT::scalar(32); + Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass); + if (MIRBuilder) + assignTypeToVReg(LLVMFloatTy, Res, *MIRBuilder); + else + assignFloatTypeToVReg(BitWidth, Res, *I, *TII); + DT.add(CI, CurMF, Res); + NewInstr = true; + } + return std::make_tuple(Res, CI, NewInstr, BitWidth); +} + +Register SPIRVGlobalRegistry::getOrCreateConstFP(APFloat Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + bool ZeroAsNull) { + assert(SpvType); + ConstantFP *CI; + Register Res; + bool New; + unsigned BitWidth; + std::tie(Res, CI, New, BitWidth) = + getOrCreateConstFloatReg(Val, SpvType, nullptr, &I, &TII); + // If we have found Res register which is defined by the passed G_CONSTANT + // machine instruction, a new constant instruction should be created. + if (!New && (!I.getOperand(0).isReg() || Res != I.getOperand(0).getReg())) + return Res; + MachineInstrBuilder MIB; + MachineBasicBlock &BB = *I.getParent(); + // In OpenCL OpConstantNull - Scalar floating point: +0.0 (all bits 0) + if (Val.isPosZero() && ZeroAsNull) { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + } else { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantF)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + addNumImm( + APInt(BitWidth, CI->getValueAPF().bitcastToAPInt().getZExtValue()), + MIB); + } + const auto &ST = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(), + *ST.getRegisterInfo(), *ST.getRegBankInfo()); + return Res; +} + Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I, SPIRVType *SpvType, - const SPIRVInstrInfo &TII) { + const SPIRVInstrInfo &TII, + bool ZeroAsNull) { assert(SpvType); ConstantInt *CI; Register Res; @@ -179,7 +269,7 @@ Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I, return Res; MachineInstrBuilder MIB; MachineBasicBlock &BB = *I.getParent(); - if (Val) { + if (Val || !ZeroAsNull) { MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) .addDef(Res) .addUse(getSPIRVTypeID(SpvType)); @@ -270,21 +360,46 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val, return Res; } -Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull( - uint64_t Val, MachineInstr &I, SPIRVType *SpvType, +Register SPIRVGlobalRegistry::getOrCreateBaseRegister(Constant *Val, + MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + unsigned BitWidth) { + SPIRVType *Type = SpvType; + if (SpvType->getOpcode() == SPIRV::OpTypeVector || + SpvType->getOpcode() == SPIRV::OpTypeArray) { + auto EleTypeReg = SpvType->getOperand(1).getReg(); + Type = getSPIRVTypeForVReg(EleTypeReg); + } + if (Type->getOpcode() == SPIRV::OpTypeFloat) { + SPIRVType *SpvBaseType = getOrCreateSPIRVFloatType(BitWidth, I, TII); + return getOrCreateConstFP(dyn_cast(Val)->getValue(), I, + SpvBaseType, TII); + } + assert(Type->getOpcode() == SPIRV::OpTypeInt); + SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); + return getOrCreateConstInt(Val->getUniqueInteger().getSExtValue(), I, + SpvBaseType, TII); +} + 
+Register SPIRVGlobalRegistry::getOrCreateCompositeOrNull( + Constant *Val, MachineInstr &I, SPIRVType *SpvType, const SPIRVInstrInfo &TII, Constant *CA, unsigned BitWidth, - unsigned ElemCnt) { + unsigned ElemCnt, bool ZeroAsNull) { // Find a constant vector in DT or build a new one. Register Res = DT.find(CA, CurMF); + // If no values are attached, the composite is null constant. + bool IsNull = Val->isNullValue() && ZeroAsNull; if (!Res.isValid()) { - SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); // SpvScalConst should be created before SpvVecConst to avoid undefined ID // error on validation. // TODO: can moved below once sorting of types/consts/defs is implemented. Register SpvScalConst; - if (Val) - SpvScalConst = getOrCreateConstInt(Val, I, SpvBaseType, TII); - // TODO: maybe use bitwidth of base type. + if (!IsNull) + SpvScalConst = getOrCreateBaseRegister(Val, I, SpvType, TII, BitWidth); + + // TODO: handle cases where the type is not 32bit wide + // TODO: https://github.com/llvm/llvm-project/issues/88129 LLT LLTy = LLT::scalar(32); Register SpvVecConst = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); @@ -293,7 +408,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull( DT.add(CA, CurMF, SpvVecConst); MachineInstrBuilder MIB; MachineBasicBlock &BB = *I.getParent(); - if (Val) { + if (!IsNull) { MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantComposite)) .addDef(SpvVecConst) .addUse(getSPIRVTypeID(SpvType)); @@ -313,20 +428,42 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull( return Res; } -Register -SPIRVGlobalRegistry::getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, - SPIRVType *SpvType, - const SPIRVInstrInfo &TII) { +Register SPIRVGlobalRegistry::getOrCreateConstVector(uint64_t Val, + MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + bool ZeroAsNull) { const Type *LLVMTy = getTypeForSPIRVType(SpvType); assert(LLVMTy->isVectorTy()); const FixedVectorType *LLVMVecTy = cast(LLVMTy); Type *LLVMBaseTy = LLVMVecTy->getElementType(); - const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val); - auto ConstVec = - ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstInt); + assert(LLVMBaseTy->isIntegerTy()); + auto *ConstVal = ConstantInt::get(LLVMBaseTy, Val); + auto *ConstVec = + ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstVal); unsigned BW = getScalarOrVectorBitWidth(SpvType); - return getOrCreateIntCompositeOrNull(Val, I, SpvType, TII, ConstVec, BW, - SpvType->getOperand(2).getImm()); + return getOrCreateCompositeOrNull(ConstVal, I, SpvType, TII, ConstVec, BW, + SpvType->getOperand(2).getImm(), + ZeroAsNull); +} + +Register SPIRVGlobalRegistry::getOrCreateConstVector(APFloat Val, + MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + bool ZeroAsNull) { + const Type *LLVMTy = getTypeForSPIRVType(SpvType); + assert(LLVMTy->isVectorTy()); + const FixedVectorType *LLVMVecTy = cast(LLVMTy); + Type *LLVMBaseTy = LLVMVecTy->getElementType(); + assert(LLVMBaseTy->isFloatingPointTy()); + auto *ConstVal = ConstantFP::get(LLVMBaseTy, Val); + auto *ConstVec = + ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstVal); + unsigned BW = getScalarOrVectorBitWidth(SpvType); + return getOrCreateCompositeOrNull(ConstVal, I, SpvType, TII, ConstVec, BW, + SpvType->getOperand(2).getImm(), + ZeroAsNull); } Register @@ -337,13 +474,13 @@ SPIRVGlobalRegistry::getOrCreateConsIntArray(uint64_t Val, MachineInstr &I, assert(LLVMTy->isArrayTy()); const 
ArrayType *LLVMArrTy = cast(LLVMTy); Type *LLVMBaseTy = LLVMArrTy->getElementType(); - const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val); - auto ConstArr = + auto *ConstInt = ConstantInt::get(LLVMBaseTy, Val); + auto *ConstArr = ConstantArray::get(const_cast(LLVMArrTy), {ConstInt}); SPIRVType *SpvBaseTy = getSPIRVTypeForVReg(SpvType->getOperand(1).getReg()); unsigned BW = getScalarOrVectorBitWidth(SpvBaseTy); - return getOrCreateIntCompositeOrNull(Val, I, SpvType, TII, ConstArr, BW, - LLVMArrTy->getNumElements()); + return getOrCreateCompositeOrNull(ConstInt, I, SpvType, TII, ConstArr, BW, + LLVMArrTy->getNumElements()); } Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull( @@ -1093,14 +1230,16 @@ SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy, return SpirvType; } -SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( - unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) { - Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth); +SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth, + MachineInstr &I, + const SPIRVInstrInfo &TII, + unsigned SPIRVOPcode, + Type *LLVMTy) { Register Reg = DT.find(LLVMTy, CurMF); if (Reg.isValid()) return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); - auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt)) + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRVOPcode)) .addDef(createTypeVReg(CurMF->getRegInfo())) .addImm(BitWidth) .addImm(0); @@ -1108,6 +1247,31 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( return finishCreatingSPIRVType(LLVMTy, MIB); } +SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( + unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) { + Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth); + return getOrCreateSPIRVType(BitWidth, I, TII, SPIRV::OpTypeInt, LLVMTy); +} +SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVFloatType( + unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) { + LLVMContext &Ctx = CurMF->getFunction().getContext(); + Type *LLVMTy; + switch (BitWidth) { + case 16: + LLVMTy = Type::getHalfTy(Ctx); + break; + case 32: + LLVMTy = Type::getFloatTy(Ctx); + break; + case 64: + LLVMTy = Type::getDoubleTy(Ctx); + break; + default: + llvm_unreachable("Bit width is of unexpected size."); + } + return getOrCreateSPIRVType(BitWidth, I, TII, SPIRV::OpTypeFloat, LLVMTy); +} + SPIRVType * SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) { return getOrCreateSPIRVType( diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 37f575e884ef4..2e3e69456ac26 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -20,6 +20,7 @@ #include "SPIRVDuplicatesTracker.h" #include "SPIRVInstrInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/IR/Constant.h" namespace llvm { using SPIRVType = const MachineInstr; @@ -234,6 +235,8 @@ class SPIRVGlobalRegistry { bool EmitIR = true); SPIRVType *assignIntTypeToVReg(unsigned BitWidth, Register VReg, MachineInstr &I, const SPIRVInstrInfo &TII); + SPIRVType *assignFloatTypeToVReg(unsigned BitWidth, Register VReg, + MachineInstr &I, const SPIRVInstrInfo &TII); SPIRVType *assignVectTypeToVReg(SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I, const SPIRVInstrInfo &TII); @@ -372,12 +375,20 @@ class SPIRVGlobalRegistry { 
std::tuple getOrCreateConstIntReg( uint64_t Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder, MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr); + std::tuple getOrCreateConstFloatReg( + APFloat Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder, + MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr); SPIRVType *finishCreatingSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); - Register getOrCreateIntCompositeOrNull(uint64_t Val, MachineInstr &I, - SPIRVType *SpvType, - const SPIRVInstrInfo &TII, - Constant *CA, unsigned BitWidth, - unsigned ElemCnt); + Register getOrCreateBaseRegister(Constant *Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + unsigned BitWidth); + Register getOrCreateCompositeOrNull(Constant *Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII, Constant *CA, + unsigned BitWidth, unsigned ElemCnt, + bool ZeroAsNull = true); + Register getOrCreateIntCompositeOrNull(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType, bool EmitIR, @@ -388,12 +399,20 @@ class SPIRVGlobalRegistry { Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr, bool EmitIR = true); Register getOrCreateConstInt(uint64_t Val, MachineInstr &I, - SPIRVType *SpvType, const SPIRVInstrInfo &TII); + SPIRVType *SpvType, const SPIRVInstrInfo &TII, + bool ZeroAsNull = true); + Register getOrCreateConstFP(APFloat Val, MachineInstr &I, SPIRVType *SpvType, + const SPIRVInstrInfo &TII, + bool ZeroAsNull = true); Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr); - Register getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, - SPIRVType *SpvType, - const SPIRVInstrInfo &TII); + + Register getOrCreateConstVector(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, const SPIRVInstrInfo &TII, + bool ZeroAsNull = true); + Register getOrCreateConstVector(APFloat Val, MachineInstr &I, + SPIRVType *SpvType, const SPIRVInstrInfo &TII, + bool ZeroAsNull = true); Register getOrCreateConsIntArray(uint64_t Val, MachineInstr &I, SPIRVType *SpvType, const SPIRVInstrInfo &TII); @@ -423,6 +442,11 @@ class SPIRVGlobalRegistry { MachineIRBuilder &MIRBuilder); SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII); + SPIRVType *getOrCreateSPIRVType(unsigned BitWidth, MachineInstr &I, + const SPIRVInstrInfo &TII, + unsigned SPIRVOPcode, Type *LLVMTy); + SPIRVType *getOrCreateSPIRVFloatType(unsigned BitWidth, MachineInstr &I, + const SPIRVInstrInfo &TII); SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder); SPIRVType *getOrCreateSPIRVBoolType(MachineInstr &I, const SPIRVInstrInfo &TII); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 45a70da7f8690..c1c0fc4b7dd48 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" @@ -144,6 +145,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectAll(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool 
selectBitreverse(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; @@ -229,6 +233,7 @@ class SPIRVInstructionSelector : public InstructionSelector { const SPIRVType *ResType = nullptr) const; Register buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const; + Register buildZerosValF(const SPIRVType *ResType, MachineInstr &I) const; Register buildOnesVal(bool AllOnes, const SPIRVType *ResType, MachineInstr &I) const; @@ -1155,6 +1160,65 @@ static unsigned getBoolCmpOpcode(unsigned PredNum) { } } +bool SPIRVInstructionSelector::selectAll(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + assert(I.getNumOperands() == 3); + assert(I.getOperand(2).isReg()); + MachineBasicBlock &BB = *I.getParent(); + Register InputRegister = I.getOperand(2).getReg(); + SPIRVType *InputType = GR.getSPIRVTypeForVReg(InputRegister); + + if (!InputType) + report_fatal_error("Input Type could not be determined."); + + bool IsBoolTy = GR.isScalarOrVectorOfType(InputRegister, SPIRV::OpTypeBool); + bool IsVectorTy = InputType->getOpcode() == SPIRV::OpTypeVector; + if (IsBoolTy && !IsVectorTy) { + assert(ResVReg == I.getOperand(0).getReg()); + return BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(InputRegister) + .constrainAllUses(TII, TRI, RBI); + } + + bool IsFloatTy = GR.isScalarOrVectorOfType(InputRegister, SPIRV::OpTypeFloat); + unsigned SpirvNotEqualId = + IsFloatTy ? SPIRV::OpFOrdNotEqual : SPIRV::OpINotEqual; + SPIRVType *SpvBoolScalarTy = GR.getOrCreateSPIRVBoolType(I, TII); + SPIRVType *SpvBoolTy = SpvBoolScalarTy; + Register NotEqualReg = ResVReg; + + if (IsVectorTy) { + NotEqualReg = IsBoolTy ? InputRegister + : MRI->createVirtualRegister(&SPIRV::IDRegClass); + const unsigned NumElts = InputType->getOperand(2).getImm(); + SpvBoolTy = GR.getOrCreateSPIRVVectorType(SpvBoolTy, NumElts, I, TII); + } + + if (!IsBoolTy) { + Register ConstZeroReg = + IsFloatTy ? buildZerosValF(InputType, I) : buildZerosVal(InputType, I); + + BuildMI(BB, I, I.getDebugLoc(), TII.get(SpirvNotEqualId)) + .addDef(NotEqualReg) + .addUse(GR.getSPIRVTypeID(SpvBoolTy)) + .addUse(InputRegister) + .addUse(ConstZeroReg) + .constrainAllUses(TII, TRI, RBI); + } + + if (!IsVectorTy) + return true; + + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpAll)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(SpvBoolScalarTy)) + .addUse(NotEqualReg) + .constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -1391,9 +1455,35 @@ bool SPIRVInstructionSelector::selectFCmp(Register ResVReg, Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const { + // OpenCL uses nulls for Zero. In HLSL we don't use null constants. 
+ bool ZeroAsNull = STI.isOpenCLEnv(); + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConstVector(0UL, I, ResType, TII, ZeroAsNull); + return GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull); +} + +static APFloat getZeroFP(const Type *LLVMFloatTy) { + if (!LLVMFloatTy) + return APFloat::getZero(APFloat::IEEEsingle()); + switch (LLVMFloatTy->getScalarType()->getTypeID()) { + case Type::HalfTyID: + return APFloat::getZero(APFloat::IEEEhalf()); + default: + case Type::FloatTyID: + return APFloat::getZero(APFloat::IEEEsingle()); + case Type::DoubleTyID: + return APFloat::getZero(APFloat::IEEEdouble()); + } +} + +Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType, + MachineInstr &I) const { + // OpenCL uses nulls for Zero. In HLSL we don't use null constants. + bool ZeroAsNull = STI.isOpenCLEnv(); + APFloat VZero = getZeroFP(GR.getTypeForSPIRVType(ResType)); if (ResType->getOpcode() == SPIRV::OpTypeVector) - return GR.getOrCreateConsIntVector(0, I, ResType, TII); - return GR.getOrCreateConstInt(0, I, ResType, TII); + return GR.getOrCreateConstVector(VZero, I, ResType, TII, ZeroAsNull); + return GR.getOrCreateConstFP(VZero, I, ResType, TII, ZeroAsNull); } Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, @@ -1403,7 +1493,7 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, APInt One = AllOnes ? APInt::getAllOnes(BitWidth) : APInt::getOneBitSet(BitWidth, 0); if (ResType->getOpcode() == SPIRV::OpTypeVector) - return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII); + return GR.getOrCreateConstVector(One.getZExtValue(), I, ResType, TII); return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII); } @@ -1785,6 +1875,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, break; case Intrinsic::spv_thread_id: return selectSpvThreadId(ResVReg, ResType, I); + case Intrinsic::spv_all: + return selectAll(ResVReg, ResType, I); case Intrinsic::spv_lifetime_start: case Intrinsic::spv_lifetime_end: { unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll new file mode 100644 index 0000000000000..ef8d463cbd815 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll @@ -0,0 +1,187 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; Make sure spirv operation function calls for all are generated. 
+ +; CHECK-HLSL-DAG: OpMemoryModel Logical GLSL450 +; CHECK-OCL-DAG: OpMemoryModel Physical32 OpenCL +; CHECK-DAG: OpName %[[#all_bool_arg:]] "a" +; CHECK-DAG: %[[#int_64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0 +; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_bool:]] = OpTypeVector %[[#bool]] 4 +; CHECK-DAG: %[[#vec4_16:]] = OpTypeVector %[[#int_16]] 4 +; CHECK-DAG: %[[#vec4_32:]] = OpTypeVector %[[#int_32]] 4 +; CHECK-DAG: %[[#vec4_64:]] = OpTypeVector %[[#int_64]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4 + +; CHECK-HLSL-DAG: %[[#const_i64_0:]] = OpConstant %[[#int_64]] 0 +; CHECK-HLSL-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0 +; CHECK-HLSL-DAG: %[[#const_i16_0:]] = OpConstant %[[#int_16]] 0 +; CHECK-HLSL-DAG: %[[#const_f64_0:]] = OpConstant %[[#float_64]] 0 +; CHECK-HLSL-DAG: %[[#const_f32_0:]] = OpConstant %[[#float_32:]] 0 +; CHECK-HLSL-DAG: %[[#const_f16_0:]] = OpConstant %[[#float_16:]] 0 +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantComposite %[[#vec4_16:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantComposite %[[#vec4_32:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantComposite %[[#vec4_64:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantComposite %[[#vec4_float_16:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f32:]] = OpConstantComposite %[[#vec4_float_32:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantComposite %[[#vec4_float_64:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]] + +; CHECK-OCL-DAG: %[[#const_i64_0:]] = OpConstantNull %[[#int_64]] +; CHECK-OCL-DAG: %[[#const_i32_0:]] = OpConstantNull %[[#int_32]] +; CHECK-OCL-DAG: %[[#const_i16_0:]] = OpConstantNull %[[#int_16]] +; CHECK-OCL-DAG: %[[#const_f64_0:]] = OpConstantNull %[[#float_64]] +; CHECK-OCL-DAG: %[[#const_f32_0:]] = OpConstantNull %[[#float_32:]] +; CHECK-OCL-DAG: %[[#const_f16_0:]] = OpConstantNull %[[#float_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantNull %[[#vec4_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantNull %[[#vec4_32:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantNull %[[#vec4_64:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantNull %[[#vec4_float_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f32:]] = OpConstantNull %[[#vec4_float_32:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantNull %[[#vec4_float_64:]] + +define noundef i1 @all_int64_t(i64 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i64_0:]] + %hlsl.all = call i1 @llvm.spv.all.i64(i64 %p0) + ret i1 %hlsl.all +} + + +define noundef i1 @all_int(i32 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; 
CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i32_0:]] + %hlsl.all = call i1 @llvm.spv.all.i32(i32 %p0) + ret i1 %hlsl.all +} + + +define noundef i1 @all_int16_t(i16 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i16_0:]] + %hlsl.all = call i1 @llvm.spv.all.i16(i16 %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_double(double noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f64_0:]] + %hlsl.all = call i1 @llvm.spv.all.f64(double %p0) + ret i1 %hlsl.all +} + + +define noundef i1 @all_float(float noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f32_0:]] + %hlsl.all = call i1 @llvm.spv.all.f32(float %p0) + ret i1 %hlsl.all +} + + +define noundef i1 @all_half(half noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f16_0:]] + %hlsl.all = call i1 @llvm.spv.all.f16(half %p0) + ret i1 %hlsl.all +} + + +define noundef i1 @all_bool4(<4 x i1> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpAll %[[#vec4_bool:]] %[[#arg0:]] + %hlsl.all = call i1 @llvm.spv.all.v4i1(<4 x i1> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_short4(<4 x i16> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#shortVecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i16:]] + ; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#shortVecNotEq:]] + %hlsl.all = call i1 @llvm.spv.all.v4i16(<4 x i16> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_int4(<4 x i32> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#i32VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i32:]] + ; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#i32VecNotEq:]] + %hlsl.all = call i1 @llvm.spv.all.v4i32(<4 x i32> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_int64_t4(<4 x i64> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#i64VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i64:]] + ; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#i64VecNotEq]] + %hlsl.all = call i1 @llvm.spv.all.v4i64(<4 x i64> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_half4(<4 x half> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f16VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f16:]] + ; CHECK: %[[#]] = OpAll %[[#bool]] %[[#f16VecNotEq:]] + %hlsl.all = call i1 @llvm.spv.all.v4f16(<4 x half> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_float4(<4 x float> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f32VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f32:]] + ; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#f32VecNotEq:]] + %hlsl.all = call i1 @llvm.spv.all.v4f32(<4 x float> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_double4(<4 x double> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f64VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f64:]] + ; CHECK: %[[#]] = OpAll %[[#bool:]] %[[#f64VecNotEq:]] + %hlsl.all = call i1 
@llvm.spv.all.v4f64(<4 x double> %p0) + ret i1 %hlsl.all +} + +define noundef i1 @all_bool(i1 noundef %a) { +entry: + ; CHECK: %[[#all_bool_arg:]] = OpFunctionParameter %[[#bool:]] + ; CHECK: OpReturnValue %[[#all_bool_arg:]] + %hlsl.all = call i1 @llvm.spv.all.i1(i1 %a) + ret i1 %hlsl.all +} + +declare i1 @llvm.spv.all.v4f16(<4 x half>) +declare i1 @llvm.spv.all.v4f32(<4 x float>) +declare i1 @llvm.spv.all.v4f64(<4 x double>) +declare i1 @llvm.spv.all.v4i1(<4 x i1>) +declare i1 @llvm.spv.all.v4i16(<4 x i16>) +declare i1 @llvm.spv.all.v4i32(<4 x i32>) +declare i1 @llvm.spv.all.v4i64(<4 x i64>) +declare i1 @llvm.spv.all.i1(i1) +declare i1 @llvm.spv.all.i16(i16) +declare i1 @llvm.spv.all.i32(i32) +declare i1 @llvm.spv.all.i64(i64) +declare i1 @llvm.spv.all.f16(half) +declare i1 @llvm.spv.all.f32(float) +declare i1 @llvm.spv.all.f64(double) From c258f573981336cd9f87f89e59c6c2117e5d44ec Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 10 Apr 2024 13:42:51 -0700 Subject: [PATCH 068/886] [ELF] Move createSyntheticSections from Writer.cpp to SyntheticSections.cpp. NFC SyntheticSections.cpp is more appropriate. This change enables elimination of many explicit template instantiations. Due to `make>(*strtab)` in Arch/ARM.cpp, we do not remove explicit template instantiations for SymbolTableSection. --- lld/ELF/SyntheticSections.cpp | 367 +++++++++++++++++++++++++++++----- lld/ELF/SyntheticSections.h | 4 + lld/ELF/Writer.cpp | 292 --------------------------- lld/ELF/Writer.h | 3 - 4 files changed, 317 insertions(+), 349 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index d8791e83dc9e5..0d7f393a9f3f4 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3870,6 +3870,27 @@ void InStruct::reset() { symTabShndx.reset(); } +static bool needsInterpSection() { + return !config->relocatable && !config->shared && + !config->dynamicLinker.empty() && script->needsInterpSection(); +} + +bool elf::hasMemtag() { + return config->emachine == EM_AARCH64 && + config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE; +} + +// Fully static executables don't support MTE globals at this point in time, as +// we currently rely on: +// - A dynamic loader to process relocations, and +// - Dynamic entries. +// This restriction could be removed in future by re-using some of the ideas +// that ifuncs use in fully static executables. +bool elf::canHaveMemtagGlobals() { + return hasMemtag() && + (config->relocatable || config->shared || needsInterpSection()); +} + constexpr char kMemtagAndroidNoteName[] = "Android"; void MemtagAndroidNote::writeTo(uint8_t *buf) { static_assert( @@ -3985,31 +4006,304 @@ size_t MemtagGlobalDescriptors::getSize() const { return createMemtagGlobalDescriptors(symbols); } +static OutputSection *findSection(StringRef name) { + for (SectionCommand *cmd : script->sectionCommands) + if (auto *osd = dyn_cast(cmd)) + if (osd->osec.name == name) + return &osd->osec; + return nullptr; +} + +static Defined *addOptionalRegular(StringRef name, SectionBase *sec, + uint64_t val, uint8_t stOther = STV_HIDDEN) { + Symbol *s = symtab.find(name); + if (!s || s->isDefined() || s->isCommon()) + return nullptr; + + s->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL, stOther, + STT_NOTYPE, val, + /*size=*/0, sec}); + s->isUsedInRegularObj = true; + return cast(s); +} + +template void elf::createSyntheticSections() { + // Initialize all pointers with NULL. This is needed because + // you can call lld::elf::main more than once as a library. 
+ Out::tlsPhdr = nullptr; + Out::preinitArray = nullptr; + Out::initArray = nullptr; + Out::finiArray = nullptr; + + // Add the .interp section first because it is not a SyntheticSection. + // The removeUnusedSyntheticSections() function relies on the + // SyntheticSections coming last. + if (needsInterpSection()) { + for (size_t i = 1; i <= partitions.size(); ++i) { + InputSection *sec = createInterpSection(); + sec->partition = i; + ctx.inputSections.push_back(sec); + } + } + + auto add = [](SyntheticSection &sec) { ctx.inputSections.push_back(&sec); }; + + in.shStrTab = std::make_unique(".shstrtab", false); + + Out::programHeaders = make("", 0, SHF_ALLOC); + Out::programHeaders->addralign = config->wordsize; + + if (config->strip != StripPolicy::All) { + in.strTab = std::make_unique(".strtab", false); + in.symTab = std::make_unique>(*in.strTab); + in.symTabShndx = std::make_unique(); + } + + in.bss = std::make_unique(".bss", 0, 1); + add(*in.bss); + + // If there is a SECTIONS command and a .data.rel.ro section name use name + // .data.rel.ro.bss so that we match in the .data.rel.ro output section. + // This makes sure our relro is contiguous. + bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro"); + in.bssRelRo = std::make_unique( + hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); + add(*in.bssRelRo); + + // Add MIPS-specific sections. + if (config->emachine == EM_MIPS) { + if (!config->shared && config->hasDynSymTab) { + in.mipsRldMap = std::make_unique(); + add(*in.mipsRldMap); + } + if ((in.mipsAbiFlags = MipsAbiFlagsSection::create())) + add(*in.mipsAbiFlags); + if ((in.mipsOptions = MipsOptionsSection::create())) + add(*in.mipsOptions); + if ((in.mipsReginfo = MipsReginfoSection::create())) + add(*in.mipsReginfo); + } + + StringRef relaDynName = config->isRela ? 
".rela.dyn" : ".rel.dyn"; + + const unsigned threadCount = config->threadCount; + for (Partition &part : partitions) { + auto add = [&](SyntheticSection &sec) { + sec.partition = part.getNumber(); + ctx.inputSections.push_back(&sec); + }; + + if (!part.name.empty()) { + part.elfHeader = std::make_unique>(); + part.elfHeader->name = part.name; + add(*part.elfHeader); + + part.programHeaders = + std::make_unique>(); + add(*part.programHeaders); + } + + if (config->buildId != BuildIdKind::None) { + part.buildId = std::make_unique(); + add(*part.buildId); + } + + part.dynStrTab = std::make_unique(".dynstr", true); + part.dynSymTab = + std::make_unique>(*part.dynStrTab); + part.dynamic = std::make_unique>(); + + if (hasMemtag()) { + part.memtagAndroidNote = std::make_unique(); + add(*part.memtagAndroidNote); + if (canHaveMemtagGlobals()) { + part.memtagGlobalDescriptors = + std::make_unique(); + add(*part.memtagGlobalDescriptors); + } + } + + if (config->androidPackDynRelocs) + part.relaDyn = std::make_unique>( + relaDynName, threadCount); + else + part.relaDyn = std::make_unique>( + relaDynName, config->zCombreloc, threadCount); + + if (config->hasDynSymTab) { + add(*part.dynSymTab); + + part.verSym = std::make_unique(); + add(*part.verSym); + + if (!namedVersionDefs().empty()) { + part.verDef = std::make_unique(); + add(*part.verDef); + } + + part.verNeed = std::make_unique>(); + add(*part.verNeed); + + if (config->gnuHash) { + part.gnuHashTab = std::make_unique(); + add(*part.gnuHashTab); + } + + if (config->sysvHash) { + part.hashTab = std::make_unique(); + add(*part.hashTab); + } + + add(*part.dynamic); + add(*part.dynStrTab); + } + add(*part.relaDyn); + + if (config->relrPackDynRelocs) { + part.relrDyn = std::make_unique>(threadCount); + add(*part.relrDyn); + } + + if (!config->relocatable) { + if (config->ehFrameHdr) { + part.ehFrameHdr = std::make_unique(); + add(*part.ehFrameHdr); + } + part.ehFrame = std::make_unique(); + add(*part.ehFrame); + + if (config->emachine == EM_ARM) { + // This section replaces all the individual .ARM.exidx InputSections. + part.armExidx = std::make_unique(); + add(*part.armExidx); + } + } + + if (!config->packageMetadata.empty()) { + part.packageMetadataNote = std::make_unique(); + add(*part.packageMetadataNote); + } + } + + if (partitions.size() != 1) { + // Create the partition end marker. This needs to be in partition number 255 + // so that it is sorted after all other partitions. It also has other + // special handling (see createPhdrs() and combineEhSections()). + in.partEnd = + std::make_unique(".part.end", config->maxPageSize, 1); + in.partEnd->partition = 255; + add(*in.partEnd); + + in.partIndex = std::make_unique(); + addOptionalRegular("__part_index_begin", in.partIndex.get(), 0); + addOptionalRegular("__part_index_end", in.partIndex.get(), + in.partIndex->getSize()); + add(*in.partIndex); + } + + // Add .got. MIPS' .got is so different from the other archs, + // it has its own class. 
+ if (config->emachine == EM_MIPS) { + in.mipsGot = std::make_unique(); + add(*in.mipsGot); + } else { + in.got = std::make_unique(); + add(*in.got); + } + + if (config->emachine == EM_PPC) { + in.ppc32Got2 = std::make_unique(); + add(*in.ppc32Got2); + } + + if (config->emachine == EM_PPC64) { + in.ppc64LongBranchTarget = std::make_unique(); + add(*in.ppc64LongBranchTarget); + } + + in.gotPlt = std::make_unique(); + add(*in.gotPlt); + in.igotPlt = std::make_unique(); + add(*in.igotPlt); + // Add .relro_padding if DATA_SEGMENT_RELRO_END is used; otherwise, add the + // section in the absence of PHDRS/SECTIONS commands. + if (config->zRelro && + ((script->phdrsCommands.empty() && !script->hasSectionsCommand) || + script->seenRelroEnd)) { + in.relroPadding = std::make_unique(); + add(*in.relroPadding); + } + + if (config->emachine == EM_ARM) { + in.armCmseSGSection = std::make_unique(); + add(*in.armCmseSGSection); + } + + // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat + // it as a relocation and ensure the referenced section is created. + if (ElfSym::globalOffsetTable && config->emachine != EM_MIPS) { + if (target->gotBaseSymInGotPlt) + in.gotPlt->hasGotPltOffRel = true; + else + in.got->hasGotOffRel = true; + } + + // We always need to add rel[a].plt to output if it has entries. + // Even for static linking it can contain R_[*]_IRELATIVE relocations. + in.relaPlt = std::make_unique>( + config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false, + /*threadCount=*/1); + add(*in.relaPlt); + + if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && + (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { + in.ibtPlt = std::make_unique(); + add(*in.ibtPlt); + } + + if (config->emachine == EM_PPC) + in.plt = std::make_unique(); + else + in.plt = std::make_unique(); + add(*in.plt); + in.iplt = std::make_unique(); + add(*in.iplt); + + if (config->andFeatures || !ctx.aarch64PauthAbiCoreInfo.empty()) + add(*make()); + + if (config->gdbIndex) { + in.gdbIndex = GdbIndexSection::create(); + add(*in.gdbIndex); + } + + // .note.GNU-stack is always added when we are creating a re-linkable + // object file. Other linkers are using the presence of this marker + // section to control the executable-ness of the stack area, but that + // is irrelevant these days. Stack area should always be non-executable + // by default. So we emit this section unconditionally. 
+ if (config->relocatable) + add(*make()); + + if (in.symTab) + add(*in.symTab); + if (in.symTabShndx) + add(*in.symTabShndx); + add(*in.shStrTab); + if (in.strTab) + add(*in.strTab); +} + InStruct elf::in; std::vector elf::partitions; Partition *elf::mainPart; -template std::unique_ptr GdbIndexSection::create(); -template std::unique_ptr GdbIndexSection::create(); -template std::unique_ptr GdbIndexSection::create(); -template std::unique_ptr GdbIndexSection::create(); - template void elf::splitSections(); template void elf::splitSections(); template void elf::splitSections(); template void elf::splitSections(); -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; - -template class elf::MipsOptionsSection; -template class elf::MipsOptionsSection; -template class elf::MipsOptionsSection; -template class elf::MipsOptionsSection; - template void EhFrameSection::iterateFDEWithLSDA( function_ref); template void EhFrameSection::iterateFDEWithLSDA( @@ -4019,41 +4313,11 @@ template void EhFrameSection::iterateFDEWithLSDA( template void EhFrameSection::iterateFDEWithLSDA( function_ref); -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; - -template class elf::DynamicSection; -template class elf::DynamicSection; -template class elf::DynamicSection; -template class elf::DynamicSection; - -template class elf::RelocationSection; -template class elf::RelocationSection; -template class elf::RelocationSection; -template class elf::RelocationSection; - -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; - -template class elf::RelrSection; -template class elf::RelrSection; -template class elf::RelrSection; -template class elf::RelrSection; - template class elf::SymbolTableSection; template class elf::SymbolTableSection; template class elf::SymbolTableSection; template class elf::SymbolTableSection; -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; - template void elf::writeEhdr(uint8_t *Buf, Partition &Part); template void elf::writeEhdr(uint8_t *Buf, Partition &Part); template void elf::writeEhdr(uint8_t *Buf, Partition &Part); @@ -4064,12 +4328,7 @@ template void elf::writePhdrs(uint8_t *Buf, Partition &Part); template void elf::writePhdrs(uint8_t *Buf, Partition &Part); template void elf::writePhdrs(uint8_t *Buf, Partition &Part); -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; - -template class elf::PartitionProgramHeadersSection; -template class elf::PartitionProgramHeadersSection; -template class elf::PartitionProgramHeadersSection; -template class elf::PartitionProgramHeadersSection; +template void elf::createSyntheticSections(); +template void elf::createSyntheticSections(); +template void elf::createSyntheticSections(); +template void elf::createSyntheticSections(); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 68b4cdb1dde04..759b78668f546 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1284,11 +1284,15 
@@ class MemtagGlobalDescriptors final : public SyntheticSection { SmallVector symbols; }; +template void createSyntheticSections(); InputSection *createInterpSection(); MergeInputSection *createCommentSection(); template void splitSections(); void combineEhSections(); +bool hasMemtag(); +bool canHaveMemtagGlobals(); + template void writeEhdr(uint8_t *buf, Partition &part); template void writePhdrs(uint8_t *buf, Partition &part); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 021b9bb0d5e22..d2a9e872dab91 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -91,11 +91,6 @@ template class Writer { }; } // anonymous namespace -static bool needsInterpSection() { - return !config->relocatable && !config->shared && - !config->dynamicLinker.empty() && script->needsInterpSection(); -} - template void elf::writeResult() { Writer().run(); } @@ -297,22 +292,6 @@ static void demoteSymbolsAndComputeIsPreemptible() { } } -bool elf::hasMemtag() { - return config->emachine == EM_AARCH64 && - config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE; -} - -// Fully static executables don't support MTE globals at this point in time, as -// we currently rely on: -// - A dynamic loader to process relocations, and -// - Dynamic entries. -// This restriction could be removed in future by re-using some of the ideas -// that ifuncs use in fully static executables. -bool elf::canHaveMemtagGlobals() { - return hasMemtag() && - (config->relocatable || config->shared || needsInterpSection()); -} - static OutputSection *findSection(StringRef name, unsigned partition = 1) { for (SectionCommand *cmd : script->sectionCommands) if (auto *osd = dyn_cast(cmd)) @@ -321,272 +300,6 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) { return nullptr; } -template void elf::createSyntheticSections() { - // Initialize all pointers with NULL. This is needed because - // you can call lld::elf::main more than once as a library. - Out::tlsPhdr = nullptr; - Out::preinitArray = nullptr; - Out::initArray = nullptr; - Out::finiArray = nullptr; - - // Add the .interp section first because it is not a SyntheticSection. - // The removeUnusedSyntheticSections() function relies on the - // SyntheticSections coming last. - if (needsInterpSection()) { - for (size_t i = 1; i <= partitions.size(); ++i) { - InputSection *sec = createInterpSection(); - sec->partition = i; - ctx.inputSections.push_back(sec); - } - } - - auto add = [](SyntheticSection &sec) { ctx.inputSections.push_back(&sec); }; - - in.shStrTab = std::make_unique(".shstrtab", false); - - Out::programHeaders = make("", 0, SHF_ALLOC); - Out::programHeaders->addralign = config->wordsize; - - if (config->strip != StripPolicy::All) { - in.strTab = std::make_unique(".strtab", false); - in.symTab = std::make_unique>(*in.strTab); - in.symTabShndx = std::make_unique(); - } - - in.bss = std::make_unique(".bss", 0, 1); - add(*in.bss); - - // If there is a SECTIONS command and a .data.rel.ro section name use name - // .data.rel.ro.bss so that we match in the .data.rel.ro output section. - // This makes sure our relro is contiguous. - bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro"); - in.bssRelRo = std::make_unique( - hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); - add(*in.bssRelRo); - - // Add MIPS-specific sections. 
- if (config->emachine == EM_MIPS) { - if (!config->shared && config->hasDynSymTab) { - in.mipsRldMap = std::make_unique(); - add(*in.mipsRldMap); - } - if ((in.mipsAbiFlags = MipsAbiFlagsSection::create())) - add(*in.mipsAbiFlags); - if ((in.mipsOptions = MipsOptionsSection::create())) - add(*in.mipsOptions); - if ((in.mipsReginfo = MipsReginfoSection::create())) - add(*in.mipsReginfo); - } - - StringRef relaDynName = config->isRela ? ".rela.dyn" : ".rel.dyn"; - - const unsigned threadCount = config->threadCount; - for (Partition &part : partitions) { - auto add = [&](SyntheticSection &sec) { - sec.partition = part.getNumber(); - ctx.inputSections.push_back(&sec); - }; - - if (!part.name.empty()) { - part.elfHeader = std::make_unique>(); - part.elfHeader->name = part.name; - add(*part.elfHeader); - - part.programHeaders = - std::make_unique>(); - add(*part.programHeaders); - } - - if (config->buildId != BuildIdKind::None) { - part.buildId = std::make_unique(); - add(*part.buildId); - } - - part.dynStrTab = std::make_unique(".dynstr", true); - part.dynSymTab = - std::make_unique>(*part.dynStrTab); - part.dynamic = std::make_unique>(); - - if (hasMemtag()) { - part.memtagAndroidNote = std::make_unique(); - add(*part.memtagAndroidNote); - if (canHaveMemtagGlobals()) { - part.memtagGlobalDescriptors = - std::make_unique(); - add(*part.memtagGlobalDescriptors); - } - } - - if (config->androidPackDynRelocs) - part.relaDyn = std::make_unique>( - relaDynName, threadCount); - else - part.relaDyn = std::make_unique>( - relaDynName, config->zCombreloc, threadCount); - - if (config->hasDynSymTab) { - add(*part.dynSymTab); - - part.verSym = std::make_unique(); - add(*part.verSym); - - if (!namedVersionDefs().empty()) { - part.verDef = std::make_unique(); - add(*part.verDef); - } - - part.verNeed = std::make_unique>(); - add(*part.verNeed); - - if (config->gnuHash) { - part.gnuHashTab = std::make_unique(); - add(*part.gnuHashTab); - } - - if (config->sysvHash) { - part.hashTab = std::make_unique(); - add(*part.hashTab); - } - - add(*part.dynamic); - add(*part.dynStrTab); - } - add(*part.relaDyn); - - if (config->relrPackDynRelocs) { - part.relrDyn = std::make_unique>(threadCount); - add(*part.relrDyn); - } - - if (!config->relocatable) { - if (config->ehFrameHdr) { - part.ehFrameHdr = std::make_unique(); - add(*part.ehFrameHdr); - } - part.ehFrame = std::make_unique(); - add(*part.ehFrame); - - if (config->emachine == EM_ARM) { - // This section replaces all the individual .ARM.exidx InputSections. - part.armExidx = std::make_unique(); - add(*part.armExidx); - } - } - - if (!config->packageMetadata.empty()) { - part.packageMetadataNote = std::make_unique(); - add(*part.packageMetadataNote); - } - } - - if (partitions.size() != 1) { - // Create the partition end marker. This needs to be in partition number 255 - // so that it is sorted after all other partitions. It also has other - // special handling (see createPhdrs() and combineEhSections()). - in.partEnd = - std::make_unique(".part.end", config->maxPageSize, 1); - in.partEnd->partition = 255; - add(*in.partEnd); - - in.partIndex = std::make_unique(); - addOptionalRegular("__part_index_begin", in.partIndex.get(), 0); - addOptionalRegular("__part_index_end", in.partIndex.get(), - in.partIndex->getSize()); - add(*in.partIndex); - } - - // Add .got. MIPS' .got is so different from the other archs, - // it has its own class. 
- if (config->emachine == EM_MIPS) { - in.mipsGot = std::make_unique(); - add(*in.mipsGot); - } else { - in.got = std::make_unique(); - add(*in.got); - } - - if (config->emachine == EM_PPC) { - in.ppc32Got2 = std::make_unique(); - add(*in.ppc32Got2); - } - - if (config->emachine == EM_PPC64) { - in.ppc64LongBranchTarget = std::make_unique(); - add(*in.ppc64LongBranchTarget); - } - - in.gotPlt = std::make_unique(); - add(*in.gotPlt); - in.igotPlt = std::make_unique(); - add(*in.igotPlt); - // Add .relro_padding if DATA_SEGMENT_RELRO_END is used; otherwise, add the - // section in the absence of PHDRS/SECTIONS commands. - if (config->zRelro && ((script->phdrsCommands.empty() && - !script->hasSectionsCommand) || script->seenRelroEnd)) { - in.relroPadding = std::make_unique(); - add(*in.relroPadding); - } - - if (config->emachine == EM_ARM) { - in.armCmseSGSection = std::make_unique(); - add(*in.armCmseSGSection); - } - - // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat - // it as a relocation and ensure the referenced section is created. - if (ElfSym::globalOffsetTable && config->emachine != EM_MIPS) { - if (target->gotBaseSymInGotPlt) - in.gotPlt->hasGotPltOffRel = true; - else - in.got->hasGotOffRel = true; - } - - // We always need to add rel[a].plt to output if it has entries. - // Even for static linking it can contain R_[*]_IRELATIVE relocations. - in.relaPlt = std::make_unique>( - config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false, - /*threadCount=*/1); - add(*in.relaPlt); - - if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && - (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { - in.ibtPlt = std::make_unique(); - add(*in.ibtPlt); - } - - if (config->emachine == EM_PPC) - in.plt = std::make_unique(); - else - in.plt = std::make_unique(); - add(*in.plt); - in.iplt = std::make_unique(); - add(*in.iplt); - - if (config->andFeatures || !ctx.aarch64PauthAbiCoreInfo.empty()) - add(*make()); - - if (config->gdbIndex) { - in.gdbIndex = GdbIndexSection::create(); - add(*in.gdbIndex); - } - - // .note.GNU-stack is always added when we are creating a re-linkable - // object file. Other linkers are using the presence of this marker - // section to control the executable-ness of the stack area, but that - // is irrelevant these days. Stack area should always be non-executable - // by default. So we emit this section unconditionally. - if (config->relocatable) - add(*make()); - - if (in.symTab) - add(*in.symTab); - if (in.symTabShndx) - add(*in.symTabShndx); - add(*in.shStrTab); - if (in.strTab) - add(*in.strTab); -} - // The main function of the writer. template void Writer::run() { // Now that we have a complete set of output sections. This function @@ -3114,11 +2827,6 @@ template void Writer::writeBuildId() { part.buildId->writeBuildId(output); } -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); - template void elf::writeResult(); template void elf::writeResult(); template void elf::writeResult(); diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h index aac8176d90989..7aa06dbcb131a 100644 --- a/lld/ELF/Writer.h +++ b/lld/ELF/Writer.h @@ -17,7 +17,6 @@ namespace lld::elf { class InputFile; class OutputSection; void copySectionsIntoPartitions(); -template void createSyntheticSections(); template void writeResult(); // This describes a program header entry. 
@@ -57,8 +56,6 @@ bool isMipsN32Abi(const InputFile *f); bool isMicroMips(); bool isMipsR6(); -bool hasMemtag(); -bool canHaveMemtagGlobals(); } // namespace lld::elf #endif From 8cfa72ade9f2f7df81a008efea84f833b73494b9 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 10 Apr 2024 13:51:23 -0700 Subject: [PATCH 069/886] [libc] fix typo in hdr/CMakeLists Fixes #87896 --- libc/hdr/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 4ca7db5e98d60..5a1acd9d17ab4 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -38,5 +38,5 @@ add_proxy_header_library( fenv_macros.h FULL_BUILD_DEPENDS libc.include.llvm-libc-macros.fenv_macros - libc.incude.fenv + libc.include.fenv ) From fb771fe315654231f613a5501ebd538f036c78b6 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Wed, 10 Apr 2024 23:34:48 +0200 Subject: [PATCH 070/886] [mlir] Slightly optimize bytecode op numbering (#88310) If the bytecode encoding supports properties, then the dictionary attribute is always the raw dictionary attribute of the operation, regardless of what it contains. Otherwise, get the dictionary attribute from the op: if the op does not have properties, then it returns the raw dictionary, otherwise it returns the combined inherent and discardable attributes. --- mlir/lib/Bytecode/Writer/IRNumbering.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Bytecode/Writer/IRNumbering.cpp b/mlir/lib/Bytecode/Writer/IRNumbering.cpp index f36c9ef060b6d..d2144dd7f3348 100644 --- a/mlir/lib/Bytecode/Writer/IRNumbering.cpp +++ b/mlir/lib/Bytecode/Writer/IRNumbering.cpp @@ -424,22 +424,22 @@ void IRNumberingState::number(Operation &op) { number(result.getType()); } - // Only number the operation's dictionary if it isn't empty. - DictionaryAttr dictAttr = op.getDiscardableAttrDictionary(); // Prior to a version with native property encoding, or when properties are // not used, we need to number also the merged dictionary containing both the // inherent and discardable attribute. - if (config.getDesiredBytecodeVersion() < - bytecode::kNativePropertiesEncoding || - !op.getPropertiesStorage()) { + DictionaryAttr dictAttr; + if (config.getDesiredBytecodeVersion() >= bytecode::kNativePropertiesEncoding) + dictAttr = op.getRawDictionaryAttrs(); + else dictAttr = op.getAttrDictionary(); - } + // Only number the operation's dictionary if it isn't empty. if (!dictAttr.empty()) number(dictAttr); // Visit the operation properties (if any) to make sure referenced attributes // are numbered. - if (config.getDesiredBytecodeVersion() >= bytecode::kNativePropertiesEncoding && + if (config.getDesiredBytecodeVersion() >= + bytecode::kNativePropertiesEncoding && op.getPropertiesStorageSize()) { if (op.isRegistered()) { // Operation that have properties *must* implement this interface. From af7c196fb8d10f58a704b5a8d142feacf2f0236d Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Wed, 10 Apr 2024 14:45:49 -0700 Subject: [PATCH 071/886] [lldb][sbdebugger] Move SBDebugger Broadcast bit enum into lldb-enumerations.h (#87409) When the `eBroadcastBitProgressCategory` bit was originally added to Debugger.h and SBDebugger.h, each corresponding bit was added in order of the other bits that were previously there. Since `Debugger.h` has an enum bit that `SBDebugger.h` does not, this meant that their offsets did not match. 
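A hypothetical side-by-side of that drift (a reconstructed illustration, not lldb source; names are shortened, and the extra internal-only bit is the point rather than its exact identifier):

    // Sketch: Debugger.h's internal enum carries one extra bit, so the
    // category bit that SBDebugger.h placed at (1 << 3) sits at (1 << 4)
    // internally.
    enum InternalBits {
      IntProgress = (1 << 0),
      IntWarning = (1 << 1),
      IntError = (1 << 2),
      IntExtra = (1 << 3), // the additional internal-only bit
      IntProgressCategory = (1 << 4),
    };
    enum SBBits {
      SBProgress = (1 << 0),
      SBWarning = (1 << 1),
      SBError = (1 << 2),
      SBProgressCategory = (1 << 3), // mismatched with the internal value
    };
    static_assert(IntProgressCategory != SBProgressCategory,
                  "offsets diverge between the two headers");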
Instead of trying to keep the bit offsets in sync between the two, it's preferable to just move SBDebugger's enum into the main enumerations header and use the bits from there. This also requires that API tests using the bits from SBDebugger update their usage. --- lldb/include/lldb/API/SBDebugger.h | 7 ------- lldb/include/lldb/lldb-enumerations.h | 8 ++++++++ .../diagnostic_reporting/TestDiagnosticReporting.py | 2 +- .../progress_reporting/TestProgressReporting.py | 2 +- .../clang_modules/TestClangModuleBuildProgress.py | 2 +- lldb/test/API/macosx/rosetta/TestRosetta.py | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 62b2f91f5076d..cf5409a12a056 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -42,13 +42,6 @@ class LLDB_API SBInputReader { class LLDB_API SBDebugger { public: - FLAGS_ANONYMOUS_ENUM(){ - eBroadcastBitProgress = (1 << 0), - eBroadcastBitWarning = (1 << 1), - eBroadcastBitError = (1 << 2), - eBroadcastBitProgressCategory = (1 << 3), - }; - SBDebugger(); SBDebugger(const lldb::SBDebugger &rhs); diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 646f7bfda9847..f3b07ea6d2039 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1339,6 +1339,14 @@ enum AddressMaskRange { eAddressMaskRangeAll = eAddressMaskRangeAny, }; +/// Used by the debugger to indicate which events are being broadcasted. +enum DebuggerBroadcastBit { + eBroadcastBitProgress = (1 << 0), + eBroadcastBitWarning = (1 << 1), + eBroadcastBitError = (1 << 2), + eBroadcastBitProgressCategory = (1 << 3), +}; + } // namespace lldb #endif // LLDB_LLDB_ENUMERATIONS_H diff --git a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py index 36a3be695628f..6353e3e8cbedb 100644 --- a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py +++ b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py @@ -15,7 +15,7 @@ def setUp(self): self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( self.broadcaster, - lldb.SBDebugger.eBroadcastBitWarning | lldb.SBDebugger.eBroadcastBitError, + lldb.eBroadcastBitWarning | lldb.eBroadcastBitError, ) def test_dwarf_symbol_loading_diagnostic_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 9af53845ca1b7..98988d7624da3 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -13,7 +13,7 @@ def setUp(self): TestBase.setUp(self) self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( - self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress + self.broadcaster, lldb.eBroadcastBitProgress ) def test_dwarf_symbol_loading_progress_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py index 228f676aedf6a..33c7c269c081e 100644 --- a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py +++ 
b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py
@@ -34,7 +34,7 @@ def test_clang_module_build_progress_report(self):
         # other unrelated progress events.
         broadcaster = self.dbg.GetBroadcaster()
         listener = lldbutil.start_listening_from(
-            broadcaster, lldb.SBDebugger.eBroadcastBitProgress
+            broadcaster, lldb.eBroadcastBitProgress
         )
 
         # Trigger module builds.
diff --git a/lldb/test/API/macosx/rosetta/TestRosetta.py b/lldb/test/API/macosx/rosetta/TestRosetta.py
index ce40de475ef16..669db95a1624c 100644
--- a/lldb/test/API/macosx/rosetta/TestRosetta.py
+++ b/lldb/test/API/macosx/rosetta/TestRosetta.py
@@ -49,7 +49,7 @@ def test_rosetta(self):
         if rosetta_debugserver_installed():
             broadcaster = self.dbg.GetBroadcaster()
             listener = lldbutil.start_listening_from(
-                broadcaster, lldb.SBDebugger.eBroadcastBitWarning
+                broadcaster, lldb.eBroadcastBitWarning
             )
 
             target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(

From 2fdfea088c8d78119b74116b94bc6729ce0e3efe Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Wed, 10 Apr 2024 14:50:54 -0700
Subject: [PATCH 072/886] [AMDGPU] Add v2i32 to the VS_64 types. NFCI. (#88318)

I am trying to use VOP3Inst with an intrinsic taking a v2i32 operand, and
it fails to create a pattern without this change.
---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 8 ++++----
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 04f3a2f576053..d6d49889656bb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2087,7 +2087,7 @@ def : GCNPat <
 def : GCNPat <
   (DivergentUnaryFrag<fneg> (v2f32 VReg_64:$src)),
   (V_PK_ADD_F32 11 /* OP_SEL_1 | NEG_LO | HEG_HI */, VReg_64:$src,
-                11 /* OP_SEL_1 | NEG_LO | HEG_HI */, 0,
+                11 /* OP_SEL_1 | NEG_LO | HEG_HI */, (i64 0),
                 0, 0, 0, 0, 0)
 > {
   let SubtargetPredicate = HasPackedFP32Ops;
@@ -2999,7 +2999,7 @@ def : GCNPat<
 let SubtargetPredicate = HasPackedFP32Ops in {
 def : GCNPat<
   (fcanonicalize (v2f32 (VOP3PMods v2f32:$src, i32:$src_mods))),
-  (V_PK_MUL_F32 0, CONST.FP32_ONE, $src_mods, $src)
+  (V_PK_MUL_F32 0, (i64 CONST.FP32_ONE), $src_mods, $src)
 >;
 }
@@ -3007,7 +3007,7 @@ def : GCNPat<
 let SubtargetPredicate = isNotGFX12Plus in {
 def : GCNPat<
   (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
-  (V_MUL_F64_e64 0, CONST.FP64_ONE, $src_mods, $src)
+  (V_MUL_F64_e64 0, (i64 CONST.FP64_ONE), $src_mods, $src)
 >;
 }
 } // End AddedComplexity = -5
@@ -3369,7 +3369,7 @@ def : GCNPat <
         SRCMODS.NONE,
         (V_FRACT_F64_e64 $mods, $x),
         SRCMODS.NONE,
-        (V_MOV_B64_PSEUDO 0x3fefffffffffffff)),
+        (V_MOV_B64_PSEUDO (i64 0x3fefffffffffffff))),
       $x,
       (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))))
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index cb6591bf62449..01ed565bb756d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1046,7 +1046,7 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
   let HasSGPR = 1;
 }
 
-def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
+def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_64)> {
   let isAllocatable = 0;
   let HasVGPR = 1;
   let HasSGPR = 1;

From 9f6d08f2566a26144ea1753f80aebb1f2ecfdc63 Mon Sep 17 00:00:00 2001
From: Chelsea Cassanova
Date: Wed, 10 Apr 2024 14:54:30 -0700
Subject: [PATCH
073/886] Revert "[lldb][sbdebugger] Move SBDebugger Broadcast bit enum into lldb-enumerations.h" (#88324) Reverts llvm/llvm-project#87409 due a missed update to the broadcast bit causing a build failure on the x86_64 Debian buildbot. --- lldb/include/lldb/API/SBDebugger.h | 7 +++++++ lldb/include/lldb/lldb-enumerations.h | 8 -------- .../diagnostic_reporting/TestDiagnosticReporting.py | 2 +- .../progress_reporting/TestProgressReporting.py | 2 +- .../clang_modules/TestClangModuleBuildProgress.py | 2 +- lldb/test/API/macosx/rosetta/TestRosetta.py | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index cf5409a12a056..62b2f91f5076d 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -42,6 +42,13 @@ class LLDB_API SBInputReader { class LLDB_API SBDebugger { public: + FLAGS_ANONYMOUS_ENUM(){ + eBroadcastBitProgress = (1 << 0), + eBroadcastBitWarning = (1 << 1), + eBroadcastBitError = (1 << 2), + eBroadcastBitProgressCategory = (1 << 3), + }; + SBDebugger(); SBDebugger(const lldb::SBDebugger &rhs); diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index f3b07ea6d2039..646f7bfda9847 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1339,14 +1339,6 @@ enum AddressMaskRange { eAddressMaskRangeAll = eAddressMaskRangeAny, }; -/// Used by the debugger to indicate which events are being broadcasted. -enum DebuggerBroadcastBit { - eBroadcastBitProgress = (1 << 0), - eBroadcastBitWarning = (1 << 1), - eBroadcastBitError = (1 << 2), - eBroadcastBitProgressCategory = (1 << 3), -}; - } // namespace lldb #endif // LLDB_LLDB_ENUMERATIONS_H diff --git a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py index 6353e3e8cbedb..36a3be695628f 100644 --- a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py +++ b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py @@ -15,7 +15,7 @@ def setUp(self): self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( self.broadcaster, - lldb.eBroadcastBitWarning | lldb.eBroadcastBitError, + lldb.SBDebugger.eBroadcastBitWarning | lldb.SBDebugger.eBroadcastBitError, ) def test_dwarf_symbol_loading_diagnostic_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 98988d7624da3..9af53845ca1b7 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -13,7 +13,7 @@ def setUp(self): TestBase.setUp(self) self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( - self.broadcaster, lldb.eBroadcastBitProgress + self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress ) def test_dwarf_symbol_loading_progress_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py index 33c7c269c081e..228f676aedf6a 100644 --- a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py +++ 
b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py @@ -34,7 +34,7 @@ def test_clang_module_build_progress_report(self): # other unrelated progress events. broadcaster = self.dbg.GetBroadcaster() listener = lldbutil.start_listening_from( - broadcaster, lldb.eBroadcastBitProgress + broadcaster, lldb.SBDebugger.eBroadcastBitProgress ) # Trigger module builds. diff --git a/lldb/test/API/macosx/rosetta/TestRosetta.py b/lldb/test/API/macosx/rosetta/TestRosetta.py index 669db95a1624c..ce40de475ef16 100644 --- a/lldb/test/API/macosx/rosetta/TestRosetta.py +++ b/lldb/test/API/macosx/rosetta/TestRosetta.py @@ -49,7 +49,7 @@ def test_rosetta(self): if rosetta_debugserver_installed(): broadcaster = self.dbg.GetBroadcaster() listener = lldbutil.start_listening_from( - broadcaster, lldb.eBroadcastBitWarning + broadcaster, lldb.SBDebugger.eBroadcastBitWarning ) target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( From d8f1e5d2894f7f4edc2e85e63def456c7f430f34 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Apr 2024 15:07:16 -0700 Subject: [PATCH 074/886] [APInt] Remove accumulator initialization from tcMultiply and tcFullMultiply. NFCI (#88202) The tcMultiplyPart routine has a flag that says whether to add to the accumulator or overwrite it. By using the overwrite mode on the first iteration we don't need to initialize the accumulator to zero. Note, the initialization in tcFullMultiply was only initializing the first rhsParts of dst. tcMultiplyPart always overwrites the rhsParts+1 part that just contains the last carry. The first write to each part of dst past rhsParts is a carry write so that's how the upper part of dst is initialized. --- llvm/lib/Support/APInt.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 8825025ec3213..18feca4c05533 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -2585,11 +2585,13 @@ int APInt::tcMultiply(WordType *dst, const WordType *lhs, assert(dst != lhs && dst != rhs); int overflow = 0; - tcSet(dst, 0, parts); - for (unsigned i = 0; i < parts; i++) - overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, - parts - i, true); + for (unsigned i = 0; i < parts; i++) { + // Don't accumulate on the first iteration so we don't need to initalize + // dst to 0. + overflow |= + tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, parts - i, i != 0); + } return overflow; } @@ -2605,10 +2607,11 @@ void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, assert(dst != lhs && dst != rhs); - tcSet(dst, 0, rhsParts); - - for (unsigned i = 0; i < lhsParts; i++) - tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); + for (unsigned i = 0; i < lhsParts; i++) { + // Don't accumulate on the first iteration so we don't need to initalize + // dst to 0. + tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, i != 0); + } } // If RHS is zero LHS and REMAINDER are left unchanged, return one. 
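To make the trick above concrete, here is a minimal sketch of the same
overwrite-on-first-iteration idea (32-bit words for brevity; this is an
illustration, not LLVM's actual tcMultiplyPart, which also handles overflow
reporting and partial destination widths):

```
#include <cstddef>
#include <cstdint>

// Multiply src (srcParts words) by a single word and either accumulate into
// dst or overwrite it; the final carry word is always overwritten.
void mulPart(uint32_t *dst, const uint32_t *src, uint32_t factor,
             size_t srcParts, bool add) {
  uint64_t carry = 0;
  for (size_t i = 0; i < srcParts; ++i) {
    uint64_t acc = uint64_t(src[i]) * factor + carry;
    if (add)
      acc += dst[i]; // fold in the partial sums from earlier iterations
    dst[i] = uint32_t(acc);
    carry = acc >> 32;
  }
  dst[srcParts] = uint32_t(carry);
}

// Full multiply into dst (lhsParts + rhsParts words) with no pre-zeroing:
// pass 0 overwrites dst[0..rhsParts]; every later pass accumulates into
// words that were already written and overwrites one fresh carry word.
void fullMultiply(uint32_t *dst, const uint32_t *lhs, const uint32_t *rhs,
                  size_t lhsParts, size_t rhsParts) {
  for (size_t i = 0; i < lhsParts; ++i)
    mulPart(&dst[i], rhs, lhs[i], rhsParts, /*add=*/i != 0);
}
```

Every word of dst is written before it is ever read, which is exactly why the
zero-initialization the patch removes was unnecessary.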
From a9d4ddd98a0bc495126027122fdca751b6841ceb Mon Sep 17 00:00:00 2001
From: Oskar Wirga
Date: Wed, 10 Apr 2024 15:37:27 -0700
Subject: [PATCH 075/886] [MergeFuncs/CFI] Ensure all type metadata is
 propagated for CFI (#88218)

I noticed that we weren't propagating ALL type metadata that was attached
to CFI functions:

# BEFORE
```
; Function Attrs: minsize nounwind optsize ssp uwtable(sync)
define internal void @foo(ptr nocapture noundef readonly %0) #0 !dbg !62311 !type !34028 !type !34029 !type !34030
...
fn merging
; Function Attrs: minsize nounwind optsize ssp uwtable(sync)
define internal void @foo(ptr nocapture noundef readonly %0) #0 !type !34028
```

# AFTER
```
; Function Attrs: minsize nounwind optsize ssp uwtable(sync)
define internal void @foo(ptr nocapture noundef readonly %0) #0 !dbg !62311 !type !34028 !type !34029 !type !34030
...
fn merging
; Function Attrs: minsize nounwind optsize ssp uwtable(sync)
define internal void @foo(ptr nocapture noundef readonly %0) #0 !type !34028 !type !34029 !type !34030
```

This patch makes sure that the entire vector of metadata is copied over.
---
 llvm/lib/Transforms/IPO/MergeFunctions.cpp    | 12 +++++----
 .../Transforms/MergeFunc/cfi-thunk-merging.ll | 26 +++++++++----------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 05a3b169aaaf4..b50a700e09038 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -712,11 +712,13 @@ static bool canCreateThunkFor(Function *F) {
   return true;
 }
 
-/// Copy metadata from one function to another.
-static void copyMetadataIfPresent(Function *From, Function *To, StringRef Key) {
-  if (MDNode *MD = From->getMetadata(Key)) {
-    To->setMetadata(Key, MD);
-  }
+/// Copy all metadata of a specific kind from one function to another.
+static void copyMetadataIfPresent(Function *From, Function *To,
+                                  StringRef Kind) {
+  SmallVector<MDNode *, 1> MDs;
+  From->getMetadata(Kind, MDs);
+  for (MDNode *MD : MDs)
+    To->addMetadata(Kind, *MD);
 }
 
 // Replace G with a simple tail call to bitcast(F).
Also (unless diff --git a/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll b/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll index d35d777282730..562cc1a973d81 100644 --- a/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll +++ b/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll @@ -98,7 +98,7 @@ attributes #3 = { noreturn nounwind } !4 = !{i64 0, !"_ZTSFiiE.generalized"} !5 = !{} ; CHECK-LABEL: define dso_local i32 @f -; CHECK-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type !2 !type !3 { +; CHECK-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type [[META2:![0-9]+]] !type [[META3:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -119,7 +119,7 @@ attributes #3 = { noreturn nounwind } ; ; ; CHECK-LABEL: define dso_local i32 @g -; CHECK-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type !2 !type !3 { +; CHECK-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8 @@ -130,11 +130,11 @@ attributes #3 = { noreturn nounwind } ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr @f, ptr @f_thunk ; CHECK-NEXT: store ptr [[COND]], ptr [[FP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.type.test(ptr [[TMP2]], metadata !"_ZTSFiiE"), !nosanitize !4 -; CHECK-NEXT: br i1 [[TMP3]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize !4 +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.type.test(ptr [[TMP2]], metadata !"_ZTSFiiE"), !nosanitize [[META4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META4]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize !4 -; CHECK-NEXT: unreachable, !nosanitize !4 +; CHECK-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize [[META4]] +; CHECK-NEXT: unreachable, !nosanitize [[META4]] ; CHECK: cont: ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 ; CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP4]]) @@ -142,13 +142,13 @@ attributes #3 = { noreturn nounwind } ; ; ; CHECK-LABEL: define dso_local i32 @f_thunk -; CHECK-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type !2 { +; CHECK-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] { ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]] ; CHECK-NEXT: ret i32 [[TMP2]] ; ; ; LOWERTYPETESTS-LABEL: define dso_local i32 @f -; LOWERTYPETESTS-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type !2 !type !3 { +; LOWERTYPETESTS-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type [[META2:![0-9]+]] !type [[META3:![0-9]+]] { ; LOWERTYPETESTS-NEXT: entry: ; LOWERTYPETESTS-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 ; LOWERTYPETESTS-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -169,7 +169,7 @@ attributes #3 = { noreturn nounwind } ; ; ; LOWERTYPETESTS-LABEL: define dso_local i32 @g -; LOWERTYPETESTS-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type !2 !type !3 { +; LOWERTYPETESTS-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] { ; LOWERTYPETESTS-NEXT: entry: ; LOWERTYPETESTS-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 ; LOWERTYPETESTS-NEXT: [[FP:%.*]] = alloca ptr, align 8 @@ -186,10 +186,10 @@ attributes #3 = { noreturn nounwind } ; LOWERTYPETESTS-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 61 ; LOWERTYPETESTS-NEXT: [[TMP7:%.*]] = or 
i64 [[TMP5]], [[TMP6]]
 ; LOWERTYPETESTS-NEXT:    [[TMP8:%.*]] = icmp ule i64 [[TMP7]], 1
-; LOWERTYPETESTS-NEXT:    br i1 [[TMP8]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize !4
+; LOWERTYPETESTS-NEXT:    br i1 [[TMP8]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META4:![0-9]+]]
 ; LOWERTYPETESTS:       trap:
-; LOWERTYPETESTS-NEXT:    call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize !4
-; LOWERTYPETESTS-NEXT:    unreachable, !nosanitize !4
+; LOWERTYPETESTS-NEXT:    call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize [[META4]]
+; LOWERTYPETESTS-NEXT:    unreachable, !nosanitize [[META4]]
 ; LOWERTYPETESTS:       cont:
 ; LOWERTYPETESTS-NEXT:    [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4
 ; LOWERTYPETESTS-NEXT:    [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP9]])
@@ -197,7 +197,7 @@ attributes #3 = { noreturn nounwind }
 ;
 ;
 ; LOWERTYPETESTS-LABEL: define dso_local i32 @f_thunk
-; LOWERTYPETESTS-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type !2 {
+; LOWERTYPETESTS-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type [[META2]] !type [[META3]] {
 ; LOWERTYPETESTS-NEXT:    [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
 ; LOWERTYPETESTS-NEXT:    ret i32 [[TMP2]]
 ;

From 8136ac1c42dcfdd070f0bcba0f06424093df22db Mon Sep 17 00:00:00 2001
From: Daniel Chen
Date: Wed, 10 Apr 2024 19:22:38 -0400
Subject: [PATCH 076/886] [Flang] Define c_int_fast16_t and c_int_fast32_t for
 PowerPC. (#88292)

On Linux, PowerPC defines `int_fast16_t` and `int_fast32_t` as `long`.
Need to update the corresponding types, `c_int_fast16_t` and
`c_int_fast32_t`, in the `iso_c_binding` module so they are interoperable.
---
 flang/lib/Frontend/CompilerInvocation.cpp      | 17 +++++++++--------
 flang/module/iso_c_binding.f90                 |  8 ++++++++
 .../test/Driver/predefined-macros-powerpc2.f90 | 13 +++++++++++++
 3 files changed, 30 insertions(+), 8 deletions(-)
 create mode 100644 flang/test/Driver/predefined-macros-powerpc2.f90

diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 8ce6ab7baf481..e432c5a302754 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1333,6 +1333,15 @@ void CompilerInvocation::setDefaultPredefinitions() {
   }
 
   llvm::Triple targetTriple{llvm::Triple(this->targetOpts.triple)};
+  if (targetTriple.isPPC()) {
+    // '__powerpc__' is a generic macro for any PowerPC cases. e.g. Max integer
+    // size.
+    fortranOptions.predefinitions.emplace_back("__powerpc__", "1");
+  }
+  if (targetTriple.isOSLinux()) {
+    fortranOptions.predefinitions.emplace_back("__linux__", "1");
+  }
+
   switch (targetTriple.getArch()) {
   default:
     break;
@@ -1340,14 +1349,6 @@ void CompilerInvocation::setDefaultPredefinitions() {
     fortranOptions.predefinitions.emplace_back("__x86_64__", "1");
     fortranOptions.predefinitions.emplace_back("__x86_64", "1");
     break;
-  case llvm::Triple::ArchType::ppc:
-  case llvm::Triple::ArchType::ppcle:
-  case llvm::Triple::ArchType::ppc64:
-  case llvm::Triple::ArchType::ppc64le:
-    // '__powerpc__' is a generic macro for any PowerPC cases. e.g. Max integer
-    // size.
- fortranOptions.predefinitions.emplace_back("__powerpc__", "1"); - break; } } diff --git a/flang/module/iso_c_binding.f90 b/flang/module/iso_c_binding.f90 index 1661fd5a6dcf6..eb0f8f2ef59ad 100644 --- a/flang/module/iso_c_binding.f90 +++ b/flang/module/iso_c_binding.f90 @@ -58,9 +58,17 @@ module iso_c_binding c_int_least8_t = c_int8_t, & c_int_fast8_t = c_int8_t, & c_int_least16_t = c_int16_t, & +#if defined(__linux__) && defined(__powerpc__) + c_int_fast16_t = c_long, & +#else c_int_fast16_t = c_int16_t, & +#endif c_int_least32_t = c_int32_t, & +#if defined(__linux__) && defined(__powerpc__) + c_int_fast32_t = c_long, & +#else c_int_fast32_t = c_int32_t, & +#endif c_int_least64_t = c_int64_t, & c_int_fast64_t = c_int64_t, & c_int_least128_t = c_int128_t, & diff --git a/flang/test/Driver/predefined-macros-powerpc2.f90 b/flang/test/Driver/predefined-macros-powerpc2.f90 new file mode 100644 index 0000000000000..6e10235e21f86 --- /dev/null +++ b/flang/test/Driver/predefined-macros-powerpc2.f90 @@ -0,0 +1,13 @@ +! Test predefined macro for PowerPC architecture + +! RUN: %flang_fc1 -triple ppc64le-unknown-linux -cpp -E %s | FileCheck %s +! REQUIRES: target=powerpc{{.*}} + +! CHECK: integer :: var1 = 1 +! CHECK: integer :: var2 = 1 + +#if defined(__linux__) && defined(__powerpc__) + integer :: var1 = __powerpc__ + integer :: var2 = __linux__ +#endif +end program From acb7ddc5cf2f23416f65dcdc6c7fd08850ad961d Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 10 Apr 2024 16:24:02 -0700 Subject: [PATCH 077/886] [WebAssembly] Remove threadlocal.address when disabling TLS (#88209) Remove `llvm.threadlocal.address` intrinsic usage when disabling TLS. This fixes errors revealed by the stricter IR verification introduced in PR #87841. --- .../WebAssembly/WebAssemblyTargetMachine.cpp | 11 ++++++++ .../WebAssembly/tls-general-dynamic.ll | 26 +++++++++++++------ .../CodeGen/WebAssembly/tls-local-exec.ll | 17 ++++++++---- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 70685b2e3bb2d..769ee765e1907 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -291,6 +291,17 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { bool Stripped = false; for (auto &GV : M.globals()) { if (GV.isThreadLocal()) { + // replace `@llvm.threadlocal.address.pX(GV)` with `GV`. 
+ for (Use &U : make_early_inc_range(GV.uses())) { + if (IntrinsicInst *II = dyn_cast(U.getUser())) { + if (II->getIntrinsicID() == Intrinsic::threadlocal_address && + II->getArgOperand(0) == &GV) { + II->replaceAllUsesWith(&GV); + II->eraseFromParent(); + } + } + } + Stripped = true; GV.setThreadLocal(false); } diff --git a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll index 46ab62dfaaa23..006b73922d2b9 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll @@ -14,7 +14,9 @@ define i32 @address_of_tls() { ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return - ret i32 ptrtoint(ptr @tls to i32) + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + %r = ptrtoint ptr %p to i32 + ret i32 %r } ; CHECK-LABEL: address_of_tls_external: @@ -25,7 +27,9 @@ define i32 @address_of_tls_external() { ; NO-TLS-NEXT: i32.const tls_external ; NO-TLS-NEXT: return - ret i32 ptrtoint(ptr @tls_external to i32) + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) + %r = ptrtoint ptr %p to i32 + ret i32 %r } ; CHECK-LABEL: ptr_to_tls: @@ -38,7 +42,8 @@ define ptr @ptr_to_tls() { ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return - ret ptr @tls + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + ret ptr %p } ; CHECK-LABEL: ptr_to_tls_external: @@ -49,7 +54,8 @@ define ptr @ptr_to_tls_external() { ; NO-TLS-NEXT: i32.const tls_external ; NO-TLS-NEXT: return - ret ptr @tls_external + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) + ret ptr %p } ; CHECK-LABEL: tls_load: @@ -64,7 +70,8 @@ define i32 @tls_load() { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.load tls ; NO-TLS-NEXT: return - %tmp = load i32, ptr @tls, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + %tmp = load i32, ptr %p, align 4 ret i32 %tmp } @@ -78,7 +85,8 @@ define i32 @tls_load_external() { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.load tls_external ; NO-TLS-NEXT: return - %tmp = load i32, ptr @tls_external, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) + %tmp = load i32, ptr %p, align 4 ret i32 %tmp } @@ -94,7 +102,8 @@ define void @tls_store(i32 %x) { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.store tls ; NO-TLS-NEXT: return - store i32 %x, ptr @tls, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + store i32 %x, ptr %p, align 4 ret void } @@ -108,7 +117,8 @@ define void @tls_store_external(i32 %x) { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.store tls_external ; NO-TLS-NEXT: return - store i32 %x, ptr @tls_external, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) + store i32 %x, ptr %p, align 4 ret void } diff --git a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll index 3aa044c34789e..dc0d40c7973ad 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll @@ -20,7 +20,9 @@ define i32 @address_of_tls() { ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return - ret i32 ptrtoint(ptr @tls to i32) + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + %r = ptrtoint ptr %p to i32 + ret i32 %r } ; CHECK-LABEL: address_of_tls_external: @@ -33,7 +35,9 @@ define i32 @address_of_tls_external() { ; NO-TLS-NEXT: i32.const tls_external ; NO-TLS-NEXT: return - ret i32 ptrtoint(ptr @tls_external to i32) + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) + %r = ptrtoint ptr %p to i32 + ret i32 
%r } ; CHECK-LABEL: ptr_to_tls: @@ -46,7 +50,8 @@ define ptr @ptr_to_tls() { ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return - ret ptr @tls + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + ret ptr %p } ; CHECK-LABEL: tls_load: @@ -61,7 +66,8 @@ define i32 @tls_load() { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.load tls ; NO-TLS-NEXT: return - %tmp = load i32, ptr @tls, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + %tmp = load i32, ptr %p, align 4 ret i32 %tmp } @@ -77,7 +83,8 @@ define void @tls_store(i32 %x) { ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.store tls ; NO-TLS-NEXT: return - store i32 %x, ptr @tls, align 4 + %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) + store i32 %x, ptr %p, align 4 ret void } From d927d1867fa760836538beef2c4531c1a0b04e24 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 10 Apr 2024 16:30:42 -0700 Subject: [PATCH 078/886] [UBSAN] Emit optimization remarks (#88304) --- llvm/lib/IR/DiagnosticInfo.cpp | 6 ++- .../Instrumentation/LowerAllowCheckPass.cpp | 46 +++++++++++++++++-- .../lower-builtin-allow-check-remarks.ll | 24 ++++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/lower-builtin-allow-check-remarks.ll diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 342c4cbbc39d6..31971b179fb4b 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -179,8 +179,12 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, else if (isa(V)) { raw_string_ostream OS(Val); V->printAsOperand(OS, /*PrintType=*/false); - } else if (auto *I = dyn_cast(V)) + } else if (auto *I = dyn_cast(V)) { Val = I->getOpcodeName(); + } else if (auto *MD = dyn_cast(V)) { + if (auto *S = dyn_cast(MD->getMetadata())) + Val = S->getString(); + } } DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Type *T) diff --git a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp index cdc8318f088c2..465fa41b6c663 100644 --- a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp @@ -10,11 +10,16 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/RandomNumberGenerator.h" #include #include @@ -35,13 +40,41 @@ static cl::opt STATISTIC(NumChecksTotal, "Number of checks"); STATISTIC(NumChecksRemoved, "Number of removed checks"); +struct RemarkInfo { + ore::NV Kind; + ore::NV F; + ore::NV BB; + explicit RemarkInfo(IntrinsicInst *II) + : Kind("Kind", II->getArgOperand(0)), + F("Function", II->getParent()->getParent()), + BB("Block", II->getParent()->getName()) {} +}; + +static void emitRemark(IntrinsicInst *II, OptimizationRemarkEmitter &ORE, + bool Removed) { + if (Removed) { + ORE.emit([&]() { + RemarkInfo Info(II); + return OptimizationRemark(DEBUG_TYPE, "Removed", II) + << "Removed check: Kind=" << Info.Kind << " F=" << Info.F + << " BB=" << Info.BB; + }); + } else { + ORE.emit([&]() { + RemarkInfo Info(II); + return OptimizationRemarkMissed(DEBUG_TYPE, "Allowed", II) + << "Allowed 
check: Kind=" << Info.Kind << " F=" << Info.F + << " BB=" << Info.BB; + }); + } +} + static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, - const ProfileSummaryInfo *PSI) { + const ProfileSummaryInfo *PSI, + OptimizationRemarkEmitter &ORE) { SmallVector, 16> ReplaceWithValue; std::unique_ptr Rng; - // TODO: - // https://github.com/llvm/llvm-project/pull/84858#discussion_r1520603139 auto ShouldRemove = [&](bool IsHot) { if (!RandomRate.getNumOccurrences()) return IsHot; @@ -75,6 +108,7 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, }); if (ToRemove) ++NumChecksRemoved; + emitRemark(II, ORE, ToRemove); break; } default: @@ -99,9 +133,11 @@ PreservedAnalyses LowerAllowCheckPass::run(Function &F, ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(*F.getParent()); BlockFrequencyInfo &BFI = AM.getResult(F); + OptimizationRemarkEmitter &ORE = + AM.getResult(F); - return removeUbsanTraps(F, BFI, PSI) ? PreservedAnalyses::none() - : PreservedAnalyses::all(); + return removeUbsanTraps(F, BFI, PSI, ORE) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); } bool LowerAllowCheckPass::IsRequested() { diff --git a/llvm/test/Transforms/lower-builtin-allow-check-remarks.ll b/llvm/test/Transforms/lower-builtin-allow-check-remarks.ll new file mode 100644 index 0000000000000..3422ab1b56d32 --- /dev/null +++ b/llvm/test/Transforms/lower-builtin-allow-check-remarks.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -passes='require,function(lower-allow-check)' -lower-allow-check-random-rate=1 -pass-remarks=lower-allow-check -pass-remarks-missed=lower-allow-check -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes='require,function(lower-allow-check)' -lower-allow-check-random-rate=0 -pass-remarks=lower-allow-check -pass-remarks-missed=lower-allow-check -S 2>&1 | FileCheck %s --check-prefixes=REMOVE + +; CHECK: remark: :0:0: Allowed check: Kind=test_check F=test_runtime BB=entry1 +; CHECK: remark: :0:0: Allowed check: Kind=7 F=test_ubsan BB=entry2 + +; REMOVE: remark: :0:0: Removed check: Kind=test_check F=test_runtime BB=entry1 +; REMOVE: remark: :0:0: Removed check: Kind=7 F=test_ubsan BB=entry2 + +target triple = "x86_64-pc-linux-gnu" + +define i1 @test_runtime() local_unnamed_addr { +entry1: + %allow = call i1 @llvm.allow.runtime.check(metadata !"test_check") + ret i1 %allow +} + +declare i1 @llvm.allow.runtime.check(metadata) nounwind + +define i1 @test_ubsan() local_unnamed_addr { +entry2: + %allow = call i1 @llvm.allow.ubsan.check(i8 7) + ret i1 %allow +} From 6ef4450705473e5cccb025219e8980999f456b71 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Thu, 11 Apr 2024 07:37:12 +0800 Subject: [PATCH 079/886] [clang] Fix -Wunused-function in CGStmtOpenMP.cpp (NFC) llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp:7959:13: error: unused function 'emitTargetTeamsLoopCodegenStatus' [-Werror,-Wunused-function] static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, ^ 1 error generated. 
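For illustration, here is a minimal standalone repro of this warning pattern
(names invented, not clang's actual code), showing the guarded-definition fix
this patch applies:

```
#include <cstdio>

#ifndef NDEBUG
// Only referenced from the NDEBUG-guarded call below; guarding the
// definition the same way keeps -Wunused-function quiet in release builds.
static void logStatus(const char *Msg) { std::fprintf(stderr, "%s\n", Msg); }
#endif

void doWork() {
#ifndef NDEBUG
  logStatus("doWork: entry");
#endif
  // ... real work ...
}
```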
--- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 3bf99366b69ce..a0a8a07c76ba1 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7956,10 +7956,10 @@ void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( [](CodeGenFunction &) { return nullptr; }); } +#ifndef NDEBUG static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, std::string StatusMsg, const OMPExecutableDirective &D) { -#ifndef NDEBUG bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice; if (IsDevice) StatusMsg += ": DEVICE"; @@ -7972,8 +7972,8 @@ static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, unsigned LineNo = PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n"; -#endif } +#endif static void emitTargetTeamsGenericLoopRegionAsParallel( CodeGenFunction &CGF, PrePostActionTy &Action, From 19e516fbed809af094ce195a6a5baa2e1f30f3cd Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 10 Apr 2024 23:35:52 +0000 Subject: [PATCH 080/886] [gn build] Port 1fda1776e32b --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 66e8084d5808a..53dc5b92c2d98 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -361,6 +361,7 @@ if (current_toolchain == default_toolchain) { "__chrono/parser_std_format_spec.h", "__chrono/statically_widen.h", "__chrono/steady_clock.h", + "__chrono/sys_info.h", "__chrono/system_clock.h", "__chrono/time_point.h", "__chrono/time_zone.h", From 402706668362fee8f9a9d29fb6d4628df4d4fc42 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 10 Apr 2024 23:35:53 +0000 Subject: [PATCH 081/886] [gn build] Port 59e66c515a47 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 53dc5b92c2d98..c3eda6878090f 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -466,6 +466,7 @@ if (current_toolchain == default_toolchain) { "__format/formatter_pointer.h", "__format/formatter_string.h", "__format/formatter_tuple.h", + "__format/indic_conjunct_break_table.h", "__format/parser_std_format_spec.h", "__format/range_default_formatter.h", "__format/range_formatter.h", From 233edab8765686bd44611f9f7319d3ffbc12fbab Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 10 Apr 2024 23:35:54 +0000 Subject: [PATCH 082/886] [gn build] Port 5d7d6ad663f8 --- llvm/utils/gn/secondary/clang/unittests/AST/Interp/BUILD.gn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/unittests/AST/Interp/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/AST/Interp/BUILD.gn index dd47a3d2d3459..fcdb9a5b1aeb2 100644 --- a/llvm/utils/gn/secondary/clang/unittests/AST/Interp/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/AST/Interp/BUILD.gn @@ -9,5 +9,8 @@ unittest("InterpTests") { "//clang/lib/Testing", "//clang/lib/Tooling", ] - sources = [ "Descriptor.cpp" ] + sources = [ + "Descriptor.cpp", + "toAPValue.cpp", + ] } From 9786a3b4cf9d050a6f87358e3295da3d32fade5c Mon Sep 17 00:00:00 
2001 From: Arthur Eubanks Date: Wed, 10 Apr 2024 23:36:17 +0000 Subject: [PATCH 083/886] [gn build] Port 0a1317564a6b --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index c3eda6878090f..4270bae57ff2c 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -932,6 +932,7 @@ if (current_toolchain == default_toolchain) { "__utility/pair.h", "__utility/piecewise_construct.h", "__utility/priority_tag.h", + "__utility/private_constructor_tag.h", "__utility/rel_ops.h", "__utility/small_buffer.h", "__utility/swap.h", diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 90f6f5d0f1458..5da8db4574a0c 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -315,8 +315,6 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ - "include/tzdb/leap_second_private.h", - "include/tzdb/time_zone_link_private.h", "include/tzdb/time_zone_private.h", "include/tzdb/types_private.h", "include/tzdb/tzdb_list_private.h", From be10070f91b86a6f126d2451852242bfcb2cd366 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 10 Apr 2024 23:41:51 +0000 Subject: [PATCH 084/886] Revert "[Driver] Ensure ToolChain::LibraryPaths is not empty for non-Darwin" This reverts commit ccdebbae4d77d3efc236af92c22941de5d437e01. Causes test failures in the presence of Android runtime libraries in resource-dir. See comments on https://github.com/llvm/llvm-project/pull/87866. --- clang/lib/Driver/ToolChain.cpp | 8 +---- clang/test/Driver/arm-compiler-rt.c | 14 ++++----- clang/test/Driver/cl-link.c | 16 +++++----- clang/test/Driver/compiler-rt-unwind.c | 6 ++-- clang/test/Driver/coverage-ld.c | 8 ++--- clang/test/Driver/instrprof-ld.c | 16 +++++----- clang/test/Driver/linux-ld.c | 6 ++-- clang/test/Driver/mingw-sanitizers.c | 16 +++++----- clang/test/Driver/msp430-toolchain.c | 4 +-- .../Driver/print-libgcc-file-name-clangrt.c | 12 ++++---- clang/test/Driver/print-runtime-dir.c | 6 ++++ clang/test/Driver/riscv32-toolchain-extra.c | 6 ++-- clang/test/Driver/riscv32-toolchain.c | 6 ++-- clang/test/Driver/riscv64-toolchain-extra.c | 6 ++-- clang/test/Driver/riscv64-toolchain.c | 6 ++-- clang/test/Driver/sanitizer-ld.c | 30 +++++++++---------- clang/test/Driver/wasm-toolchain.c | 18 +++++------ clang/test/Driver/wasm-toolchain.cpp | 16 +++++----- clang/test/Driver/windows-cross.c | 18 +++++------ clang/test/Driver/zos-ld.c | 12 ++++---- .../test/Driver/msvc-dependent-lib-flags.f90 | 8 ++--- 21 files changed, 119 insertions(+), 119 deletions(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 237092ed07e5d..03450fc0f57b9 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -796,13 +796,7 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); - if (auto Ret = getTargetSubDirPath(P)) - return Ret; - // Darwin does not use per-target runtime directory. 
- if (Triple.isOSDarwin()) - return {}; - llvm::sys::path::append(P, Triple.str()); - return std::string(P); + return getTargetSubDirPath(P); } std::optional ToolChain::getStdlibPath() const { diff --git a/clang/test/Driver/arm-compiler-rt.c b/clang/test/Driver/arm-compiler-rt.c index cb6c29f48a781..5e9e528400d08 100644 --- a/clang/test/Driver/arm-compiler-rt.c +++ b/clang/test/Driver/arm-compiler-rt.c @@ -10,47 +10,47 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABI -// ARM-GNUEABI: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-GNUEABI: "{{.*[/\\]}}libclang_rt.builtins-arm.a" // RUN: %clang -target arm-linux-gnueabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=hard -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABI-ABI -// ARM-GNUEABI-ABI: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-GNUEABI-ABI: "{{.*[/\\]}}libclang_rt.builtins-armhf.a" // RUN: %clang -target arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABIHF -// ARM-GNUEABIHF: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-GNUEABIHF: "{{.*[/\\]}}libclang_rt.builtins-armhf.a" // RUN: %clang -target arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=soft -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABIHF-ABI -// ARM-GNUEABIHF-ABI: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-GNUEABIHF-ABI: "{{.*[/\\]}}libclang_rt.builtins-arm.a" // RUN: %clang -target arm-windows-itanium \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-WINDOWS -// ARM-WINDOWS: "{{.*[/\\]}}clang_rt.builtins.lib" +// ARM-WINDOWS: "{{.*[/\\]}}clang_rt.builtins-arm.lib" // RUN: %clang -target arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-ANDROID -// ARM-ANDROID: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-ANDROID: "{{.*[/\\]}}libclang_rt.builtins-arm-android.a" // RUN: not %clang --target=arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=hard -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-ANDROIDHF -// ARM-ANDROIDHF: "{{.*[/\\]}}libclang_rt.builtins.a" +// ARM-ANDROIDHF: "{{.*[/\\]}}libclang_rt.builtins-armhf-android.a" diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c index ffd0b5ac4bade..444f0c01b3f99 100644 --- a/clang/test/Driver/cl-link.c +++ b/clang/test/Driver/cl-link.c @@ -13,20 +13,20 @@ // ASAN: link.exe // ASAN: "-debug" // ASAN: "-incremental:no" -// ASAN: "{{[^"]*}}clang_rt.asan.lib" -// ASAN: "-wholearchive:{{.*}}clang_rt.asan.lib" -// ASAN: "{{[^"]*}}clang_rt.asan_cxx.lib" -// ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx.lib" +// ASAN: "{{[^"]*}}clang_rt.asan-i386.lib" +// ASAN: 
"-wholearchive:{{.*}}clang_rt.asan-i386.lib" +// ASAN: "{{[^"]*}}clang_rt.asan_cxx-i386.lib" +// ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib" // ASAN: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s // ASAN-MD: link.exe // ASAN-MD: "-debug" // ASAN-MD: "-incremental:no" -// ASAN-MD: "{{.*}}clang_rt.asan_dynamic.lib" -// ASAN-MD: "{{[^"]*}}clang_rt.asan_dynamic_runtime_thunk.lib" +// ASAN-MD: "{{.*}}clang_rt.asan_dynamic-i386.lib" +// ASAN-MD: "{{[^"]*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" // ASAN-MD: "-include:___asan_seh_interceptor" -// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" +// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" // ASAN-MD: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s @@ -40,7 +40,7 @@ // ASAN-DLL: "-dll" // ASAN-DLL: "-debug" // ASAN-DLL: "-incremental:no" -// ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk.lib" +// ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib" // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s diff --git a/clang/test/Driver/compiler-rt-unwind.c b/clang/test/Driver/compiler-rt-unwind.c index c5040d7fd900b..7f4e3f22ab19a 100644 --- a/clang/test/Driver/compiler-rt-unwind.c +++ b/clang/test/Driver/compiler-rt-unwind.c @@ -98,14 +98,14 @@ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: -shared-libgcc \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" +// MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" // MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT-SAME: "-l:libunwind.dll.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: -static-libgcc \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" +// MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" // MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT-SAME: "-l:libunwind.a" // // RUN: %clang -### %s 2>&1 \ @@ -114,5 +114,5 @@ // RUN: %clangxx -### %s 2>&1 \ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" +// MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" // MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT-SAME: "-lunwind" diff --git a/clang/test/Driver/coverage-ld.c b/clang/test/Driver/coverage-ld.c index be1d8320ab8be..acb08eb5db59a 100644 --- a/clang/test/Driver/coverage-ld.c +++ b/clang/test/Driver/coverage-ld.c @@ -33,7 +33,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s // // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-FREEBSD-X86-64: 
"{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-netbsd --coverage -fuse-ld=ld \ @@ -42,7 +42,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64 %s // CHECK-NETBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-openbsd --coverage -fuse-ld=ld \ @@ -51,7 +51,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64 %s // CHECK-OPENBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=arm-linux-androideabi --coverage -fuse-ld=ld \ @@ -60,4 +60,4 @@ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-ARM %s // // CHECK-ANDROID-ARM: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-ANDROID-ARM: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}arm-unknown-linux-android{{/|\\\\}}libclang_rt.profile.a" +// CHECK-ANDROID-ARM: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-arm-android.a" diff --git a/clang/test/Driver/instrprof-ld.c b/clang/test/Driver/instrprof-ld.c index a96bba4a1e763..674580b349d42 100644 --- a/clang/test/Driver/instrprof-ld.c +++ b/clang/test/Driver/instrprof-ld.c @@ -34,7 +34,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s // // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-netbsd -fprofile-instr-generate -fuse-ld=ld \ @@ -43,7 +43,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64 %s // CHECK-NETBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-openbsd -fprofile-instr-generate -fuse-ld=ld \ @@ -52,7 +52,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64 %s // CHECK-OPENBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -72,7 +72,7 @@ // RUN: | FileCheck --check-prefix=CHECK-LINUX-X86-64-SHARED %s // // CHECK-LINUX-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-LINUX-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{.*}}x86_64-unknown-linux{{.*}}libclang_rt.profile.a" {{.*}} "-lc" +// CHECK-LINUX-X86-64-SHARED: 
"{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{.*}}linux{{.*}}libclang_rt.profile.a" {{.*}} "-lc" // // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -82,7 +82,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64-SHARED %s // // CHECK-FREEBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-FREEBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -92,7 +92,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64-SHARED %s // CHECK-NETBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-NETBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -102,7 +102,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64-SHARED %s // CHECK-OPENBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" +// CHECK-OPENBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-apple-darwin14 -fprofile-instr-generate -fuse-ld=ld \ @@ -174,7 +174,7 @@ // RUN: | FileCheck --check-prefix=CHECK-MINGW-X86-64 %s // // CHECK-MINGW-X86-64: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-MINGW-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-windows-gnu{{/|\\\\}}libclang_rt.profile.a" +// CHECK-MINGW-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}windows{{/|\\\\}}libclang_rt.profile-x86_64.a" // Test instrumented profiling dependent-lib flags // diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index e5c5563673858..4020b138dc8fd 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -99,9 +99,9 @@ // CHECK-LD-RT-ANDROID: "--eh-frame-hdr" // CHECK-LD-RT-ANDROID: "-m" "armelf_linux_eabi" // CHECK-LD-RT-ANDROID: "-dynamic-linker" -// CHECK-LD-RT-ANDROID: libclang_rt.builtins.a" +// CHECK-LD-RT-ANDROID: libclang_rt.builtins-arm-android.a" // CHECK-LD-RT-ANDROID: "-lc" -// CHECK-LD-RT-ANDROID: libclang_rt.builtins.a" +// CHECK-LD-RT-ANDROID: libclang_rt.builtins-arm-android.a" // // RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \ @@ -264,7 +264,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CLANG-ANDROID-STATIC %s // CHECK-CLANG-ANDROID-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-CLANG-ANDROID-STATIC: "--start-group" "{{[^"]*}}{{/|\\\\}}libclang_rt.builtins.a" "-l:libunwind.a" "-lc" "--end-group" +// CHECK-CLANG-ANDROID-STATIC: "--start-group" "{{[^"]*}}{{/|\\\\}}libclang_rt.builtins-aarch64-android.a" "-l:libunwind.a" "-lc" "--end-group" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \ diff --git a/clang/test/Driver/mingw-sanitizers.c b/clang/test/Driver/mingw-sanitizers.c index 2325f8f0f1f23..d165648a8fdf6 100644 --- a/clang/test/Driver/mingw-sanitizers.c +++ 
b/clang/test/Driver/mingw-sanitizers.c @@ -4,17 +4,17 @@ // // ASAN-ALL-NOT:"-l{{[^"]+"]}}" // ASAN-ALL-NOT:"[[INPUT]]" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic-i386.dll.a" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic-x86_64.dll.a" // ASAN-ALL: "-lcomponent" // ASAN-ALL: "[[INPUT]]" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic-i386.dll.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-i386.a" // ASAN-I686: "--require-defined" "___asan_seh_interceptor" -// ASAN-I686: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" "--no-whole-archive" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" +// ASAN-I686: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-i386.a" "--no-whole-archive" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic-x86_64.dll.a" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-x86_64.a" // ASAN-X86_64: "--require-defined" "__asan_seh_interceptor" -// ASAN-X86_64: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" "--no-whole-archive" +// ASAN-X86_64: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-x86_64.a" "--no-whole-archive" // RUN: %clang -target x86_64-windows-gnu %s -### -fsanitize=vptr diff --git a/clang/test/Driver/msp430-toolchain.c b/clang/test/Driver/msp430-toolchain.c index 3c3042b482ef2..ef6780c38f2ee 100644 --- a/clang/test/Driver/msp430-toolchain.c +++ b/clang/test/Driver/msp430-toolchain.c @@ -103,8 +103,8 @@ // LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o" // LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430" // LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" -// LIBS-COMPILER-RT-POS: "{{[^"]*}}libclang_rt.builtins.a" "--start-group" "-lmul_none" "-lc" "{{[^"]*}}libclang_rt.builtins.a" "-lcrt" "-lnosys" "--end-group" "{{[^"]*}}libclang_rt.builtins.a" -// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "{{[^"]*}}libclang_rt.builtins.a" +// LIBS-COMPILER-RT-POS: "{{[^"]*}}libclang_rt.builtins-msp430.a" "--start-group" "-lmul_none" "-lc" "{{[^"]*}}libclang_rt.builtins-msp430.a" "-lcrt" "-lnosys" "--end-group" "{{[^"]*}}libclang_rt.builtins-msp430.a" +// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "{{[^"]*}}libclang_rt.builtins-msp430.a" // LIBS-COMPILER-RT-NEG-NOT: crtbegin.o // LIBS-COMPILER-RT-NEG-NOT: -lssp_nonshared // LIBS-COMPILER-RT-NEG-NOT: -lssp diff --git a/clang/test/Driver/print-libgcc-file-name-clangrt.c b/clang/test/Driver/print-libgcc-file-name-clangrt.c index a902eedc85209..ed740e0d2917d 100644 --- a/clang/test/Driver/print-libgcc-file-name-clangrt.c +++ b/clang/test/Driver/print-libgcc-file-name-clangrt.c @@ -5,14 +5,14 @@ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-X8664 %s -// CHECK-CLANGRT-X8664: libclang_rt.builtins.a +// CHECK-CLANGRT-X8664: libclang_rt.builtins-x86_64.a // RUN: %clang 
-rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=i386-pc-linux \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-I386 %s -// CHECK-CLANGRT-I386: libclang_rt.builtins.a +// CHECK-CLANGRT-I386: libclang_rt.builtins-i386.a // Check whether alternate arch values map to the correct library. // @@ -27,28 +27,28 @@ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM %s -// CHECK-CLANGRT-ARM: libclang_rt.builtins.a +// CHECK-CLANGRT-ARM: libclang_rt.builtins-arm.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM-ANDROID %s -// CHECK-CLANGRT-ARM-ANDROID: libclang_rt.builtins.a +// CHECK-CLANGRT-ARM-ANDROID: libclang_rt.builtins-arm-android.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARMHF %s -// CHECK-CLANGRT-ARMHF: libclang_rt.builtins.a +// CHECK-CLANGRT-ARMHF: libclang_rt.builtins-armhf.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-gnueabi -mfloat-abi=hard \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM-ABI %s -// CHECK-CLANGRT-ARM-ABI: libclang_rt.builtins.a +// CHECK-CLANGRT-ARM-ABI: libclang_rt.builtins-armhf.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=armv7m-none-eabi \ diff --git a/clang/test/Driver/print-runtime-dir.c b/clang/test/Driver/print-runtime-dir.c index ac1ff7e634b81..550ffef1aaf6e 100644 --- a/clang/test/Driver/print-runtime-dir.c +++ b/clang/test/Driver/print-runtime-dir.c @@ -1,3 +1,9 @@ +// Default directory layout +// RUN: %clang -print-runtime-dir --target=x86_64-pc-windows-msvc \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: | FileCheck --check-prefix=PRINT-RUNTIME-DIR -DFILE=%S/Inputs/resource_dir %s +// PRINT-RUNTIME-DIR: [[FILE]]{{/|\\}}lib{{/|\\}}windows + // Per-target directory layout // RUN: %clang -print-runtime-dir --target=x86_64-pc-windows-msvc \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ diff --git a/clang/test/Driver/riscv32-toolchain-extra.c b/clang/test/Driver/riscv32-toolchain-extra.c index aab6b36f3cfca..2d38aa3b545fe 100644 --- a/clang/test/Driver/riscv32-toolchain-extra.c +++ b/clang/test/Driver/riscv32-toolchain-extra.c @@ -29,8 +29,8 @@ // C-RV32-BAREMETAL-ILP32-NOGCC: "-internal-isystem" "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/include" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/riscv32-unknown-elf-ld" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib/crt0.o" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtbegin.o" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/clang_rt.crtbegin-riscv32.o" // C-RV32-BAREMETAL-ILP32-NOGCC: 
"{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib" // C-RV32-BAREMETAL-ILP32-NOGCC: "--start-group" "-lc" "-lgloss" "--end-group" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/libclang_rt.builtins.a" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtend.o" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/libclang_rt.builtins-riscv32.a" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/clang_rt.crtend-riscv32.o" diff --git a/clang/test/Driver/riscv32-toolchain.c b/clang/test/Driver/riscv32-toolchain.c index 322a6ca2840fb..bb2533cdf1bce 100644 --- a/clang/test/Driver/riscv32-toolchain.c +++ b/clang/test/Driver/riscv32-toolchain.c @@ -195,9 +195,9 @@ // RUN: --target=riscv32-unknown-elf --rtlib=compiler-rt --unwindlib=compiler-rt 2>&1 \ // RUN: | FileCheck -check-prefix=C-RV32-RTLIB-COMPILERRT-ILP32 %s // C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}crt0.o" -// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtbegin.o" -// C-RV32-RTLIB-COMPILERRT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins.a" -// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtend.o" +// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtbegin-riscv32.o" +// C-RV32-RTLIB-COMPILERRT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv32.a" +// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtend-riscv32.o" // RUN: %clang -### %s --target=riscv32 \ // RUN: --gcc-toolchain=%S/Inputs/basic_riscv32_tree --sysroot= \ diff --git a/clang/test/Driver/riscv64-toolchain-extra.c b/clang/test/Driver/riscv64-toolchain-extra.c index d8d9b58441676..a6ec9b16cc5ca 100644 --- a/clang/test/Driver/riscv64-toolchain-extra.c +++ b/clang/test/Driver/riscv64-toolchain-extra.c @@ -29,8 +29,8 @@ // C-RV64-BAREMETAL-LP64-NOGCC: "-internal-isystem" "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/include" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/riscv64-unknown-elf-ld" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/lib/crt0.o" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/clang_rt.crtbegin.o" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/clang_rt.crtbegin-riscv64.o" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/lib" // C-RV64-BAREMETAL-LP64-NOGCC: "--start-group" "-lc" "-lgloss" "--end-group" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/libclang_rt.builtins.a" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/clang_rt.crtend.o" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/libclang_rt.builtins-riscv64.a" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/clang_rt.crtend-riscv64.o" diff --git a/clang/test/Driver/riscv64-toolchain.c b/clang/test/Driver/riscv64-toolchain.c index b3216de307540..381ee58c470c5 100644 --- a/clang/test/Driver/riscv64-toolchain.c +++ b/clang/test/Driver/riscv64-toolchain.c @@ -151,9 +151,9 @@ // RUN: --target=riscv64-unknown-elf --rtlib=compiler-rt --unwindlib=compiler-rt 2>&1 \ // RUN: | FileCheck -check-prefix=C-RV64-RTLIB-COMPILERRT-LP64 %s // C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}crt0.o" -// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtbegin.o" -// C-RV64-RTLIB-COMPILERRT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins.a" -// 
C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtend.o" +// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtbegin-riscv64.o" +// C-RV64-RTLIB-COMPILERRT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv64.a" +// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtend-riscv64.o" // RUN: %clang -### %s --target=riscv64 \ // RUN: --gcc-toolchain=%S/Inputs/basic_riscv64_tree --sysroot= \ diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index 1d52fc1260959..53e536d772924 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -111,7 +111,7 @@ // CHECK-ASAN-FREEBSD: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-FREEBSD-NOT: "-lc" // CHECK-ASAN-FREEBSD-NOT: libclang_rt.asan_cxx -// CHECK-ASAN-FREEBSD: freebsd{{/|\\+}}libclang_rt.asan.a" +// CHECK-ASAN-FREEBSD: freebsd{{/|\\+}}libclang_rt.asan-i386.a" // CHECK-ASAN-FREEBSD-NOT: libclang_rt.asan_cxx // CHECK-ASAN-FREEBSD-NOT: "--dynamic-list" // CHECK-ASAN-FREEBSD: "--export-dynamic" @@ -135,8 +135,8 @@ // // CHECK-ASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-LINUX-CXX-NOT: "-lc" -// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive" -// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx.a" "--no-whole-archive" +// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive" +// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "--no-whole-archive" // CHECK-ASAN-LINUX-CXX-NOT: "--dynamic-list" // CHECK-ASAN-LINUX-CXX: "--export-dynamic" // CHECK-ASAN-LINUX-CXX: stdc++ @@ -163,7 +163,7 @@ // // CHECK-ASAN-ARM: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-ARM-NOT: "-lc" -// CHECK-ASAN-ARM: libclang_rt.asan.a" +// CHECK-ASAN-ARM: libclang_rt.asan-arm.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=armv7l-linux-gnueabi -fuse-ld=ld -fsanitize=address \ @@ -172,7 +172,7 @@ // // CHECK-ASAN-ARMv7: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-ARMv7-NOT: "-lc" -// CHECK-ASAN-ARMv7: libclang_rt.asan.a" +// CHECK-ASAN-ARMv7: libclang_rt.asan-arm.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \ @@ -184,7 +184,7 @@ // CHECK-ASAN-ANDROID-NOT: "-lc" // CHECK-ASAN-ANDROID-NOT: "-lpthread" // CHECK-ASAN-ANDROID-NOT: "-lresolv" -// CHECK-ASAN-ANDROID: libclang_rt.asan.so" +// CHECK-ASAN-ANDROID: libclang_rt.asan-arm-android.so" // CHECK-ASAN-ANDROID-NOT: "-lpthread" // CHECK-ASAN-ANDROID-NOT: "-lresolv" @@ -195,7 +195,7 @@ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ANDROID-STATICLIBASAN %s // // CHECK-ASAN-ANDROID-STATICLIBASAN: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-ASAN-ANDROID-STATICLIBASAN: libclang_rt.asan.a" +// CHECK-ASAN-ANDROID-STATICLIBASAN: libclang_rt.asan-arm-android.a" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lpthread" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lrt" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lresolv" @@ -210,7 +210,7 @@ // CHECK-UBSAN-ANDROID-NOT: "-lc" // CHECK-UBSAN-ANDROID-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-NOT: "-lresolv" -// CHECK-UBSAN-ANDROID: libclang_rt.ubsan_standalone.so" +// CHECK-UBSAN-ANDROID: libclang_rt.ubsan_standalone-arm-android.so" // CHECK-UBSAN-ANDROID-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-NOT: "-lresolv" @@ -221,7 +221,7 @@ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-ANDROID-STATICLIBASAN %s // // CHECK-UBSAN-ANDROID-STATICLIBASAN: 
"{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-UBSAN-ANDROID-STATICLIBASAN: libclang_rt.ubsan_standalone.a" +// CHECK-UBSAN-ANDROID-STATICLIBASAN: libclang_rt.ubsan_standalone-arm-android.a" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lrt" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lresolv" @@ -237,7 +237,7 @@ // CHECK-ASAN-ANDROID-X86-NOT: "-lc" // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread" // CHECK-ASAN-ANDROID-X86-NOT: "-lresolv" -// CHECK-ASAN-ANDROID-X86: libclang_rt.asan.so" +// CHECK-ASAN-ANDROID-X86: libclang_rt.asan-i686-android.so" // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread" // CHECK-ASAN-ANDROID-X86-NOT: "-lresolv" // @@ -257,7 +257,7 @@ // // CHECK-ASAN-ANDROID-SHARED: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lc" -// CHECK-ASAN-ANDROID-SHARED: libclang_rt.asan.so" +// CHECK-ASAN-ANDROID-SHARED: libclang_rt.asan-arm-android.so" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lpthread" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lresolv" @@ -347,7 +347,7 @@ // CHECK-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}" // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx -// CHECK-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone.a" "--no-whole-archive" +// CHECK-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x32.a" "--no-whole-archive" // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx // CHECK-UBSAN-LINUX-NOT: "-lstdc++" @@ -678,7 +678,7 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-ANDROID %s // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{.*}}ld{{(.exe)?}}" -// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{[^"]*}}libclang_rt.ubsan_standalone.so" +// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{[^"]*}}libclang_rt.ubsan_standalone-aarch64-android.so" // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "--export-dynamic-symbol=__cfi_check" // RUN: %clangxx -fsanitize=address -### %s 2>&1 \ @@ -929,7 +929,7 @@ // CHECK-SCUDO-ANDROID: "-pie" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" -// CHECK-SCUDO-ANDROID: libclang_rt.scudo_standalone.so" +// CHECK-SCUDO-ANDROID: libclang_rt.scudo_standalone-arm-android.so" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" @@ -940,7 +940,7 @@ // RUN: | FileCheck --check-prefix=CHECK-SCUDO-ANDROID-STATIC %s // CHECK-SCUDO-ANDROID-STATIC: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" // CHECK-SCUDO-ANDROID-STATIC: "-pie" -// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone.a" "--no-whole-archive" +// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone-arm-android.a" "--no-whole-archive" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lstdc++" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lrt" diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index dabf0ac2433bb..88590a3ba4c45 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -17,42 +17,42 @@ // RUN: %clang -### --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK %s // LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" 
"{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C link command-line with optimization with unknown OS. // RUN: %clang -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT %s // LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C link command-line with known OS. // RUN: %clang -### --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN %s // LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // -shared should be passed through to `wasm-ld` and include crt1-reactor.o with a known OS. // RUN: %clang -### -shared -mexec-model=reactor --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN_SHARED %s // LINK_KNOWN_SHARED: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN_SHARED: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_KNOWN_SHARED: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // -shared should be passed through to `wasm-ld` and include crt1-reactor.o with an unknown OS. // RUN: %clang -### -shared -mexec-model=reactor --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_UNKNOWN_SHARED %s // LINK_UNKNOWN_SHARED: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_UNKNOWN_SHARED: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_UNKNOWN_SHARED: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C link command-line with optimization with known OS. // RUN: %clang -### -O2 --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN %s // LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C compile command-line with known OS. 
@@ -180,12 +180,12 @@
// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=command 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-COMMAND %s
// CHECK-COMMAND: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+// CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
// RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=reactor 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-REACTOR %s
// CHECK-REACTOR: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+// CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
// -fPIC implies +mutable-globals
@@ -204,7 +204,7 @@
// RUN: %clang -### -O2 --target=wasm32-wasip2 %s --sysroot /foo 2>&1 \
// RUN: | FileCheck -check-prefix=LINK_WASIP2 %s
// LINK_WASIP2: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK_WASIP2: wasm-component-ld{{.*}}" "-L/foo/lib/wasm32-wasip2" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+// LINK_WASIP2: wasm-component-ld{{.*}}" "-L/foo/lib/wasm32-wasip2" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
// Test that on `wasm32-wasip2` the `wasm-component-ld` program is told where
// to find `wasm-ld` by default.
diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp
index ba1c55b33edca..4af011097021f 100644
--- a/clang/test/Driver/wasm-toolchain.cpp
+++ b/clang/test/Driver/wasm-toolchain.cpp
@@ -17,48 +17,48 @@
// RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \
// RUN: | FileCheck -check-prefix=LINK %s
// LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
// RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \
// RUN: | FileCheck -check-prefix=LINK_STDCXX %s
// LINK_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+// LINK_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
// A basic C++ link command-line with optimization with unknown OS.
// RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT %s // LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_STDCXX %s // LINK_OPT_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ link command-line with known OS. // RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN %s // LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN_STDCXX %s // LINK_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ link command-line with optimization with known OS. // RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN %s // LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN_STDCXX %s // LINK_OPT_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" +// LINK_OPT_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ compile command-line with known OS. 
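A note on the pattern behind the driver-test churn above: every hunk applies the same mechanical substitution, rewriting FileCheck expectations from the per-target runtime-directory layout to the legacy layout, in which compiler-rt files live in an OS-named directory and carry an explicit architecture suffix. A rough sketch of the two layouts, using only paths that appear verbatim in the hunks (illustrative, not exhaustive):

    Per-target layout (old expectation):
        <resource-dir>/lib/x86_64-unknown-freebsd/libclang_rt.profile.a
    Legacy layout (new expectation):
        <resource-dir>/lib/freebsd/libclang_rt.profile-x86_64.a
    Targets with no OS directory keep only the suffix:
        libclang_rt.builtins-wasm32.a, clang_rt.crtbegin-riscv32.o

The {{/|\\\\}} and {{.*[/\\]}} patterns in the CHECK lines deliberately accept either path-separator style, so the same expectations hold on both POSIX and Windows hosts.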
diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c index f6e831f00e13a..75490b992d78d 100644 --- a/clang/test/Driver/windows-cross.c +++ b/clang/test/Driver/windows-cross.c @@ -11,32 +11,32 @@ // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libstdc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-RTLIB -// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-C-LIBCXX -// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clangxx -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-LIBCXX -// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED -// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -static -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED-STATIC -// CHECK-SHARED-STATIC: {{[/\\]}}ld" 
"--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-SHARED-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-NOSTARTFILES -// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" +// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -nodefaultlibs -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-STANDALONE @@ -52,19 +52,19 @@ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS // CHECK-SANITIZE-ADDRESS: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS: "{{.*}}clang_rt.asan_dll_thunk.lib" +// CHECK-SANITIZE-ADDRESS: "{{.*}}clang_rt.asan_dll_thunk-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -o test.exe -fsanitize=address -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE // CHECK-SANITIZE-ADDRESS-EXE: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS-EXE: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "__asan_seh_interceptor" +// CHECK-SANITIZE-ADDRESS-EXE: "{{.*}}clang_rt.asan_dynamic-arm.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk-arm.lib" "--undefined" "__asan_seh_interceptor" // RUN: %clang -### -target i686-windows-itanium -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -o test.exe -fsanitize=address -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE-X86 // CHECK-SANITIZE-ADDRESS-EXE-X86: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "___asan_seh_interceptor" +// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic-i386.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" "--undefined" "___asan_seh_interceptor" // RUN: not %clang -### --target=armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -fsanitize=tsan -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-TSAN diff --git a/clang/test/Driver/zos-ld.c 
b/clang/test/Driver/zos-ld.c index 87d169936e129..4d4decdd0e65b 100644 --- a/clang/test/Driver/zos-ld.c +++ b/clang/test/Driver/zos-ld.c @@ -14,7 +14,7 @@ // C-LD-SAME: "-S" "//'SYS1.CSSLIB'" // C-LD-SAME: "//'CEE.SCEELIB(CELQS001)'" // C-LD-SAME: "//'CEE.SCEELIB(CELQS003)'" -// C-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// C-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" // 2. General C link for dll // RUN: %clang -### --shared --target=s390x-ibm-zos %s 2>&1 \ @@ -30,7 +30,7 @@ // C-LD-DLL-SAME: "-S" "//'SYS1.CSSLIB'" // C-LD-DLL-SAME: "//'CEE.SCEELIB(CELQS001)'" // C-LD-DLL-SAME: "//'CEE.SCEELIB(CELQS003)'" -// C-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// C-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" // 3. General C++ link for executable // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -52,7 +52,7 @@ // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQCXA)'" // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQXLA)'" // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQUNW)'" -// CXX-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// CXX-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" // 4. General C++ link for dll // RUN: %clangxx -### --shared --target=s390x-ibm-zos %s 2>&1 \ @@ -74,7 +74,7 @@ // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQCXA)'" // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQXLA)'" // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQUNW)'" -// CXX-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// CXX-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" // 5. C++ link for executable w/ -mzos-hlq-le=, -mzos-hlq-csslib= // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -97,7 +97,7 @@ // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQCXA)'" // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQXLA)'" // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQUNW)'" -// CXX-LD5-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// CXX-LD5-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" // 6. C++ link for executable w/ -mzos-hlq-clang= // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -120,4 +120,4 @@ // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQCXA)'" // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQXLA)'" // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQUNW)'" -// CXX-LD6-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" +// CXX-LD6-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" diff --git a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90 index 643dbe9e949cb..7c1f962e339f9 100644 --- a/flang/test/Driver/msvc-dependent-lib-flags.f90 +++ b/flang/test/Driver/msvc-dependent-lib-flags.f90 @@ -4,7 +4,7 @@ ! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG ! MSVC: -fc1 -! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib ! MSVC-SAME: -D_MT ! MSVC-SAME: --dependent-lib=libcmt ! MSVC-SAME: --dependent-lib=Fortran_main.static.lib @@ -12,7 +12,7 @@ ! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib ! MSVC-DEBUG: -fc1 -! 
MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib ! MSVC-DEBUG-SAME: -D_MT ! MSVC-DEBUG-SAME: -D_DEBUG ! MSVC-DEBUG-SAME: --dependent-lib=libcmtd @@ -21,7 +21,7 @@ ! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib ! MSVC-DLL: -fc1 -! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib ! MSVC-DLL-SAME: -D_MT ! MSVC-DLL-SAME: -D_DLL ! MSVC-DLL-SAME: --dependent-lib=msvcrt @@ -30,7 +30,7 @@ ! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib ! MSVC-DLL-DEBUG: -fc1 -! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib ! MSVC-DLL-DEBUG-SAME: -D_MT ! MSVC-DLL-DEBUG-SAME: -D_DEBUG ! MSVC-DLL-DEBUG-SAME: -D_DLL From fca51911d4668b3a6b79eb956327eb81fad3f40c Mon Sep 17 00:00:00 2001 From: Bill Wendling <5993918+bwendling@users.noreply.github.com> Date: Thu, 11 Apr 2024 00:33:40 +0000 Subject: [PATCH 085/886] [NFC][Clang] Improve const correctness for IdentifierInfo (#79365) The IdentifierInfo isn't typically modified. Use 'const' wherever possible. --- clang/include/clang/AST/ASTContext.h | 4 +- clang/include/clang/AST/Decl.h | 37 +++--- clang/include/clang/AST/DeclObjC.h | 89 +++++++------- clang/include/clang/AST/DeclTemplate.h | 8 +- clang/include/clang/AST/ExprCXX.h | 10 +- clang/include/clang/AST/ExternalASTSource.h | 2 +- clang/include/clang/AST/NestedNameSpecifier.h | 6 +- clang/include/clang/Analysis/SelectorExtras.h | 4 +- clang/include/clang/Basic/IdentifierTable.h | 25 ++-- .../clang/Lex/ExternalPreprocessorSource.h | 2 +- clang/include/clang/Lex/MacroInfo.h | 8 +- clang/include/clang/Lex/Preprocessor.h | 9 +- clang/include/clang/Parse/Parser.h | 16 +-- .../include/clang/Sema/CodeCompleteConsumer.h | 9 +- clang/include/clang/Sema/DeclSpec.h | 23 ++-- clang/include/clang/Sema/ParsedTemplate.h | 7 +- clang/include/clang/Sema/Sema.h | 89 +++++++------- clang/include/clang/Serialization/ASTReader.h | 10 +- clang/lib/ARCMigrate/ObjCMT.cpp | 7 +- clang/lib/ARCMigrate/TransAPIUses.cpp | 2 +- clang/lib/AST/ASTContext.cpp | 11 +- clang/lib/AST/ASTImporter.cpp | 6 +- clang/lib/AST/Decl.cpp | 18 +-- clang/lib/AST/DeclObjC.cpp | 94 ++++++--------- clang/lib/AST/DeclTemplate.cpp | 14 +-- clang/lib/AST/NSAPI.cpp | 104 ++++++---------- clang/lib/AST/NestedNameSpecifier.cpp | 18 +-- clang/lib/AST/SelectorLocationsKind.cpp | 4 +- clang/lib/AST/StmtPrinter.cpp | 4 +- clang/lib/AST/StmtProfile.cpp | 6 +- clang/lib/Analysis/ObjCNoReturn.cpp | 5 +- clang/lib/Basic/IdentifierTable.cpp | 16 +-- clang/lib/CodeGen/CGBlocks.cpp | 4 +- clang/lib/CodeGen/CGCUDANV.cpp | 2 +- clang/lib/CodeGen/CGDecl.cpp | 4 +- clang/lib/CodeGen/CGObjC.cpp | 23 ++-- clang/lib/CodeGen/CGObjCMac.cpp | 13 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 +- clang/lib/CodeGen/CodeGenModule.cpp | 6 +- .../Frontend/Rewrite/RewriteModernObjC.cpp | 2 +- clang/lib/Lex/HeaderSearch.cpp | 3 +- clang/lib/Lex/MacroInfo.cpp | 2 +- clang/lib/Lex/PPLexerChange.cpp | 9 +- clang/lib/Lex/PPMacroExpansion.cpp | 4 +- clang/lib/Lex/Preprocessor.cpp | 2 +- clang/lib/Parse/ParseDecl.cpp | 2 +- clang/lib/Parse/ParseDeclCXX.cpp | 2 +- clang/lib/Parse/ParseExprCXX.cpp | 5 +- clang/lib/Parse/ParseObjc.cpp | 12 +- clang/lib/Parse/ParseTemplate.cpp | 4 +- clang/lib/Sema/CodeCompleteConsumer.cpp | 3 +- clang/lib/Sema/Sema.cpp | 5 +- clang/lib/Sema/SemaCodeComplete.cpp | 112 +++++++++--------- clang/lib/Sema/SemaDecl.cpp | 21 ++-- 
clang/lib/Sema/SemaDeclCXX.cpp | 9 +-
clang/lib/Sema/SemaDeclObjC.cpp | 23 ++--
clang/lib/Sema/SemaExprCXX.cpp | 24 ++--
clang/lib/Sema/SemaExprObjC.cpp | 17 ++-
clang/lib/Sema/SemaObjCProperty.cpp | 4 +-
clang/lib/Sema/SemaOpenMP.cpp | 2 +-
clang/lib/Sema/SemaPseudoObject.cpp | 48 ++++----
clang/lib/Sema/SemaStmt.cpp | 8 +-
clang/lib/Sema/SemaTemplate.cpp | 35 +++---
clang/lib/Serialization/ASTCommon.cpp | 2 +-
clang/lib/Serialization/ASTReader.cpp | 14 +--
clang/lib/Serialization/ASTWriter.cpp | 18 +--
.../Checkers/CheckObjCDealloc.cpp | 4 +-
.../Checkers/LocalizationChecker.cpp | 108 ++++++++---------
.../Checkers/NullabilityChecker.cpp | 3 +-
.../Checkers/ObjCMissingSuperCallChecker.cpp | 2 +-
.../Checkers/ObjCSuperDeallocChecker.cpp | 4 +-
clang/tools/libclang/CIndexCodeCompletion.cpp | 10 +-
.../Plugins/ExpressionParser/Clang/ASTUtils.h | 2 +-
.../ExpressionParser/Clang/ClangASTSource.cpp | 7 +-
.../AppleObjCRuntime/AppleObjCDeclVendor.cpp | 4 +-
.../TypeSystem/Clang/TypeSystemClang.cpp | 11 +-
76 files changed, 602 insertions(+), 666 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 08f71051e6cbf..28f8d67811f0a 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -3411,13 +3411,13 @@ const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
/// Utility function for constructing a nullary selector.
inline Selector GetNullarySelector(StringRef name, ASTContext &Ctx) {
- IdentifierInfo* II = &Ctx.Idents.get(name);
+ const IdentifierInfo *II = &Ctx.Idents.get(name);
return Ctx.Selectors.getSelector(0, &II);
}

/// Utility function for constructing an unary selector.
inline Selector GetUnarySelector(StringRef name, ASTContext &Ctx) {
- IdentifierInfo* II = &Ctx.Idents.get(name);
+ const IdentifierInfo *II = &Ctx.Idents.get(name);
return Ctx.Selectors.getSelector(1, &II);
}

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 5f1f83bb00282..ed6790acdfc7c 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -1731,7 +1731,7 @@ class ImplicitParamDecl : public VarDecl {
static ImplicitParamDecl *CreateDeserialized(ASTContext &C, unsigned ID);

ImplicitParamDecl(ASTContext &C, DeclContext *DC, SourceLocation IdLoc,
- IdentifierInfo *Id, QualType Type,
+ const IdentifierInfo *Id, QualType Type,
ImplicitParamKind ParamKind)
: VarDecl(ImplicitParam, C, DC, IdLoc, IdLoc, Id, Type,
/*TInfo=*/nullptr, SC_None) {
@@ -1765,7 +1765,7 @@ class ParmVarDecl : public VarDecl {

protected:
ParmVarDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
- SourceLocation IdLoc, IdentifierInfo *Id, QualType T,
+ SourceLocation IdLoc, const IdentifierInfo *Id, QualType T,
TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
: VarDecl(DK, C, DC, StartLoc, IdLoc, Id, T, TInfo, S) {
assert(ParmVarDeclBits.HasInheritedDefaultArg == false);
@@ -1777,10 +1777,10 @@ class ParmVarDecl : public VarDecl {

public:
static ParmVarDecl *Create(ASTContext &C, DeclContext *DC,
- SourceLocation StartLoc,
- SourceLocation IdLoc, IdentifierInfo *Id,
- QualType T, TypeSourceInfo *TInfo,
- StorageClass S, Expr *DefArg);
+ SourceLocation StartLoc, SourceLocation IdLoc,
+ const IdentifierInfo *Id, QualType T,
+ TypeSourceInfo *TInfo, StorageClass S,
+ Expr *DefArg);

static ParmVarDecl *CreateDeserialized(ASTContext &C, unsigned ID);

@@ -3095,7 +3095,7 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {

protected:
FieldDecl(Kind DK, DeclContext *DC, SourceLocation StartLoc,
- SourceLocation IdLoc, IdentifierInfo *Id, QualType T,
+ SourceLocation IdLoc, const IdentifierInfo *Id, QualType T,
TypeSourceInfo *TInfo, Expr *BW, bool Mutable,
InClassInitStyle InitStyle)
: DeclaratorDecl(DK, DC, IdLoc, Id, T, TInfo, StartLoc), BitField(false),
@@ -3111,7 +3111,7 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {

static FieldDecl *Create(const ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
- IdentifierInfo *Id, QualType T,
+ const IdentifierInfo *Id, QualType T,
TypeSourceInfo *TInfo, Expr *BW, bool Mutable,
InClassInitStyle InitStyle);

@@ -3332,8 +3332,9 @@ class IndirectFieldDecl : public ValueDecl,
friend class ASTDeclReader;

static IndirectFieldDecl *Create(ASTContext &C, DeclContext *DC,
- SourceLocation L, IdentifierInfo *Id,
- QualType T, llvm::MutableArrayRef<NamedDecl *> CH);
+ SourceLocation L, const IdentifierInfo *Id,
+ QualType T,
+ llvm::MutableArrayRef<NamedDecl *> CH);

static IndirectFieldDecl *CreateDeserialized(ASTContext &C, unsigned ID);

@@ -3381,9 +3382,9 @@ class TypeDecl : public NamedDecl {
void anchor() override;

protected:
- TypeDecl(Kind DK, DeclContext *DC, SourceLocation L, IdentifierInfo *Id,
+ TypeDecl(Kind DK, DeclContext *DC, SourceLocation L, const IdentifierInfo *Id,
SourceLocation StartL = SourceLocation())
- : NamedDecl(DK, DC, L, Id), LocStart(StartL) {}
+ : NamedDecl(DK, DC, L, Id), LocStart(StartL) {}

public:
// Low-level accessor. If you just want the type defined by this node,
@@ -3425,7 +3426,7 @@ class TypedefNameDecl : public TypeDecl, public Redeclarable<TypedefNameDecl> {
protected:
TypedefNameDecl(Kind DK, ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
- IdentifierInfo *Id, TypeSourceInfo *TInfo)
+ const IdentifierInfo *Id, TypeSourceInfo *TInfo)
: TypeDecl(DK, DC, IdLoc, Id, StartLoc), redeclarable_base(C),
MaybeModedTInfo(TInfo, 0) {}

@@ -3512,13 +3513,14 @@ class TypedefNameDecl : public TypeDecl, public Redeclarable<TypedefNameDecl> {
/// type specifier.
class TypedefDecl : public TypedefNameDecl { TypedefDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id, TypeSourceInfo *TInfo) + SourceLocation IdLoc, const IdentifierInfo *Id, + TypeSourceInfo *TInfo) : TypedefNameDecl(Typedef, C, DC, StartLoc, IdLoc, Id, TInfo) {} public: static TypedefDecl *Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, TypeSourceInfo *TInfo); + const IdentifierInfo *Id, TypeSourceInfo *TInfo); static TypedefDecl *CreateDeserialized(ASTContext &C, unsigned ID); SourceRange getSourceRange() const override LLVM_READONLY; @@ -3535,14 +3537,15 @@ class TypeAliasDecl : public TypedefNameDecl { TypeAliasTemplateDecl *Template; TypeAliasDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id, TypeSourceInfo *TInfo) + SourceLocation IdLoc, const IdentifierInfo *Id, + TypeSourceInfo *TInfo) : TypedefNameDecl(TypeAlias, C, DC, StartLoc, IdLoc, Id, TInfo), Template(nullptr) {} public: static TypeAliasDecl *Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, TypeSourceInfo *TInfo); + const IdentifierInfo *Id, TypeSourceInfo *TInfo); static TypeAliasDecl *CreateDeserialized(ASTContext &C, unsigned ID); SourceRange getSourceRange() const override LLVM_READONLY; diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index f8f894b4b10d1..b8d17dd06d155 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -772,7 +772,7 @@ class ObjCPropertyDecl : public NamedDecl { // Synthesize ivar for this property ObjCIvarDecl *PropertyIvarDecl = nullptr; - ObjCPropertyDecl(DeclContext *DC, SourceLocation L, IdentifierInfo *Id, + ObjCPropertyDecl(DeclContext *DC, SourceLocation L, const IdentifierInfo *Id, SourceLocation AtLocation, SourceLocation LParenLocation, QualType T, TypeSourceInfo *TSI, PropertyControl propControl) : NamedDecl(ObjCProperty, DC, L, Id), AtLoc(AtLocation), @@ -782,10 +782,12 @@ class ObjCPropertyDecl : public NamedDecl { PropertyImplementation(propControl) {} public: - static ObjCPropertyDecl * - Create(ASTContext &C, DeclContext *DC, SourceLocation L, IdentifierInfo *Id, - SourceLocation AtLocation, SourceLocation LParenLocation, QualType T, - TypeSourceInfo *TSI, PropertyControl propControl = None); + static ObjCPropertyDecl *Create(ASTContext &C, DeclContext *DC, + SourceLocation L, const IdentifierInfo *Id, + SourceLocation AtLocation, + SourceLocation LParenLocation, QualType T, + TypeSourceInfo *TSI, + PropertyControl propControl = None); static ObjCPropertyDecl *CreateDeserialized(ASTContext &C, unsigned ID); @@ -952,7 +954,7 @@ class ObjCContainerDecl : public NamedDecl, public DeclContext { void anchor() override; public: - ObjCContainerDecl(Kind DK, DeclContext *DC, IdentifierInfo *Id, + ObjCContainerDecl(Kind DK, DeclContext *DC, const IdentifierInfo *Id, SourceLocation nameLoc, SourceLocation atStartLoc); // Iterator access to instance/class properties. 
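The signature churn in these headers all follows the one rule stated in the commit message: wherever Clang only reads an identifier, the parameter, member, or return type becomes pointer-to-const. A minimal self-contained sketch of why that direction is safe for existing callers (`Ident` is an illustrative stand-in, not Clang code):

    // Stand-in for IdentifierInfo; the real class is unrelated.
    struct Ident {
      const char *Name;
    };

    // A read-only callee can take pointer-to-const.
    const char *getName(const Ident *II) { return II->Name; }

    void callers(Ident *Mutable, const Ident *Immutable) {
      getName(Mutable);   // still fine: Ident * converts to const Ident *
      getName(Immutable); // newly fine: a plain Ident * parameter would
                          // have required a const_cast here
    }

The Preprocessor.h hunk later in this patch shows the payoff in-tree: `PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo &>(*II))` collapses to `PP.updateOutOfDateIdentifier(*II)` once the callee accepts a const reference.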
@@ -1240,7 +1242,7 @@ class ObjCInterfaceDecl : public ObjCContainerDecl
llvm::PointerIntPair<DefinitionData *, 1, bool> Data;

ObjCInterfaceDecl(const ASTContext &C, DeclContext *DC, SourceLocation AtLoc,
- IdentifierInfo *Id, ObjCTypeParamList *typeParamList,
+ const IdentifierInfo *Id, ObjCTypeParamList *typeParamList,
SourceLocation CLoc, ObjCInterfaceDecl *PrevDecl,
bool IsInternal);

@@ -1271,13 +1273,11 @@ class ObjCInterfaceDecl : public ObjCContainerDecl
}

public:
- static ObjCInterfaceDecl *Create(const ASTContext &C, DeclContext *DC,
- SourceLocation atLoc,
- IdentifierInfo *Id,
- ObjCTypeParamList *typeParamList,
- ObjCInterfaceDecl *PrevDecl,
- SourceLocation ClassLoc = SourceLocation(),
- bool isInternal = false);
+ static ObjCInterfaceDecl *
+ Create(const ASTContext &C, DeclContext *DC, SourceLocation atLoc,
+ const IdentifierInfo *Id, ObjCTypeParamList *typeParamList,
+ ObjCInterfaceDecl *PrevDecl,
+ SourceLocation ClassLoc = SourceLocation(), bool isInternal = false);

static ObjCInterfaceDecl *CreateDeserialized(const ASTContext &C, unsigned ID);

@@ -1338,7 +1338,8 @@ class ObjCInterfaceDecl : public ObjCContainerDecl
ObjCImplementationDecl *getImplementation() const;
void setImplementation(ObjCImplementationDecl *ImplD);

- ObjCCategoryDecl *FindCategoryDeclaration(IdentifierInfo *CategoryId) const;
+ ObjCCategoryDecl *
+ FindCategoryDeclaration(const IdentifierInfo *CategoryId) const;

// Get the local instance/class method declared in a category.
ObjCMethodDecl *getCategoryInstanceMethod(Selector Sel) const;
@@ -1794,9 +1795,9 @@ class ObjCInterfaceDecl : public ObjCContainerDecl
data().CategoryList = category;
}

- ObjCPropertyDecl
- *FindPropertyVisibleInPrimaryClass(IdentifierInfo *PropertyId,
- ObjCPropertyQueryKind QueryKind) const;
+ ObjCPropertyDecl *
+ FindPropertyVisibleInPrimaryClass(const IdentifierInfo *PropertyId,
+ ObjCPropertyQueryKind QueryKind) const;

void collectPropertiesToImplement(PropertyMap &PM) const override;

@@ -1954,8 +1955,8 @@ class ObjCIvarDecl : public FieldDecl {

private:
ObjCIvarDecl(ObjCContainerDecl *DC, SourceLocation StartLoc,
- SourceLocation IdLoc, IdentifierInfo *Id,
- QualType T, TypeSourceInfo *TInfo, AccessControl ac, Expr *BW,
+ SourceLocation IdLoc, const IdentifierInfo *Id, QualType T,
+ TypeSourceInfo *TInfo, AccessControl ac, Expr *BW,
bool synthesized)
: FieldDecl(ObjCIvar, DC, StartLoc, IdLoc, Id, T, TInfo, BW,
/*Mutable=*/false, /*HasInit=*/ICIS_NoInit),
@@ -1964,10 +1965,9 @@ class ObjCIvarDecl : public FieldDecl {
public:
static ObjCIvarDecl *Create(ASTContext &C, ObjCContainerDecl *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
- IdentifierInfo *Id, QualType T,
- TypeSourceInfo *TInfo,
- AccessControl ac, Expr *BW = nullptr,
- bool synthesized=false);
+ const IdentifierInfo *Id, QualType T,
+ TypeSourceInfo *TInfo, AccessControl ac,
+ Expr *BW = nullptr, bool synthesized = false);

static ObjCIvarDecl *CreateDeserialized(ASTContext &C, unsigned ID);

@@ -2343,7 +2343,7 @@ class ObjCCategoryDecl : public ObjCContainerDecl {
ObjCCategoryDecl(DeclContext *DC, SourceLocation AtLoc,
SourceLocation ClassNameLoc, SourceLocation CategoryNameLoc,
- IdentifierInfo *Id, ObjCInterfaceDecl *IDecl,
+ const IdentifierInfo *Id, ObjCInterfaceDecl *IDecl,
ObjCTypeParamList *typeParamList,
SourceLocation IvarLBraceLoc = SourceLocation(),
SourceLocation IvarRBraceLoc = SourceLocation());
@@ -2354,15 +2354,13 @@ class ObjCCategoryDecl : public ObjCContainerDecl {
friend class ASTDeclReader;
friend class ASTDeclWriter;

- static ObjCCategoryDecl *Create(ASTContext &C, DeclContext *DC,
- SourceLocation AtLoc,
- SourceLocation ClassNameLoc,
- SourceLocation CategoryNameLoc,
- IdentifierInfo *Id,
- ObjCInterfaceDecl *IDecl,
- ObjCTypeParamList *typeParamList,
- SourceLocation IvarLBraceLoc=SourceLocation(),
- SourceLocation IvarRBraceLoc=SourceLocation());
+ static ObjCCategoryDecl *
+ Create(ASTContext &C, DeclContext *DC, SourceLocation AtLoc,
+ SourceLocation ClassNameLoc, SourceLocation CategoryNameLoc,
+ const IdentifierInfo *Id, ObjCInterfaceDecl *IDecl,
+ ObjCTypeParamList *typeParamList,
+ SourceLocation IvarLBraceLoc = SourceLocation(),
+ SourceLocation IvarRBraceLoc = SourceLocation());
static ObjCCategoryDecl *CreateDeserialized(ASTContext &C, unsigned ID);

ObjCInterfaceDecl *getClassInterface() { return ClassInterface; }
@@ -2472,10 +2470,9 @@ class ObjCImplDecl : public ObjCContainerDecl {
void anchor() override;

protected:
- ObjCImplDecl(Kind DK, DeclContext *DC,
- ObjCInterfaceDecl *classInterface,
- IdentifierInfo *Id,
- SourceLocation nameLoc, SourceLocation atStartLoc)
+ ObjCImplDecl(Kind DK, DeclContext *DC, ObjCInterfaceDecl *classInterface,
+ const IdentifierInfo *Id, SourceLocation nameLoc,
+ SourceLocation atStartLoc)
: ObjCContainerDecl(DK, DC, Id, nameLoc, atStartLoc),
ClassInterface(classInterface) {}

@@ -2543,12 +2540,12 @@ class ObjCCategoryImplDecl : public ObjCImplDecl {
// Category name location
SourceLocation CategoryNameLoc;

- ObjCCategoryImplDecl(DeclContext *DC, IdentifierInfo *Id,
+ ObjCCategoryImplDecl(DeclContext *DC, const IdentifierInfo *Id,
ObjCInterfaceDecl *classInterface,
SourceLocation nameLoc, SourceLocation atStartLoc,
SourceLocation CategoryNameLoc)
- : ObjCImplDecl(ObjCCategoryImpl, DC, classInterface, Id,
- nameLoc, atStartLoc),
+ : ObjCImplDecl(ObjCCategoryImpl, DC, classInterface, Id, nameLoc,
+ atStartLoc),
CategoryNameLoc(CategoryNameLoc) {}

void anchor() override;
@@ -2557,12 +2554,10 @@ class ObjCCategoryImplDecl : public ObjCImplDecl {
friend class ASTDeclReader;
friend class ASTDeclWriter;

- static ObjCCategoryImplDecl *Create(ASTContext &C, DeclContext *DC,
- IdentifierInfo *Id,
- ObjCInterfaceDecl *classInterface,
- SourceLocation nameLoc,
- SourceLocation atStartLoc,
- SourceLocation CategoryNameLoc);
+ static ObjCCategoryImplDecl *
+ Create(ASTContext &C, DeclContext *DC, const IdentifierInfo *Id,
+ ObjCInterfaceDecl *classInterface, SourceLocation nameLoc,
+ SourceLocation atStartLoc, SourceLocation CategoryNameLoc);
static ObjCCategoryImplDecl *CreateDeserialized(ASTContext &C, unsigned ID);

ObjCCategoryDecl *getCategoryDecl() const;
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index e3b6a7efb1127..cb598cb81840d 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1389,14 +1389,14 @@ class NonTypeTemplateParmDecl final
NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P,
- IdentifierInfo *Id, QualType T,
+ const IdentifierInfo *Id, QualType T,
bool ParameterPack, TypeSourceInfo *TInfo)
: DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}

NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P,
- IdentifierInfo *Id, QualType T,
+ const IdentifierInfo *Id, QualType T,
TypeSourceInfo *TInfo,
ArrayRef<QualType> ExpandedTypes,
ArrayRef<TypeSourceInfo *> ExpandedTInfos);

@@ -1404,12 +1404,12 @@ public:
static NonTypeTemplateParmDecl *
Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
- SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
+ SourceLocation IdLoc, unsigned D, unsigned P, const IdentifierInfo *Id,
QualType T, bool ParameterPack, TypeSourceInfo *TInfo);

static NonTypeTemplateParmDecl *
Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
- SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
+ SourceLocation IdLoc, unsigned D, unsigned P, const IdentifierInfo *Id,
QualType T, TypeSourceInfo *TInfo,
ArrayRef<QualType> ExpandedTypes,
ArrayRef<TypeSourceInfo *> ExpandedTInfos);

diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 7eb99a75e1fc9..d28e5c3a78ee4 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -2559,7 +2559,7 @@ class CXXDeleteExpr : public Expr {
class PseudoDestructorTypeStorage {
/// Either the type source information or the name of the type, if
/// it couldn't be resolved due to type-dependence.
- llvm::PointerUnion<TypeSourceInfo *, IdentifierInfo *> Type;
+ llvm::PointerUnion<TypeSourceInfo *, const IdentifierInfo *> Type;

/// The starting source location of the pseudo-destructor type.
SourceLocation Location;
@@ -2567,7 +2567,7 @@ class PseudoDestructorTypeStorage {
public:
PseudoDestructorTypeStorage() = default;

- PseudoDestructorTypeStorage(IdentifierInfo *II, SourceLocation Loc)
+ PseudoDestructorTypeStorage(const IdentifierInfo *II, SourceLocation Loc)
: Type(II), Location(Loc) {}

PseudoDestructorTypeStorage(TypeSourceInfo *Info);
@@ -2576,8 +2576,8 @@ class PseudoDestructorTypeStorage {
return Type.dyn_cast<TypeSourceInfo *>();
}

- IdentifierInfo *getIdentifier() const {
- return Type.dyn_cast<IdentifierInfo *>();
+ const IdentifierInfo *getIdentifier() const {
+ return Type.dyn_cast<const IdentifierInfo *>();
}

SourceLocation getLocation() const { return Location; }
@@ -2708,7 +2708,7 @@ class CXXPseudoDestructorExpr : public Expr {
/// In a dependent pseudo-destructor expression for which we do not
/// have full type information on the destroyed type, provides the name
/// of the destroyed type.
- IdentifierInfo *getDestroyedTypeIdentifier() const {
+ const IdentifierInfo *getDestroyedTypeIdentifier() const {
return DestroyedType.getIdentifier();
}

diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 8e573965b0a33..230c83943c222 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -138,7 +138,7 @@ class ExternalASTSource : public RefCountedBase<ExternalASTSource> {
virtual CXXBaseSpecifier *GetExternalCXXBaseSpecifiers(uint64_t Offset);

/// Update an out-of-date identifier.
- virtual void updateOutOfDateIdentifier(IdentifierInfo &II) {}
+ virtual void updateOutOfDateIdentifier(const IdentifierInfo &II) {}

/// Find all declarations with the given name in the given context,
/// and add them to the context by calling SetExternalVisibleDeclsForName
diff --git a/clang/include/clang/AST/NestedNameSpecifier.h b/clang/include/clang/AST/NestedNameSpecifier.h
index 3b6cf97211850..7b0c21b9e7cfb 100644
--- a/clang/include/clang/AST/NestedNameSpecifier.h
+++ b/clang/include/clang/AST/NestedNameSpecifier.h
@@ -124,7 +124,7 @@ class NestedNameSpecifier : public llvm::FoldingSetNode {
/// cannot be resolved.
static NestedNameSpecifier *Create(const ASTContext &Context,
NestedNameSpecifier *Prefix,
- IdentifierInfo *II);
+ const IdentifierInfo *II);

/// Builds a nested name specifier that names a namespace.
static NestedNameSpecifier *Create(const ASTContext &Context, @@ -134,7 +134,7 @@ class NestedNameSpecifier : public llvm::FoldingSetNode { /// Builds a nested name specifier that names a namespace alias. static NestedNameSpecifier *Create(const ASTContext &Context, NestedNameSpecifier *Prefix, - NamespaceAliasDecl *Alias); + const NamespaceAliasDecl *Alias); /// Builds a nested name specifier that names a type. static NestedNameSpecifier *Create(const ASTContext &Context, @@ -148,7 +148,7 @@ class NestedNameSpecifier : public llvm::FoldingSetNode { /// nested name specifier, e.g., in "x->Base::f", the "x" has a dependent /// type. static NestedNameSpecifier *Create(const ASTContext &Context, - IdentifierInfo *II); + const IdentifierInfo *II); /// Returns the nested name specifier representing the global /// scope. diff --git a/clang/include/clang/Analysis/SelectorExtras.h b/clang/include/clang/Analysis/SelectorExtras.h index 1e1daf5706bbf..ac2c2519beae3 100644 --- a/clang/include/clang/Analysis/SelectorExtras.h +++ b/clang/include/clang/Analysis/SelectorExtras.h @@ -15,10 +15,10 @@ namespace clang { template static inline Selector getKeywordSelector(ASTContext &Ctx, - IdentifierInfos *... IIs) { + const IdentifierInfos *...IIs) { static_assert(sizeof...(IdentifierInfos) > 0, "keyword selectors must have at least one argument"); - SmallVector II({&Ctx.Idents.get(IIs)...}); + SmallVector II({&Ctx.Idents.get(IIs)...}); return Ctx.Selectors.getSelector(II.size(), &II[0]); } diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index a091639bfa254..a893e6f4d3d39 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -913,12 +913,13 @@ class alignas(IdentifierInfoAlignment) MultiKeywordSelector public: // Constructor for keyword selectors. - MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) + MultiKeywordSelector(unsigned nKeys, const IdentifierInfo **IIV) : DeclarationNameExtra(nKeys) { assert((nKeys > 1) && "not a multi-keyword selector"); // Fill in the trailing keyword array. - IdentifierInfo **KeyInfo = reinterpret_cast(this + 1); + const IdentifierInfo **KeyInfo = + reinterpret_cast(this + 1); for (unsigned i = 0; i != nKeys; ++i) KeyInfo[i] = IIV[i]; } @@ -928,7 +929,7 @@ class alignas(IdentifierInfoAlignment) MultiKeywordSelector using DeclarationNameExtra::getNumArgs; - using keyword_iterator = IdentifierInfo *const *; + using keyword_iterator = const IdentifierInfo *const *; keyword_iterator keyword_begin() const { return reinterpret_cast(this + 1); @@ -938,7 +939,7 @@ class alignas(IdentifierInfoAlignment) MultiKeywordSelector return keyword_begin() + getNumArgs(); } - IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { + const IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); return keyword_begin()[i]; } @@ -991,10 +992,10 @@ class Selector { /// Do not reorder or add any arguments to this template /// without thoroughly understanding how tightly coupled these classes are. 
llvm::PointerIntPair< - llvm::PointerUnion, 2> + llvm::PointerUnion, 2> InfoPtr; - Selector(IdentifierInfo *II, unsigned nArgs) { + Selector(const IdentifierInfo *II, unsigned nArgs) { assert(nArgs < 2 && "nArgs not equal to 0/1"); InfoPtr.setPointerAndInt(II, nArgs + 1); } @@ -1006,8 +1007,8 @@ class Selector { InfoPtr.setPointerAndInt(SI, MultiArg & 0b11); } - IdentifierInfo *getAsIdentifierInfo() const { - return InfoPtr.getPointer().dyn_cast(); + const IdentifierInfo *getAsIdentifierInfo() const { + return InfoPtr.getPointer().dyn_cast(); } MultiKeywordSelector *getMultiKeywordSelector() const { @@ -1075,7 +1076,7 @@ class Selector { /// /// \returns the uniqued identifier for this slot, or NULL if this slot has /// no corresponding identifier. - IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; + const IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; /// Retrieve the name at a given position in the selector. /// @@ -1132,13 +1133,13 @@ class SelectorTable { /// /// \p NumArgs indicates whether this is a no argument selector "foo", a /// single argument selector "foo:" or multi-argument "foo:bar:". - Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); + Selector getSelector(unsigned NumArgs, const IdentifierInfo **IIV); - Selector getUnarySelector(IdentifierInfo *ID) { + Selector getUnarySelector(const IdentifierInfo *ID) { return Selector(ID, 1); } - Selector getNullarySelector(IdentifierInfo *ID) { + Selector getNullarySelector(const IdentifierInfo *ID) { return Selector(ID, 0); } diff --git a/clang/include/clang/Lex/ExternalPreprocessorSource.h b/clang/include/clang/Lex/ExternalPreprocessorSource.h index 685941b66bd8b..6775841860373 100644 --- a/clang/include/clang/Lex/ExternalPreprocessorSource.h +++ b/clang/include/clang/Lex/ExternalPreprocessorSource.h @@ -31,7 +31,7 @@ class ExternalPreprocessorSource { virtual void ReadDefinedMacros() = 0; /// Update an out-of-date identifier. - virtual void updateOutOfDateIdentifier(IdentifierInfo &II) = 0; + virtual void updateOutOfDateIdentifier(const IdentifierInfo &II) = 0; /// Return the identifier associated with the given ID number. /// diff --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h index 1237fc62eb6cf..19a706216d509 100644 --- a/clang/include/clang/Lex/MacroInfo.h +++ b/clang/include/clang/Lex/MacroInfo.h @@ -515,7 +515,7 @@ class ModuleMacro : public llvm::FoldingSetNode { friend class Preprocessor; /// The name defined by the macro. - IdentifierInfo *II; + const IdentifierInfo *II; /// The body of the #define, or nullptr if this is a #undef. MacroInfo *Macro; @@ -529,7 +529,7 @@ class ModuleMacro : public llvm::FoldingSetNode { /// The number of modules whose macros are directly overridden by this one. unsigned NumOverrides; - ModuleMacro(Module *OwningModule, IdentifierInfo *II, MacroInfo *Macro, + ModuleMacro(Module *OwningModule, const IdentifierInfo *II, MacroInfo *Macro, ArrayRef Overrides) : II(II), Macro(Macro), OwningModule(OwningModule), NumOverrides(Overrides.size()) { @@ -539,7 +539,7 @@ class ModuleMacro : public llvm::FoldingSetNode { public: static ModuleMacro *create(Preprocessor &PP, Module *OwningModule, - IdentifierInfo *II, MacroInfo *Macro, + const IdentifierInfo *II, MacroInfo *Macro, ArrayRef Overrides); void Profile(llvm::FoldingSetNodeID &ID) const { @@ -553,7 +553,7 @@ class ModuleMacro : public llvm::FoldingSetNode { } /// Get the name of the macro. 
- IdentifierInfo *getName() const { return II; } + const IdentifierInfo *getName() const { return II; } /// Get the ID of the module that exports this macro. Module *getOwningModule() const { return OwningModule; } diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 0836b7d439bb0..e89b4a2c5230e 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -836,7 +836,7 @@ class Preprocessor { ModuleMacroInfo *getModuleInfo(Preprocessor &PP, const IdentifierInfo *II) const { if (II->isOutOfDate()) - PP.updateOutOfDateIdentifier(const_cast(*II)); + PP.updateOutOfDateIdentifier(*II); // FIXME: Find a spare bit on IdentifierInfo and store a // HasModuleMacros flag. if (!II->hasMacroDefinition() || @@ -1162,7 +1162,7 @@ class Preprocessor { /// skipped. llvm::DenseMap RecordedSkippedRanges; - void updateOutOfDateIdentifier(IdentifierInfo &II) const; + void updateOutOfDateIdentifier(const IdentifierInfo &II) const; public: Preprocessor(std::shared_ptr PPOpts, @@ -1432,14 +1432,15 @@ class Preprocessor { MacroDirective *MD); /// Register an exported macro for a module and identifier. - ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, + ModuleMacro *addModuleMacro(Module *Mod, const IdentifierInfo *II, + MacroInfo *Macro, ArrayRef Overrides, bool &IsNew); ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); /// Get the list of leaf (non-overridden) module macros for a name. ArrayRef getLeafModuleMacros(const IdentifierInfo *II) const { if (II->isOutOfDate()) - updateOutOfDateIdentifier(const_cast(*II)); + updateOutOfDateIdentifier(*II); auto I = LeafModuleMacros.find(II); if (I != LeafModuleMacros.end()) return I->second; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 8bc929b1dfe4b..3a055c10ffb38 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -329,7 +329,7 @@ class Parser : public CodeCompletionHandler { }; /// Identifiers which have been declared within a tentative parse. - SmallVector TentativelyDeclaredIdentifiers; + SmallVector TentativelyDeclaredIdentifiers; /// Tracker for '<' tokens that might have been intended to be treated as an /// angle bracket instead of a less-than comparison. 
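The Selector and SelectorTable changes above mean call sites can now keep IdentifierInfo pointers const end to end. A minimal sketch of the resulting usage, assuming an ASTContext &Ctx is in scope (the selector chosen here is only an illustration):

  // Build the two-part selector setObject:forKey: from pointers-to-const.
  // IdentifierTable::get() still returns a mutable reference; taking its
  // address converts implicitly to const IdentifierInfo *.
  const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("setObject"),
                                       &Ctx.Idents.get("forKey")};
  Selector Sel = Ctx.Selectors.getSelector(2, KeyIdents);

Callers that previously declared IdentifierInfo *KeyIdents[] arrays only need the element type constified; the NSAPI.cpp and CGObjC.cpp hunks later in this patch are that same mechanical rewrite.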
@@ -1927,15 +1927,11 @@ class Parser : public CodeCompletionHandler { bool EnteringContext, IdentifierInfo &II, CXXScopeSpec &SS); - bool ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS, - ParsedType ObjectType, - bool ObjectHasErrors, - bool EnteringContext, - bool *MayBePseudoDestructor = nullptr, - bool IsTypename = false, - IdentifierInfo **LastII = nullptr, - bool OnlyNamespace = false, - bool InUsingDeclaration = false); + bool ParseOptionalCXXScopeSpecifier( + CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHasErrors, + bool EnteringContext, bool *MayBePseudoDestructor = nullptr, + bool IsTypename = false, const IdentifierInfo **LastII = nullptr, + bool OnlyNamespace = false, bool InUsingDeclaration = false); //===--------------------------------------------------------------------===// // C++11 5.1.2: Lambda expressions diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index a2028e40f83d5..0924dc27af82b 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -362,7 +362,7 @@ class CodeCompletionContext { QualType BaseType; /// The identifiers for Objective-C selector parts. - ArrayRef SelIdents; + ArrayRef SelIdents; /// The scope specifier that comes before the completion token e.g. /// "a::b::" @@ -378,8 +378,9 @@ class CodeCompletionContext { : CCKind(CCKind), IsUsingDeclaration(false), SelIdents(std::nullopt) {} /// Construct a new code-completion context of the given kind. - CodeCompletionContext(Kind CCKind, QualType T, - ArrayRef SelIdents = std::nullopt) + CodeCompletionContext( + Kind CCKind, QualType T, + ArrayRef SelIdents = std::nullopt) : CCKind(CCKind), IsUsingDeclaration(false), SelIdents(SelIdents) { if (CCKind == CCC_DotMemberAccess || CCKind == CCC_ArrowMemberAccess || CCKind == CCC_ObjCPropertyAccess || CCKind == CCC_ObjCClassMessage || @@ -406,7 +407,7 @@ class CodeCompletionContext { QualType getBaseType() const { return BaseType; } /// Retrieve the Objective-C selector identifiers. - ArrayRef getSelIdents() const { return SelIdents; } + ArrayRef getSelIdents() const { return SelIdents; } /// Determines whether we want C++ constructors as results within this /// context. diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index a176159707486..c9eecdafe62c7 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -1049,7 +1049,7 @@ class UnqualifiedId { union { /// When Kind == IK_Identifier, the parsed identifier, or when /// Kind == IK_UserLiteralId, the identifier suffix. - IdentifierInfo *Identifier; + const IdentifierInfo *Identifier; /// When Kind == IK_OperatorFunctionId, the overloaded operator /// that we parsed. @@ -1111,7 +1111,7 @@ class UnqualifiedId { /// \param IdLoc the location of the parsed identifier. void setIdentifier(const IdentifierInfo *Id, SourceLocation IdLoc) { Kind = UnqualifiedIdKind::IK_Identifier; - Identifier = const_cast(Id); + Identifier = Id; StartLocation = EndLocation = IdLoc; } @@ -1154,9 +1154,9 @@ class UnqualifiedId { /// /// \param IdLoc the location of the identifier. void setLiteralOperatorId(const IdentifierInfo *Id, SourceLocation OpLoc, - SourceLocation IdLoc) { + SourceLocation IdLoc) { Kind = UnqualifiedIdKind::IK_LiteralOperatorId; - Identifier = const_cast(Id); + Identifier = Id; StartLocation = OpLoc; EndLocation = IdLoc; } @@ -1225,7 +1225,7 @@ class UnqualifiedId { /// \param Id the identifier. 
void setImplicitSelfParam(const IdentifierInfo *Id) { Kind = UnqualifiedIdKind::IK_ImplicitSelfParam; - Identifier = const_cast(Id); + Identifier = Id; StartLocation = EndLocation = SourceLocation(); } @@ -1327,7 +1327,7 @@ struct DeclaratorChunk { /// Parameter type lists will have type info (if the actions module provides /// it), but may have null identifier info: e.g. for 'void foo(int X, int)'. struct ParamInfo { - IdentifierInfo *Ident; + const IdentifierInfo *Ident; SourceLocation IdentLoc; Decl *Param; @@ -1339,11 +1339,10 @@ struct DeclaratorChunk { std::unique_ptr DefaultArgTokens; ParamInfo() = default; - ParamInfo(IdentifierInfo *ident, SourceLocation iloc, - Decl *param, + ParamInfo(const IdentifierInfo *ident, SourceLocation iloc, Decl *param, std::unique_ptr DefArgTokens = nullptr) - : Ident(ident), IdentLoc(iloc), Param(param), - DefaultArgTokens(std::move(DefArgTokens)) {} + : Ident(ident), IdentLoc(iloc), Param(param), + DefaultArgTokens(std::move(DefArgTokens)) {} }; struct TypeAndRange { @@ -2326,7 +2325,7 @@ class Declarator { return BindingGroup.isSet(); } - IdentifierInfo *getIdentifier() const { + const IdentifierInfo *getIdentifier() const { if (Name.getKind() == UnqualifiedIdKind::IK_Identifier) return Name.Identifier; @@ -2335,7 +2334,7 @@ class Declarator { SourceLocation getIdentifierLoc() const { return Name.StartLocation; } /// Set the name of this declarator to be the given identifier. - void SetIdentifier(IdentifierInfo *Id, SourceLocation IdLoc) { + void SetIdentifier(const IdentifierInfo *Id, SourceLocation IdLoc) { Name.setIdentifier(Id, IdLoc); } diff --git a/clang/include/clang/Sema/ParsedTemplate.h b/clang/include/clang/Sema/ParsedTemplate.h index 65182d57246ae..ac4dbbf294caf 100644 --- a/clang/include/clang/Sema/ParsedTemplate.h +++ b/clang/include/clang/Sema/ParsedTemplate.h @@ -159,7 +159,7 @@ namespace clang { SourceLocation TemplateNameLoc; /// FIXME: Temporarily stores the name of a specialization - IdentifierInfo *Name; + const IdentifierInfo *Name; /// FIXME: Temporarily stores the overloaded operator kind. OverloadedOperatorKind Operator; @@ -197,7 +197,7 @@ namespace clang { /// appends it to List. static TemplateIdAnnotation * Create(SourceLocation TemplateKWLoc, SourceLocation TemplateNameLoc, - IdentifierInfo *Name, OverloadedOperatorKind OperatorKind, + const IdentifierInfo *Name, OverloadedOperatorKind OperatorKind, ParsedTemplateTy OpaqueTemplateName, TemplateNameKind TemplateKind, SourceLocation LAngleLoc, SourceLocation RAngleLoc, ArrayRef TemplateArgs, bool ArgsInvalid, @@ -236,7 +236,8 @@ namespace clang { TemplateIdAnnotation(const TemplateIdAnnotation &) = delete; TemplateIdAnnotation(SourceLocation TemplateKWLoc, - SourceLocation TemplateNameLoc, IdentifierInfo *Name, + SourceLocation TemplateNameLoc, + const IdentifierInfo *Name, OverloadedOperatorKind OperatorKind, ParsedTemplateTy OpaqueTemplateName, TemplateNameKind TemplateKind, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e3e255a0dd76f..0ee4f3c8e127f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -568,7 +568,7 @@ class Sema final : public SemaBase { /// Invent a new identifier for parameters of abbreviated templates. 
IdentifierInfo * - InventAbbreviatedTemplateParameterTypeName(IdentifierInfo *ParamName, + InventAbbreviatedTemplateParameterTypeName(const IdentifierInfo *ParamName, unsigned Index); void emitAndClearUnusedLocalTypedefWarnings(); @@ -2958,9 +2958,9 @@ class Sema final : public SemaBase { SourceLocation NameLoc, TypeSourceInfo *TSInfo); ParmVarDecl *CheckParameter(DeclContext *DC, SourceLocation StartLoc, - SourceLocation NameLoc, IdentifierInfo *Name, - QualType T, TypeSourceInfo *TSInfo, - StorageClass SC); + SourceLocation NameLoc, + const IdentifierInfo *Name, QualType T, + TypeSourceInfo *TSInfo, StorageClass SC); // Contexts where using non-trivial C union types can be disallowed. This is // passed to err_non_trivial_c_union_in_invalid_context. @@ -3365,7 +3365,7 @@ class Sema final : public SemaBase { /// variable. void DiagnoseUnusedButSetDecl(const VarDecl *VD, DiagReceiverTy DiagReceiver); - ObjCInterfaceDecl *getObjCInterfaceDecl(IdentifierInfo *&Id, + ObjCInterfaceDecl *getObjCInterfaceDecl(const IdentifierInfo *&Id, SourceLocation IdLoc, bool TypoCorrection = false); @@ -3442,8 +3442,9 @@ class Sema final : public SemaBase { /// VerifyBitField - verifies that a bit field expression is an ICE and has /// the correct width, and that the field type is valid. /// Returns false on success. - ExprResult VerifyBitField(SourceLocation FieldLoc, IdentifierInfo *FieldName, - QualType FieldTy, bool IsMsStruct, Expr *BitWidth); + ExprResult VerifyBitField(SourceLocation FieldLoc, + const IdentifierInfo *FieldName, QualType FieldTy, + bool IsMsStruct, Expr *BitWidth); /// IsValueInFlagEnum - Determine if a value is allowed as part of a flag /// enum. If AllowMask is true, then we also allow the complement of a valid @@ -4638,7 +4639,8 @@ class Sema final : public SemaBase { VarDecl *BuildExceptionDeclaration(Scope *S, TypeSourceInfo *TInfo, SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id); + SourceLocation IdLoc, + const IdentifierInfo *Id); Decl *ActOnExceptionDeclarator(Scope *S, Declarator &D); @@ -6555,12 +6557,12 @@ class Sema final : public SemaBase { ParsedType getInheritingConstructorName(CXXScopeSpec &SS, SourceLocation NameLoc, - IdentifierInfo &Name); + const IdentifierInfo &Name); - ParsedType getConstructorName(IdentifierInfo &II, SourceLocation NameLoc, - Scope *S, CXXScopeSpec &SS, - bool EnteringContext); - ParsedType getDestructorName(IdentifierInfo &II, SourceLocation NameLoc, + ParsedType getConstructorName(const IdentifierInfo &II, + SourceLocation NameLoc, Scope *S, + CXXScopeSpec &SS, bool EnteringContext); + ParsedType getDestructorName(const IdentifierInfo &II, SourceLocation NameLoc, Scope *S, CXXScopeSpec &SS, ParsedType ObjectType, bool EnteringContext); @@ -6960,7 +6962,7 @@ class Sema final : public SemaBase { concepts::Requirement *ActOnTypeRequirement(SourceLocation TypenameKWLoc, CXXScopeSpec &SS, SourceLocation NameLoc, - IdentifierInfo *TypeName, + const IdentifierInfo *TypeName, TemplateIdAnnotation *TemplateId); concepts::Requirement *ActOnCompoundRequirement(Expr *E, SourceLocation NoexceptLoc); @@ -9116,7 +9118,7 @@ class Sema final : public SemaBase { TypeResult ActOnTemplateIdType(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, - TemplateTy Template, IdentifierInfo *TemplateII, + TemplateTy Template, const IdentifierInfo *TemplateII, SourceLocation TemplateIILoc, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgs, SourceLocation RAngleLoc, bool IsCtorOrDtorName = false, bool IsClassName = false, @@ -9457,7 +9459,7 
@@ class Sema final : public SemaBase { TypeResult ActOnTypenameType(Scope *S, SourceLocation TypenameLoc, const CXXScopeSpec &SS, SourceLocation TemplateLoc, - TemplateTy TemplateName, IdentifierInfo *TemplateII, + TemplateTy TemplateName, const IdentifierInfo *TemplateII, SourceLocation TemplateIILoc, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgs, SourceLocation RAngleLoc); @@ -9535,14 +9537,15 @@ class Sema final : public SemaBase { Decl *ActOnConceptDefinition(Scope *S, MultiTemplateParamsArg TemplateParameterLists, - IdentifierInfo *Name, SourceLocation NameLoc, - Expr *ConstraintExpr); + const IdentifierInfo *Name, + SourceLocation NameLoc, Expr *ConstraintExpr); void CheckConceptRedefinition(ConceptDecl *NewDecl, LookupResult &Previous, bool &AddToScope); TypeResult ActOnDependentTag(Scope *S, unsigned TagSpec, TagUseKind TUK, - const CXXScopeSpec &SS, IdentifierInfo *Name, + const CXXScopeSpec &SS, + const IdentifierInfo *Name, SourceLocation TagLoc, SourceLocation NameLoc); void MarkAsLateParsedTemplate(FunctionDecl *FD, Decl *FnD, @@ -11988,22 +11991,22 @@ class Sema final : public SemaBase { SkipBodyInfo *SkipBody); ObjCCategoryDecl *ActOnStartCategoryInterface( - SourceLocation AtInterfaceLoc, IdentifierInfo *ClassName, + SourceLocation AtInterfaceLoc, const IdentifierInfo *ClassName, SourceLocation ClassLoc, ObjCTypeParamList *typeParamList, - IdentifierInfo *CategoryName, SourceLocation CategoryLoc, + const IdentifierInfo *CategoryName, SourceLocation CategoryLoc, Decl *const *ProtoRefs, unsigned NumProtoRefs, const SourceLocation *ProtoLocs, SourceLocation EndProtoLoc, const ParsedAttributesView &AttrList); ObjCImplementationDecl *ActOnStartClassImplementation( - SourceLocation AtClassImplLoc, IdentifierInfo *ClassName, - SourceLocation ClassLoc, IdentifierInfo *SuperClassname, + SourceLocation AtClassImplLoc, const IdentifierInfo *ClassName, + SourceLocation ClassLoc, const IdentifierInfo *SuperClassname, SourceLocation SuperClassLoc, const ParsedAttributesView &AttrList); ObjCCategoryImplDecl *ActOnStartCategoryImplementation( - SourceLocation AtCatImplLoc, IdentifierInfo *ClassName, - SourceLocation ClassLoc, IdentifierInfo *CatName, SourceLocation CatLoc, - const ParsedAttributesView &AttrList); + SourceLocation AtCatImplLoc, const IdentifierInfo *ClassName, + SourceLocation ClassLoc, const IdentifierInfo *CatName, + SourceLocation CatLoc, const ParsedAttributesView &AttrList); DeclGroupPtrTy ActOnFinishObjCImplementation(Decl *ObjCImpDecl, ArrayRef Decls); @@ -12186,11 +12189,13 @@ class Sema final : public SemaBase { bool CheckObjCDeclScope(Decl *D); void ActOnDefs(Scope *S, Decl *TagD, SourceLocation DeclStart, - IdentifierInfo *ClassName, SmallVectorImpl &Decls); + const IdentifierInfo *ClassName, + SmallVectorImpl &Decls); VarDecl *BuildObjCExceptionDecl(TypeSourceInfo *TInfo, QualType ExceptionType, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, bool Invalid = false); + const IdentifierInfo *Id, + bool Invalid = false); Decl *ActOnObjCExceptionDecl(Scope *S, Declarator &D); @@ -12307,8 +12312,8 @@ class Sema final : public SemaBase { SourceLocation SuperLoc, QualType SuperType, bool Super); - ExprResult ActOnClassPropertyRefExpr(IdentifierInfo &receiverName, - IdentifierInfo &propertyName, + ExprResult ActOnClassPropertyRefExpr(const IdentifierInfo &receiverName, + const IdentifierInfo &propertyName, SourceLocation receiverNameLoc, SourceLocation propertyNameLoc); @@ -12783,18 +12788,18 @@ class Sema final : public SemaBase { bool 
IsParameter); void CodeCompleteObjCMessageReceiver(Scope *S); void CodeCompleteObjCSuperMessage(Scope *S, SourceLocation SuperLoc, - ArrayRef SelIdents, + ArrayRef SelIdents, bool AtArgumentExpression); void CodeCompleteObjCClassMessage(Scope *S, ParsedType Receiver, - ArrayRef SelIdents, + ArrayRef SelIdents, bool AtArgumentExpression, bool IsSuper = false); - void CodeCompleteObjCInstanceMessage(Scope *S, Expr *Receiver, - ArrayRef SelIdents, - bool AtArgumentExpression, - ObjCInterfaceDecl *Super = nullptr); + void CodeCompleteObjCInstanceMessage( + Scope *S, Expr *Receiver, ArrayRef SelIdents, + bool AtArgumentExpression, ObjCInterfaceDecl *Super = nullptr); void CodeCompleteObjCForCollection(Scope *S, DeclGroupPtrTy IterationVar); - void CodeCompleteObjCSelector(Scope *S, ArrayRef SelIdents); + void CodeCompleteObjCSelector(Scope *S, + ArrayRef SelIdents); void CodeCompleteObjCProtocolReferences(ArrayRef Protocols); void CodeCompleteObjCProtocolDecl(Scope *S); @@ -12814,11 +12819,11 @@ class Sema final : public SemaBase { void CodeCompleteObjCMethodDecl(Scope *S, std::optional IsInstanceMethod, ParsedType ReturnType); - void CodeCompleteObjCMethodDeclSelector(Scope *S, bool IsInstanceMethod, - bool AtParameterName, - ParsedType ReturnType, - ArrayRef SelIdents); - void CodeCompleteObjCClassPropertyRefExpr(Scope *S, IdentifierInfo &ClassName, + void CodeCompleteObjCMethodDeclSelector( + Scope *S, bool IsInstanceMethod, bool AtParameterName, + ParsedType ReturnType, ArrayRef SelIdents); + void CodeCompleteObjCClassPropertyRefExpr(Scope *S, + const IdentifierInfo &ClassName, SourceLocation ClassNameLoc, bool IsBaseExprStatement); void CodeCompletePreprocessorDirective(bool InConditional); diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 1911252b34cd1..5fd55a519c6b0 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1082,12 +1082,12 @@ class ASTReader /// The set of lookup results that we have faked in order to support /// merging of partially deserialized decls but that we have not yet removed. - llvm::SmallMapVector, 16> - PendingFakeLookupResults; + llvm::SmallMapVector, 16> + PendingFakeLookupResults; /// The generation number of each identifier, which keeps track of /// the last time we loaded information about this identifier. - llvm::DenseMap IdentifierGeneration; + llvm::DenseMap IdentifierGeneration; /// Contains declarations and definitions that could be /// "interesting" to the ASTConsumer, when we get that AST consumer. @@ -2330,10 +2330,10 @@ class ASTReader void ReadDefinedMacros() override; /// Update an out-of-date identifier. - void updateOutOfDateIdentifier(IdentifierInfo &II) override; + void updateOutOfDateIdentifier(const IdentifierInfo &II) override; /// Note that this identifier is up-to-date. - void markIdentifierUpToDate(IdentifierInfo *II); + void markIdentifierUpToDate(const IdentifierInfo *II); /// Load all external visible decls in the given DeclContext. 
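One note on the ASTReader containers above: switching the map keys to pointer-to-const needs no accompanying changes, since llvm::DenseMapInfo's pointer specialization (via PointerLikeTypeTraits) already covers const-qualified pointees. A sketch under that assumption, with Ctx an assumed ASTContext:

  // Lookup and insertion are unchanged with a const-qualified key type.
  llvm::DenseMap<const IdentifierInfo *, unsigned> IdentifierGeneration;
  const IdentifierInfo *II = &Ctx.Idents.get("name"); // illustrative key
  unsigned Gen = IdentifierGeneration.lookup(II);     // 0 if absent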
void completeVisibleDeclsMap(const DeclContext *DC) override; diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp index 0786c81516b2d..b9dcfb8951b3e 100644 --- a/clang/lib/ARCMigrate/ObjCMT.cpp +++ b/clang/lib/ARCMigrate/ObjCMT.cpp @@ -1144,7 +1144,7 @@ static bool IsValidIdentifier(ASTContext &Ctx, return false; std::string NameString = Name; NameString[0] = toLowercase(NameString[0]); - IdentifierInfo *II = &Ctx.Idents.get(NameString); + const IdentifierInfo *II = &Ctx.Idents.get(NameString); return II->getTokenID() == tok::identifier; } @@ -1166,7 +1166,7 @@ bool ObjCMigrateASTConsumer::migrateProperty(ASTContext &Ctx, if (OIT_Family != OIT_None) return false; - IdentifierInfo *getterName = GetterSelector.getIdentifierInfoForSlot(0); + const IdentifierInfo *getterName = GetterSelector.getIdentifierInfoForSlot(0); Selector SetterSelector = SelectorTable::constructSetterSelector(PP.getIdentifierTable(), PP.getSelectorTable(), @@ -1311,7 +1311,8 @@ void ObjCMigrateASTConsumer::migrateFactoryMethod(ASTContext &Ctx, std::string StringLoweredClassName = LoweredClassName.lower(); LoweredClassName = StringLoweredClassName; - IdentifierInfo *MethodIdName = OM->getSelector().getIdentifierInfoForSlot(0); + const IdentifierInfo *MethodIdName = + OM->getSelector().getIdentifierInfoForSlot(0); // Handle method with no name at its first selector slot; e.g. + (id):(int)x. if (!MethodIdName) return; diff --git a/clang/lib/ARCMigrate/TransAPIUses.cpp b/clang/lib/ARCMigrate/TransAPIUses.cpp index 638850dcf9ecc..8f5d4f4bde06c 100644 --- a/clang/lib/ARCMigrate/TransAPIUses.cpp +++ b/clang/lib/ARCMigrate/TransAPIUses.cpp @@ -41,7 +41,7 @@ class APIChecker : public RecursiveASTVisitor { getReturnValueSel = sels.getUnarySelector(&ids.get("getReturnValue")); setReturnValueSel = sels.getUnarySelector(&ids.get("setReturnValue")); - IdentifierInfo *selIds[2]; + const IdentifierInfo *selIds[2]; selIds[0] = &ids.get("getArgument"); selIds[1] = &ids.get("atIndex"); getArgumentSel = sels.getSelector(2, selIds); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index f7f55dc4e7a9f..2fa6aedca4c6a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -6929,16 +6929,13 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const { // typedef typename T::type T1; // typedef typename T1::type T2; if (const auto *DNT = T->getAs()) - return NestedNameSpecifier::Create( - *this, DNT->getQualifier(), - const_cast(DNT->getIdentifier())); + return NestedNameSpecifier::Create(*this, DNT->getQualifier(), + DNT->getIdentifier()); if (const auto *DTST = T->getAs()) - return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true, - const_cast(T)); + return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true, T); // TODO: Set 'Template' parameter to true for other template types. 
- return NestedNameSpecifier::Create(*this, nullptr, false, - const_cast(T)); + return NestedNameSpecifier::Create(*this, nullptr, false, T); } case NestedNameSpecifier::Global: diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 45d4c9600537b..d5ec5ee409156 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8383,8 +8383,8 @@ ASTNodeImporter::VisitCXXPseudoDestructorExpr(CXXPseudoDestructorExpr *E) { return std::move(Err); PseudoDestructorTypeStorage Storage; - if (IdentifierInfo *FromII = E->getDestroyedTypeIdentifier()) { - IdentifierInfo *ToII = Importer.Import(FromII); + if (const IdentifierInfo *FromII = E->getDestroyedTypeIdentifier()) { + const IdentifierInfo *ToII = Importer.Import(FromII); ExpectedSLoc ToDestroyedTypeLocOrErr = import(E->getDestroyedTypeLoc()); if (!ToDestroyedTypeLocOrErr) return ToDestroyedTypeLocOrErr.takeError(); @@ -10194,7 +10194,7 @@ Expected ASTImporter::Import(Selector FromSel) { if (FromSel.isNull()) return Selector{}; - SmallVector Idents; + SmallVector Idents; Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(0))); for (unsigned I = 1, N = FromSel.getNumArgs(); I < N; ++I) Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(I))); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 131f82985e903..60e0a3aecf6c8 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2913,10 +2913,10 @@ VarDecl::setInstantiationOfStaticDataMember(VarDecl *VD, //===----------------------------------------------------------------------===// ParmVarDecl *ParmVarDecl::Create(ASTContext &C, DeclContext *DC, - SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id, - QualType T, TypeSourceInfo *TInfo, - StorageClass S, Expr *DefArg) { + SourceLocation StartLoc, SourceLocation IdLoc, + const IdentifierInfo *Id, QualType T, + TypeSourceInfo *TInfo, StorageClass S, + Expr *DefArg) { return new (C, DC) ParmVarDecl(ParmVar, C, DC, StartLoc, IdLoc, Id, T, TInfo, S, DefArg); } @@ -4511,7 +4511,7 @@ unsigned FunctionDecl::getODRHash() { FieldDecl *FieldDecl::Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, QualType T, + const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle) { return new (C, DC) FieldDecl(Decl::Field, DC, StartLoc, IdLoc, Id, T, TInfo, @@ -5438,7 +5438,7 @@ IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC, IndirectFieldDecl * IndirectFieldDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, - IdentifierInfo *Id, QualType T, + const IdentifierInfo *Id, QualType T, llvm::MutableArrayRef CH) { return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH); } @@ -5461,7 +5461,8 @@ void TypeDecl::anchor() {} TypedefDecl *TypedefDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, TypeSourceInfo *TInfo) { + const IdentifierInfo *Id, + TypeSourceInfo *TInfo) { return new (C, DC) TypedefDecl(C, DC, StartLoc, IdLoc, Id, TInfo); } @@ -5511,7 +5512,8 @@ TypedefDecl *TypedefDecl::CreateDeserialized(ASTContext &C, unsigned ID) { TypeAliasDecl *TypeAliasDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation IdLoc, + const IdentifierInfo *Id, TypeSourceInfo *TInfo) { return new (C, DC) TypeAliasDecl(C, DC, StartLoc, IdLoc, Id, TInfo); } diff --git a/clang/lib/AST/DeclObjC.cpp 
b/clang/lib/AST/DeclObjC.cpp index 962f503306a0f..32c14938cd588 100644 --- a/clang/lib/AST/DeclObjC.cpp +++ b/clang/lib/AST/DeclObjC.cpp @@ -66,7 +66,8 @@ void ObjCProtocolList::set(ObjCProtocolDecl* const* InList, unsigned Elts, //===----------------------------------------------------------------------===// ObjCContainerDecl::ObjCContainerDecl(Kind DK, DeclContext *DC, - IdentifierInfo *Id, SourceLocation nameLoc, + const IdentifierInfo *Id, + SourceLocation nameLoc, SourceLocation atStartLoc) : NamedDecl(DK, DC, nameLoc, Id), DeclContext(DK) { setAtStartLoc(atStartLoc); @@ -378,10 +379,8 @@ SourceLocation ObjCInterfaceDecl::getSuperClassLoc() const { /// FindPropertyVisibleInPrimaryClass - Finds declaration of the property /// with name 'PropertyId' in the primary class; including those in protocols /// (direct or indirect) used by the primary class. -ObjCPropertyDecl * -ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass( - IdentifierInfo *PropertyId, - ObjCPropertyQueryKind QueryKind) const { +ObjCPropertyDecl *ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass( + const IdentifierInfo *PropertyId, ObjCPropertyQueryKind QueryKind) const { // FIXME: Should make sure no callers ever do this. if (!hasDefinition()) return nullptr; @@ -1539,14 +1538,10 @@ void ObjCTypeParamList::gatherDefaultTypeArgs( // ObjCInterfaceDecl //===----------------------------------------------------------------------===// -ObjCInterfaceDecl *ObjCInterfaceDecl::Create(const ASTContext &C, - DeclContext *DC, - SourceLocation atLoc, - IdentifierInfo *Id, - ObjCTypeParamList *typeParamList, - ObjCInterfaceDecl *PrevDecl, - SourceLocation ClassLoc, - bool isInternal){ +ObjCInterfaceDecl *ObjCInterfaceDecl::Create( + const ASTContext &C, DeclContext *DC, SourceLocation atLoc, + const IdentifierInfo *Id, ObjCTypeParamList *typeParamList, + ObjCInterfaceDecl *PrevDecl, SourceLocation ClassLoc, bool isInternal) { auto *Result = new (C, DC) ObjCInterfaceDecl(C, DC, atLoc, Id, typeParamList, ClassLoc, PrevDecl, isInternal); @@ -1564,12 +1559,10 @@ ObjCInterfaceDecl *ObjCInterfaceDecl::CreateDeserialized(const ASTContext &C, return Result; } -ObjCInterfaceDecl::ObjCInterfaceDecl(const ASTContext &C, DeclContext *DC, - SourceLocation AtLoc, IdentifierInfo *Id, - ObjCTypeParamList *typeParamList, - SourceLocation CLoc, - ObjCInterfaceDecl *PrevDecl, - bool IsInternal) +ObjCInterfaceDecl::ObjCInterfaceDecl( + const ASTContext &C, DeclContext *DC, SourceLocation AtLoc, + const IdentifierInfo *Id, ObjCTypeParamList *typeParamList, + SourceLocation CLoc, ObjCInterfaceDecl *PrevDecl, bool IsInternal) : ObjCContainerDecl(ObjCInterface, DC, Id, CLoc, AtLoc), redeclarable_base(C) { setPreviousDecl(PrevDecl); @@ -1751,8 +1744,8 @@ ObjCIvarDecl *ObjCInterfaceDecl::all_declared_ivar_begin() { /// categories for this class and returns it. Name of the category is passed /// in 'CategoryId'. If category not found, return 0; /// -ObjCCategoryDecl * -ObjCInterfaceDecl::FindCategoryDeclaration(IdentifierInfo *CategoryId) const { +ObjCCategoryDecl *ObjCInterfaceDecl::FindCategoryDeclaration( + const IdentifierInfo *CategoryId) const { // FIXME: Should make sure no callers ever do this. 
if (!hasDefinition()) return nullptr; @@ -1838,10 +1831,10 @@ void ObjCIvarDecl::anchor() {} ObjCIvarDecl *ObjCIvarDecl::Create(ASTContext &C, ObjCContainerDecl *DC, SourceLocation StartLoc, - SourceLocation IdLoc, IdentifierInfo *Id, - QualType T, TypeSourceInfo *TInfo, - AccessControl ac, Expr *BW, - bool synthesized) { + SourceLocation IdLoc, + const IdentifierInfo *Id, QualType T, + TypeSourceInfo *TInfo, AccessControl ac, + Expr *BW, bool synthesized) { if (DC) { // Ivar's can only appear in interfaces, implementations (via synthesized // properties), and class extensions (via direct declaration, or synthesized @@ -2120,28 +2113,23 @@ void ObjCProtocolDecl::setHasODRHash(bool HasHash) { void ObjCCategoryDecl::anchor() {} -ObjCCategoryDecl::ObjCCategoryDecl(DeclContext *DC, SourceLocation AtLoc, - SourceLocation ClassNameLoc, - SourceLocation CategoryNameLoc, - IdentifierInfo *Id, ObjCInterfaceDecl *IDecl, - ObjCTypeParamList *typeParamList, - SourceLocation IvarLBraceLoc, - SourceLocation IvarRBraceLoc) +ObjCCategoryDecl::ObjCCategoryDecl( + DeclContext *DC, SourceLocation AtLoc, SourceLocation ClassNameLoc, + SourceLocation CategoryNameLoc, const IdentifierInfo *Id, + ObjCInterfaceDecl *IDecl, ObjCTypeParamList *typeParamList, + SourceLocation IvarLBraceLoc, SourceLocation IvarRBraceLoc) : ObjCContainerDecl(ObjCCategory, DC, Id, ClassNameLoc, AtLoc), ClassInterface(IDecl), CategoryNameLoc(CategoryNameLoc), IvarLBraceLoc(IvarLBraceLoc), IvarRBraceLoc(IvarRBraceLoc) { setTypeParamList(typeParamList); } -ObjCCategoryDecl *ObjCCategoryDecl::Create(ASTContext &C, DeclContext *DC, - SourceLocation AtLoc, - SourceLocation ClassNameLoc, - SourceLocation CategoryNameLoc, - IdentifierInfo *Id, - ObjCInterfaceDecl *IDecl, - ObjCTypeParamList *typeParamList, - SourceLocation IvarLBraceLoc, - SourceLocation IvarRBraceLoc) { +ObjCCategoryDecl *ObjCCategoryDecl::Create( + ASTContext &C, DeclContext *DC, SourceLocation AtLoc, + SourceLocation ClassNameLoc, SourceLocation CategoryNameLoc, + const IdentifierInfo *Id, ObjCInterfaceDecl *IDecl, + ObjCTypeParamList *typeParamList, SourceLocation IvarLBraceLoc, + SourceLocation IvarRBraceLoc) { auto *CatDecl = new (C, DC) ObjCCategoryDecl(DC, AtLoc, ClassNameLoc, CategoryNameLoc, Id, IDecl, typeParamList, IvarLBraceLoc, @@ -2190,13 +2178,10 @@ void ObjCCategoryDecl::setTypeParamList(ObjCTypeParamList *TPL) { void ObjCCategoryImplDecl::anchor() {} -ObjCCategoryImplDecl * -ObjCCategoryImplDecl::Create(ASTContext &C, DeclContext *DC, - IdentifierInfo *Id, - ObjCInterfaceDecl *ClassInterface, - SourceLocation nameLoc, - SourceLocation atStartLoc, - SourceLocation CategoryNameLoc) { +ObjCCategoryImplDecl *ObjCCategoryImplDecl::Create( + ASTContext &C, DeclContext *DC, const IdentifierInfo *Id, + ObjCInterfaceDecl *ClassInterface, SourceLocation nameLoc, + SourceLocation atStartLoc, SourceLocation CategoryNameLoc) { if (ClassInterface && ClassInterface->hasDefinition()) ClassInterface = ClassInterface->getDefinition(); return new (C, DC) ObjCCategoryImplDecl(DC, Id, ClassInterface, nameLoc, @@ -2365,14 +2350,11 @@ ObjCCompatibleAliasDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void ObjCPropertyDecl::anchor() {} -ObjCPropertyDecl *ObjCPropertyDecl::Create(ASTContext &C, DeclContext *DC, - SourceLocation L, - IdentifierInfo *Id, - SourceLocation AtLoc, - SourceLocation LParenLoc, - QualType T, - TypeSourceInfo *TSI, - PropertyControl propControl) { +ObjCPropertyDecl * +ObjCPropertyDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, + const 
IdentifierInfo *Id, SourceLocation AtLoc, + SourceLocation LParenLoc, QualType T, + TypeSourceInfo *TSI, PropertyControl propControl) { return new (C, DC) ObjCPropertyDecl(DC, L, Id, AtLoc, LParenLoc, T, TSI, propControl); } diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 3c217d6a6a5ae..571ed81a42e40 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -715,7 +715,7 @@ void TemplateTypeParmDecl::setTypeConstraint( NonTypeTemplateParmDecl::NonTypeTemplateParmDecl( DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, unsigned D, - unsigned P, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, + unsigned P, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, ArrayRef ExpandedTypes, ArrayRef ExpandedTInfos) : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc), TemplateParmPosition(D, P), ParameterPack(true), @@ -730,12 +730,10 @@ NonTypeTemplateParmDecl::NonTypeTemplateParmDecl( } } -NonTypeTemplateParmDecl * -NonTypeTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC, - SourceLocation StartLoc, SourceLocation IdLoc, - unsigned D, unsigned P, IdentifierInfo *Id, - QualType T, bool ParameterPack, - TypeSourceInfo *TInfo) { +NonTypeTemplateParmDecl *NonTypeTemplateParmDecl::Create( + const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, + SourceLocation IdLoc, unsigned D, unsigned P, const IdentifierInfo *Id, + QualType T, bool ParameterPack, TypeSourceInfo *TInfo) { AutoType *AT = C.getLangOpts().CPlusPlus20 ? T->getContainedAutoType() : nullptr; return new (C, DC, @@ -748,7 +746,7 @@ NonTypeTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC, NonTypeTemplateParmDecl *NonTypeTemplateParmDecl::Create( const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, - SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id, + SourceLocation IdLoc, unsigned D, unsigned P, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, ArrayRef ExpandedTypes, ArrayRef ExpandedTInfos) { AutoType *AT = TInfo->getType()->getContainedAutoType(); diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index 86dee540e9e29..ecc56c13fb757 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -56,10 +56,8 @@ Selector NSAPI::getNSStringSelector(NSStringMethodKind MK) const { &Ctx.Idents.get("initWithUTF8String")); break; case NSStr_stringWithCStringEncoding: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("stringWithCString"), - &Ctx.Idents.get("encoding") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("stringWithCString"), + &Ctx.Idents.get("encoding")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -93,10 +91,8 @@ Selector NSAPI::getNSArraySelector(NSArrayMethodKind MK) const { Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("arrayWithObjects")); break; case NSArr_arrayWithObjectsCount: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("arrayWithObjects"), - &Ctx.Idents.get("count") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("arrayWithObjects"), + &Ctx.Idents.get("count")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -110,10 +106,9 @@ Selector NSAPI::getNSArraySelector(NSArrayMethodKind MK) const { Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("objectAtIndex")); break; case NSMutableArr_replaceObjectAtIndex: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("replaceObjectAtIndex"), - &Ctx.Idents.get("withObject") - }; + const IdentifierInfo *KeyIdents[] = { + 
&Ctx.Idents.get("replaceObjectAtIndex"), + &Ctx.Idents.get("withObject")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -121,18 +116,14 @@ Selector NSAPI::getNSArraySelector(NSArrayMethodKind MK) const { Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("addObject")); break; case NSMutableArr_insertObjectAtIndex: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("insertObject"), - &Ctx.Idents.get("atIndex") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("insertObject"), + &Ctx.Idents.get("atIndex")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSMutableArr_setObjectAtIndexedSubscript: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setObject"), - &Ctx.Idents.get("atIndexedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("setObject"), &Ctx.Idents.get("atIndexedSubscript")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -167,27 +158,21 @@ Selector NSAPI::getNSDictionarySelector( &Ctx.Idents.get("dictionaryWithDictionary")); break; case NSDict_dictionaryWithObjectForKey: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("dictionaryWithObject"), - &Ctx.Idents.get("forKey") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("dictionaryWithObject"), &Ctx.Idents.get("forKey")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSDict_dictionaryWithObjectsForKeys: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("dictionaryWithObjects"), - &Ctx.Idents.get("forKeys") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("dictionaryWithObjects"), &Ctx.Idents.get("forKeys")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSDict_dictionaryWithObjectsForKeysCount: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("dictionaryWithObjects"), - &Ctx.Idents.get("forKeys"), - &Ctx.Idents.get("count") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("dictionaryWithObjects"), &Ctx.Idents.get("forKeys"), + &Ctx.Idents.get("count")}; Sel = Ctx.Selectors.getSelector(3, KeyIdents); break; } @@ -204,10 +189,8 @@ Selector NSAPI::getNSDictionarySelector( &Ctx.Idents.get("initWithObjectsAndKeys")); break; case NSDict_initWithObjectsForKeys: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("initWithObjects"), - &Ctx.Idents.get("forKeys") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("initWithObjects"), + &Ctx.Idents.get("forKeys")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -215,26 +198,20 @@ Selector NSAPI::getNSDictionarySelector( Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("objectForKey")); break; case NSMutableDict_setObjectForKey: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setObject"), - &Ctx.Idents.get("forKey") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("setObject"), + &Ctx.Idents.get("forKey")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSMutableDict_setObjectForKeyedSubscript: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setObject"), - &Ctx.Idents.get("forKeyedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("setObject"), &Ctx.Idents.get("forKeyedSubscript")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSMutableDict_setValueForKey: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setValue"), - &Ctx.Idents.get("forKey") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("setValue"), + &Ctx.Idents.get("forKey")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -264,34 
+241,27 @@ Selector NSAPI::getNSSetSelector(NSSetMethodKind MK) const { Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("addObject")); break; case NSOrderedSet_insertObjectAtIndex: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("insertObject"), - &Ctx.Idents.get("atIndex") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("insertObject"), + &Ctx.Idents.get("atIndex")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSOrderedSet_setObjectAtIndex: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setObject"), - &Ctx.Idents.get("atIndex") - }; + const IdentifierInfo *KeyIdents[] = {&Ctx.Idents.get("setObject"), + &Ctx.Idents.get("atIndex")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSOrderedSet_setObjectAtIndexedSubscript: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("setObject"), - &Ctx.Idents.get("atIndexedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("setObject"), &Ctx.Idents.get("atIndexedSubscript")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } case NSOrderedSet_replaceObjectAtIndexWithObject: { - IdentifierInfo *KeyIdents[] = { - &Ctx.Idents.get("replaceObjectAtIndex"), - &Ctx.Idents.get("withObject") - }; + const IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("replaceObjectAtIndex"), + &Ctx.Idents.get("withObject")}; Sel = Ctx.Selectors.getSelector(2, KeyIdents); break; } @@ -606,7 +576,7 @@ bool NSAPI::isObjCEnumerator(const Expr *E, Selector NSAPI::getOrInitSelector(ArrayRef Ids, Selector &Sel) const { if (Sel.isNull()) { - SmallVector Idents; + SmallVector Idents; for (ArrayRef::const_iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) Idents.push_back(&Ctx.Idents.get(*I)); @@ -617,7 +587,7 @@ Selector NSAPI::getOrInitSelector(ArrayRef Ids, Selector NSAPI::getOrInitNullarySelector(StringRef Id, Selector &Sel) const { if (Sel.isNull()) { - IdentifierInfo *Ident = &Ctx.Idents.get(Id); + const IdentifierInfo *Ident = &Ctx.Idents.get(Id); Sel = Ctx.Selectors.getSelector(0, &Ident); } return Sel; diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index 36f2c47b30005..785c46e86a77c 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -55,16 +55,16 @@ NestedNameSpecifier::FindOrInsert(const ASTContext &Context, return NNS; } -NestedNameSpecifier * -NestedNameSpecifier::Create(const ASTContext &Context, - NestedNameSpecifier *Prefix, IdentifierInfo *II) { +NestedNameSpecifier *NestedNameSpecifier::Create(const ASTContext &Context, + NestedNameSpecifier *Prefix, + const IdentifierInfo *II) { assert(II && "Identifier cannot be NULL"); assert((!Prefix || Prefix->isDependent()) && "Prefix must be dependent"); NestedNameSpecifier Mockup; Mockup.Prefix.setPointer(Prefix); Mockup.Prefix.setInt(StoredIdentifier); - Mockup.Specifier = II; + Mockup.Specifier = const_cast(II); return FindOrInsert(Context, Mockup); } @@ -87,7 +87,7 @@ NestedNameSpecifier::Create(const ASTContext &Context, NestedNameSpecifier * NestedNameSpecifier::Create(const ASTContext &Context, NestedNameSpecifier *Prefix, - NamespaceAliasDecl *Alias) { + const NamespaceAliasDecl *Alias) { assert(Alias && "Namespace alias cannot be NULL"); assert((!Prefix || (Prefix->getAsType() == nullptr && @@ -96,7 +96,7 @@ NestedNameSpecifier::Create(const ASTContext &Context, NestedNameSpecifier Mockup; Mockup.Prefix.setPointer(Prefix); Mockup.Prefix.setInt(StoredDecl); - Mockup.Specifier = Alias; + Mockup.Specifier = const_cast(Alias); return 
FindOrInsert(Context, Mockup); } @@ -112,13 +112,13 @@ NestedNameSpecifier::Create(const ASTContext &Context, return FindOrInsert(Context, Mockup); } -NestedNameSpecifier * -NestedNameSpecifier::Create(const ASTContext &Context, IdentifierInfo *II) { +NestedNameSpecifier *NestedNameSpecifier::Create(const ASTContext &Context, + const IdentifierInfo *II) { assert(II && "Identifier cannot be NULL"); NestedNameSpecifier Mockup; Mockup.Prefix.setPointer(nullptr); Mockup.Prefix.setInt(StoredIdentifier); - Mockup.Specifier = II; + Mockup.Specifier = const_cast(II); return FindOrInsert(Context, Mockup); } diff --git a/clang/lib/AST/SelectorLocationsKind.cpp b/clang/lib/AST/SelectorLocationsKind.cpp index 2c34c9c60c2b2..ebe6324f904c7 100644 --- a/clang/lib/AST/SelectorLocationsKind.cpp +++ b/clang/lib/AST/SelectorLocationsKind.cpp @@ -26,7 +26,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, assert(Index == 0); if (EndLoc.isInvalid()) return SourceLocation(); - IdentifierInfo *II = Sel.getIdentifierInfoForSlot(0); + const IdentifierInfo *II = Sel.getIdentifierInfoForSlot(0); unsigned Len = II ? II->getLength() : 0; return EndLoc.getLocWithOffset(-Len); } @@ -34,7 +34,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, assert(Index < NumSelArgs); if (ArgLoc.isInvalid()) return SourceLocation(); - IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Index); + const IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Index); unsigned Len = /* selector id */ (II ? II->getLength() : 0) + /* ':' */ 1; if (WithArgSpace) ++Len; diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 2ba93d17f2675..5855ab3141edc 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1447,7 +1447,7 @@ void StmtPrinter::VisitOffsetOfExpr(OffsetOfExpr *Node) { continue; // Field or identifier node. - IdentifierInfo *Id = ON.getFieldName(); + const IdentifierInfo *Id = ON.getFieldName(); if (!Id) continue; @@ -2348,7 +2348,7 @@ void StmtPrinter::VisitCXXPseudoDestructorExpr(CXXPseudoDestructorExpr *E) { E->getQualifier()->print(OS, Policy); OS << "~"; - if (IdentifierInfo *II = E->getDestroyedTypeIdentifier()) + if (const IdentifierInfo *II = E->getDestroyedTypeIdentifier()) OS << II->getName(); else E->getDestroyedType().print(OS, Policy); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index be3dd4b673cf9..01e1d1cc8289b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -61,7 +61,7 @@ namespace { virtual void VisitName(DeclarationName Name, bool TreatAsDecl = false) = 0; /// Visit identifiers that are not in Decl's or Type's. - virtual void VisitIdentifierInfo(IdentifierInfo *II) = 0; + virtual void VisitIdentifierInfo(const IdentifierInfo *II) = 0; /// Visit a nested-name-specifier that occurs within an expression /// or statement. 
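A note on the NestedNameSpecifier.cpp hunks above: the uniquing key (Specifier) is still stored as a non-const pointer, so each constified Create overload performs a single const_cast at the Mockup assignment instead of pushing casts out to callers. Caller code then reduces to a sketch like this (Ctx and the identifier name are assumptions for illustration):

  // Create a dependent nested-name-specifier from a const identifier;
  // the call site no longer needs a const_cast.
  const IdentifierInfo *II = &Ctx.Idents.get("value_type");
  NestedNameSpecifier *NNS = NestedNameSpecifier::Create(Ctx, II);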
@@ -163,7 +163,7 @@ namespace { ID.AddPointer(Name.getAsOpaquePtr()); } - void VisitIdentifierInfo(IdentifierInfo *II) override { + void VisitIdentifierInfo(const IdentifierInfo *II) override { ID.AddPointer(II); } @@ -211,7 +211,7 @@ namespace { } Hash.AddDeclarationName(Name, TreatAsDecl); } - void VisitIdentifierInfo(IdentifierInfo *II) override { + void VisitIdentifierInfo(const IdentifierInfo *II) override { ID.AddBoolean(II); if (II) { Hash.AddIdentifierInfo(II); diff --git a/clang/lib/Analysis/ObjCNoReturn.cpp b/clang/lib/Analysis/ObjCNoReturn.cpp index 9d7c365c3b992..9e651c29e085d 100644 --- a/clang/lib/Analysis/ObjCNoReturn.cpp +++ b/clang/lib/Analysis/ObjCNoReturn.cpp @@ -17,7 +17,8 @@ using namespace clang; -static bool isSubclass(const ObjCInterfaceDecl *Class, IdentifierInfo *II) { +static bool isSubclass(const ObjCInterfaceDecl *Class, + const IdentifierInfo *II) { if (!Class) return false; if (Class->getIdentifier() == II) @@ -30,7 +31,7 @@ ObjCNoReturn::ObjCNoReturn(ASTContext &C) NSExceptionII(&C.Idents.get("NSException")) { // Generate selectors. - SmallVector II; + SmallVector II; // raise:format: II.push_back(&C.Idents.get("raise")); diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index a9b07aca65c05..feea84544d62f 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -541,7 +541,8 @@ unsigned Selector::getNumArgs() const { return SI->getNumArgs(); } -IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { +const IdentifierInfo * +Selector::getIdentifierInfoForSlot(unsigned argIndex) const { if (getIdentifierInfoFlag() < MultiArg) { assert(argIndex == 0 && "illegal keyword index"); return getAsIdentifierInfo(); @@ -553,7 +554,7 @@ IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { } StringRef Selector::getNameForSlot(unsigned int argIndex) const { - IdentifierInfo *II = getIdentifierInfoForSlot(argIndex); + const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex); return II ? 
II->getName() : StringRef(); } @@ -574,7 +575,7 @@ std::string Selector::getAsString() const { return ""; if (getIdentifierInfoFlag() < MultiArg) { - IdentifierInfo *II = getAsIdentifierInfo(); + const IdentifierInfo *II = getAsIdentifierInfo(); if (getNumArgs() == 0) { assert(II && "If the number of arguments is 0 then II is guaranteed to " @@ -608,7 +609,7 @@ static bool startsWithWord(StringRef name, StringRef word) { } ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) { - IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); + const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); if (!first) return OMF_None; StringRef name = first->getName(); @@ -655,7 +656,7 @@ ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) { } ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) { - IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); + const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); if (!first) return OIT_None; StringRef name = first->getName(); @@ -683,7 +684,7 @@ ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) { } ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) { - IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); + const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); if (!first) return SFF_None; StringRef name = first->getName(); @@ -750,7 +751,8 @@ size_t SelectorTable::getTotalMemory() const { return SelTabImpl.Allocator.getTotalMemory(); } -Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { +Selector SelectorTable::getSelector(unsigned nKeys, + const IdentifierInfo **IIV) { if (nKeys < 2) return Selector(IIV[0], nKeys); diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 47f063b5501cc..2742c39965b2c 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1447,7 +1447,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction( selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( getContext().VoidTy, LangAS::opencl_generic)); - IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); + const IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); ImplicitParamDecl SelfDecl(getContext(), const_cast(blockDecl), SourceLocation(), II, selfTy, @@ -2791,7 +2791,7 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, auto *GV = cast(C->stripPointerCasts()); if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) { - IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); + const IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 0cb5b06a519c0..370642cb3d536 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -361,7 +361,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, KernelLaunchAPI = KernelLaunchAPI + "_ptsz"; } auto LaunchKernelName = addPrefixToName(KernelLaunchAPI); - IdentifierInfo &cudaLaunchKernelII = + const IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (auto *Result : DC->lookup(&cudaLaunchKernelII)) { diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 4f4013292b1fc..8bdafa7c569b0 100644 --- 
a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1384,7 +1384,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( // For each dimension stores its QualType and corresponding // size-expression Value. SmallVector Dimensions; - SmallVector VLAExprNames; + SmallVector VLAExprNames; // Break down the array into individual dimensions. QualType Type1D = D.getType(); @@ -1421,7 +1421,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( MD = llvm::ConstantAsMetadata::get(C); else { // Create an artificial VarDecl to generate debug info for. - IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; + const IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; auto QT = getContext().getIntTypeForBitwidth( SizeTy->getScalarSizeInBits(), false); auto *ArtificialDecl = VarDecl::Create( diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index c7f497a7c8451..ee571995ce4c3 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -1789,11 +1789,10 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ static const unsigned NumItems = 16; // Fetch the countByEnumeratingWithState:objects:count: selector. - IdentifierInfo *II[] = { - &CGM.getContext().Idents.get("countByEnumeratingWithState"), - &CGM.getContext().Idents.get("objects"), - &CGM.getContext().Idents.get("count") - }; + const IdentifierInfo *II[] = { + &CGM.getContext().Idents.get("countByEnumeratingWithState"), + &CGM.getContext().Idents.get("objects"), + &CGM.getContext().Idents.get("count")}; Selector FastEnumSel = CGM.getContext().Selectors.getSelector(std::size(II), &II[0]); @@ -2720,7 +2719,7 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() { CGObjCRuntime &Runtime = CGM.getObjCRuntime(); llvm::Value *Receiver = Runtime.EmitNSAutoreleasePoolClassRef(*this); // [NSAutoreleasePool alloc] - IdentifierInfo *II = &CGM.getContext().Idents.get("alloc"); + const IdentifierInfo *II = &CGM.getContext().Idents.get("alloc"); Selector AllocSel = getContext().Selectors.getSelector(0, &II); CallArgList Args; RValue AllocRV = @@ -2767,7 +2766,7 @@ llvm::Value *CodeGenFunction::EmitObjCAllocInit(llvm::Value *value, /// Produce the code to do a primitive release. 
/// [tmp drain]; void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { - IdentifierInfo *II = &CGM.getContext().Idents.get("drain"); + const IdentifierInfo *II = &CGM.getContext().Idents.get("drain"); Selector DrainSel = getContext().Selectors.getSelector(0, &II); CallArgList Args; CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), @@ -3715,8 +3714,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( if ((HelperFn = CGM.getAtomicSetterHelperFnMap(Ty))) return HelperFn; - IdentifierInfo *II - = &CGM.getContext().Idents.get("__assign_helper_atomic_property_"); + const IdentifierInfo *II = + &CGM.getContext().Idents.get("__assign_helper_atomic_property_"); QualType ReturnTy = C.VoidTy; QualType DestTy = C.getPointerType(Ty); @@ -3813,7 +3812,7 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty))) return HelperFn; - IdentifierInfo *II = + const IdentifierInfo *II = &CGM.getContext().Idents.get("__copy_helper_atomic_property_"); QualType ReturnTy = C.VoidTy; @@ -3907,10 +3906,10 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( llvm::Value * CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) { // Get selectors for retain/autorelease. - IdentifierInfo *CopyID = &getContext().Idents.get("copy"); + const IdentifierInfo *CopyID = &getContext().Idents.get("copy"); Selector CopySelector = getContext().Selectors.getNullarySelector(CopyID); - IdentifierInfo *AutoreleaseID = &getContext().Idents.get("autorelease"); + const IdentifierInfo *AutoreleaseID = &getContext().Idents.get("autorelease"); Selector AutoreleaseSelector = getContext().Selectors.getNullarySelector(AutoreleaseID); diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 8a599c10e1caf..042cd5d46da4b 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1555,12 +1555,12 @@ class CGObjCNonFragileABIMac : public CGObjCCommonMac { // Shamelessly stolen from Analysis/CFRefCount.cpp Selector GetNullarySelector(const char* name) const { - IdentifierInfo* II = &CGM.getContext().Idents.get(name); + const IdentifierInfo *II = &CGM.getContext().Idents.get(name); return CGM.getContext().Selectors.getSelector(0, &II); } Selector GetUnarySelector(const char* name) const { - IdentifierInfo* II = &CGM.getContext().Idents.get(name); + const IdentifierInfo *II = &CGM.getContext().Idents.get(name); return CGM.getContext().Selectors.getSelector(1, &II); } @@ -6268,11 +6268,10 @@ bool CGObjCNonFragileABIMac::isVTableDispatchedSelector(Selector Sel) { VTableDispatchMethods.insert(GetUnarySelector("addObject")); // "countByEnumeratingWithState:objects:count" - IdentifierInfo *KeyIdents[] = { - &CGM.getContext().Idents.get("countByEnumeratingWithState"), - &CGM.getContext().Idents.get("objects"), - &CGM.getContext().Idents.get("count") - }; + const IdentifierInfo *KeyIdents[] = { + &CGM.getContext().Idents.get("countByEnumeratingWithState"), + &CGM.getContext().Idents.get("objects"), + &CGM.getContext().Idents.get("count")}; VTableDispatchMethods.insert( CGM.getContext().Selectors.getSelector(3, KeyIdents)); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index a2d746bb8f4f9..87766a758311d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -828,7 +828,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // 
.cxx_destruct, __destroy_helper_block_ and all of their callees at run time. if (SanOpts.has(SanitizerKind::Thread)) { if (const auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { - IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0); + const IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0); if (OMD->getMethodFamily() == OMF_dealloc || OMD->getMethodFamily() == OMF_initialize || (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 75519be8bba05..b15031dca4686 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6626,7 +6626,7 @@ static bool AllTrivialInitializers(CodeGenModule &CGM, void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // We might need a .cxx_destruct even if we don't have any ivar initializers. if (needsDestructMethod(D)) { - IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); + const IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, @@ -6646,7 +6646,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { AllTrivialInitializers(*this, D)) return; - IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); + const IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create( @@ -7214,7 +7214,7 @@ void CodeGenModule::EmitStaticExternCAliases() { if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases()) return; for (auto &I : StaticExternCValues) { - IdentifierInfo *Name = I.first; + const IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; // If Val is null, that implies there were multiple declarations that each diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 1f40db785981d..6ae955a2380b7 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -592,7 +592,7 @@ namespace { } bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const { - IdentifierInfo* II = &Context->Idents.get("load"); + const IdentifierInfo *II = &Context->Idents.get("load"); Selector LoadSel = Context->Selectors.getSelector(0, &II); return OD->getClassMethod(LoadSel) != nullptr; } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 7dffcf0e941e0..f0750e5336b6a 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -64,8 +64,7 @@ HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) { if (ControllingMacro->isOutOfDate()) { assert(External && "We must have an external source if we have a " "controlling macro that is out of date."); - External->updateOutOfDateIdentifier( - *const_cast<IdentifierInfo *>(ControllingMacro)); + External->updateOutOfDateIdentifier(*ControllingMacro); } return ControllingMacro; } diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp index 39bb0f44eff25..dfdf463665f3c 100644 --- a/clang/lib/Lex/MacroInfo.cpp +++ b/clang/lib/Lex/MacroInfo.cpp @@ -257,7 +257,7 @@ LLVM_DUMP_METHOD void MacroDirective::dump() const { } ModuleMacro *ModuleMacro::create(Preprocessor &PP,
Module *OwningModule, - IdentifierInfo *II, MacroInfo *Macro, + const IdentifierInfo *II, MacroInfo *Macro, ArrayRef<ModuleMacro *> Overrides) { void *Mem = PP.getPreprocessorAllocator().Allocate( sizeof(ModuleMacro) + sizeof(ModuleMacro *) * Overrides.size(), diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 3b1b6df1dbae4..2ca2122ac7109 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -368,8 +368,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { // Okay, this has a controlling macro, remember in HeaderFileInfo. if (OptionalFileEntryRef FE = CurPPLexer->getFileEntry()) { HeaderInfo.SetFileControllingMacro(*FE, ControllingMacro); - if (MacroInfo *MI = - getMacroInfo(const_cast<IdentifierInfo *>(ControllingMacro))) + if (MacroInfo *MI = getMacroInfo(ControllingMacro)) MI->setUsedForHeaderGuard(true); if (const IdentifierInfo *DefinedMacro = CurPPLexer->MIOpt.GetDefinedMacro()) { @@ -805,7 +804,7 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { llvm::SmallPtrSet<const IdentifierInfo *, 8> VisitedMacros; for (unsigned I = Info.OuterPendingModuleMacroNames; I != PendingModuleMacroNames.size(); ++I) { - auto *II = const_cast<IdentifierInfo *>(PendingModuleMacroNames[I]); + const auto *II = PendingModuleMacroNames[I]; if (!VisitedMacros.insert(II).second) continue; @@ -855,8 +854,8 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { // Don't bother creating a module macro if it would represent a #undef // that doesn't override anything. if (Def || !Macro.getOverriddenMacros().empty()) - addModuleMacro(LeavingMod, II, Def, - Macro.getOverriddenMacros(), IsNew); + addModuleMacro(LeavingMod, II, Def, Macro.getOverriddenMacros(), + IsNew); if (!getLangOpts().ModulesLocalVisibility) { // This macro is exposed to the rest of this compilation as a
- II->setHasMacroDefinition(true); + const_cast<IdentifierInfo *>(II)->setHasMacroDefinition(true); New = true; return MM; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 031ed1e16bb8f..0b70192743a39 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -759,7 +759,7 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); } -void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { +void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const { assert(II.isOutOfDate() && "not out of date"); getExternalSource()->updateOutOfDateIdentifier(II); } diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 0aa14b0510746..583232f2d610d 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -7700,7 +7700,7 @@ void Parser::ParseParameterDeclarationClause( } // Remember this parsed parameter in ParamInfo. - IdentifierInfo *ParmII = ParmDeclarator.getIdentifier(); + const IdentifierInfo *ParmII = ParmDeclarator.getIdentifier(); // DefArgToks is used when the parsing of default arguments needs // to be delayed. diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 861a25dc5103c..477d81cdc2c23 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -616,7 +616,7 @@ bool Parser::ParseUsingDeclarator(DeclaratorContext Context, } // Parse nested-name-specifier. - IdentifierInfo *LastII = nullptr; + const IdentifierInfo *LastII = nullptr; if (ParseOptionalCXXScopeSpecifier(D.SS, /*ObjectType=*/nullptr, /*ObjectHasErrors=*/false, /*EnteringContext=*/false, diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 73c85c585baae..43d6105dcf31c 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -157,7 +157,8 @@ void Parser::CheckForTemplateAndDigraph(Token &Next, ParsedType ObjectType, bool Parser::ParseOptionalCXXScopeSpecifier( CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors, bool EnteringContext, bool *MayBePseudoDestructor, bool IsTypename, - IdentifierInfo **LastII, bool OnlyNamespace, bool InUsingDeclaration) { + const IdentifierInfo **LastII, bool OnlyNamespace, + bool InUsingDeclaration) { assert(getLangOpts().CPlusPlus && "Call sites of this function should be guarded by checking for C++"); @@ -2626,7 +2627,7 @@ bool Parser::ParseUnqualifiedIdTemplateId( // UnqualifiedId. // FIXME: Store name for literal operator too. - IdentifierInfo *TemplateII = + const IdentifierInfo *TemplateII = Id.getKind() == UnqualifiedIdKind::IK_Identifier ? Id.Identifier : nullptr; OverloadedOperatorKind OpKind = diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp index 88bab0eb27a3e..887d7a36cee7e 100644 --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -799,11 +799,11 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey, addedToDeclSpec); // Install the property declarator into interfaceDecl. - IdentifierInfo *SelName = + const IdentifierInfo *SelName = OCDS.getGetterName() ?
OCDS.getGetterName() : FD.D.getIdentifier(); Selector GetterSel = PP.getSelectorTable().getNullarySelector(SelName); - IdentifierInfo *SetterName = OCDS.getSetterName(); + const IdentifierInfo *SetterName = OCDS.getSetterName(); Selector SetterSel; if (SetterName) SetterSel = PP.getSelectorTable().getSelector(1, &SetterName); @@ -1445,7 +1445,7 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, return Result; } - SmallVector KeyIdents; + SmallVector KeyIdents; SmallVector KeyLocs; SmallVector ArgInfos; ParseScope PrototypeScope(this, Scope::FunctionPrototypeScope | @@ -1541,7 +1541,7 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, Declarator ParmDecl(DS, ParsedAttributesView::none(), DeclaratorContext::Prototype); ParseDeclarator(ParmDecl); - IdentifierInfo *ParmII = ParmDecl.getIdentifier(); + const IdentifierInfo *ParmII = ParmDecl.getIdentifier(); Decl *Param = Actions.ActOnParamDeclarator(getCurScope(), ParmDecl); CParamInfo.push_back(DeclaratorChunk::ParamInfo(ParmII, ParmDecl.getIdentifierLoc(), @@ -3242,7 +3242,7 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc, SourceLocation Loc; IdentifierInfo *selIdent = ParseObjCSelectorPiece(Loc); - SmallVector KeyIdents; + SmallVector KeyIdents; SmallVector KeyLocs; ExprVector KeyExprs; @@ -3642,7 +3642,7 @@ ExprResult Parser::ParseObjCSelectorExpression(SourceLocation AtLoc) { if (Tok.isNot(tok::l_paren)) return ExprError(Diag(Tok, diag::err_expected_lparen_after) << "@selector"); - SmallVector KeyIdents; + SmallVector KeyIdents; SourceLocation sLoc; BalancedDelimiterTracker T(*this, tok::l_paren); diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index d4897f8f66072..03257500426e5 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -313,7 +313,7 @@ Parser::ParseConceptDefinition(const ParsedTemplateInfo &TemplateInfo, return nullptr; } - IdentifierInfo *Id = Result.Identifier; + const IdentifierInfo *Id = Result.Identifier; SourceLocation IdLoc = Result.getBeginLoc(); DiagnoseAndSkipCXX11Attributes(); @@ -1289,7 +1289,7 @@ bool Parser::AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK, // later. Tok.setKind(tok::annot_template_id); - IdentifierInfo *TemplateII = + const IdentifierInfo *TemplateII = TemplateName.getKind() == UnqualifiedIdKind::IK_Identifier ? 
TemplateName.Identifier : nullptr; diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index 350bd78b57107..91713d71786ee 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -854,7 +854,8 @@ StringRef CodeCompletionResult::getOrderedName(std::string &Saved) const { if (IdentifierInfo *Id = Name.getAsIdentifierInfo()) return Id->getName(); if (Name.isObjCZeroArgSelector()) - if (IdentifierInfo *Id = Name.getObjCSelector().getIdentifierInfoForSlot(0)) + if (const IdentifierInfo *Id = + Name.getObjCSelector().getIdentifierInfoForSlot(0)) return Id->getName(); Saved = Name.getAsString(); diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 801b03a63dbc8..a2ea66f339c8e 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -92,9 +92,8 @@ DarwinSDKInfo *Sema::getDarwinSDKInfoForAvailabilityChecking() { return nullptr; } -IdentifierInfo * -Sema::InventAbbreviatedTemplateParameterTypeName(IdentifierInfo *ParamName, - unsigned int Index) { +IdentifierInfo *Sema::InventAbbreviatedTemplateParameterTypeName( + const IdentifierInfo *ParamName, unsigned int Index) { std::string InventedName; llvm::raw_string_ostream OS(InventedName); diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 83ebcaf9e765a..c335017f243eb 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -3691,7 +3691,7 @@ CodeCompletionString *CodeCompletionResult::createCodeCompletionStringForDecl( std::string Keyword; if (Idx > StartParameter) Result.AddChunk(CodeCompletionString::CK_HorizontalSpace); - if (IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Idx)) + if (const IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Idx)) Keyword += II->getName(); Keyword += ":"; if (Idx < StartParameter || AllParametersAreInformative) @@ -3720,7 +3720,7 @@ CodeCompletionString *CodeCompletionResult::createCodeCompletionStringForDecl( Arg = "(" + formatObjCParamQualifiers((*P)->getObjCDeclQualifier(), ParamType); Arg += ParamType.getAsString(Policy) + ")"; - if (IdentifierInfo *II = (*P)->getIdentifier()) + if (const IdentifierInfo *II = (*P)->getIdentifier()) if (DeclaringEntity || AllParametersAreInformative) Arg += II->getName(); } @@ -4500,11 +4500,11 @@ void Sema::CodeCompleteOrdinaryName(Scope *S, Results.data(), Results.size()); } -static void AddClassMessageCompletions(Sema &SemaRef, Scope *S, - ParsedType Receiver, - ArrayRef SelIdents, - bool AtArgumentExpression, bool IsSuper, - ResultBuilder &Results); +static void +AddClassMessageCompletions(Sema &SemaRef, Scope *S, ParsedType Receiver, + ArrayRef SelIdents, + bool AtArgumentExpression, bool IsSuper, + ResultBuilder &Results); void Sema::CodeCompleteDeclSpec(Scope *S, DeclSpec &DS, bool AllowNonIdentifiers, @@ -4928,7 +4928,7 @@ void Sema::CodeCompletePostfixExpression(Scope *S, ExprResult E, /// The set of properties that have already been added, referenced by /// property name. -typedef llvm::SmallPtrSet AddedPropertiesSet; +typedef llvm::SmallPtrSet AddedPropertiesSet; /// Retrieve the container definition, if any? 
static ObjCContainerDecl *getContainerDef(ObjCContainerDecl *Container) { @@ -5090,7 +5090,7 @@ AddObjCProperties(const CodeCompletionContext &CCContext, PrintingPolicy Policy = getCompletionPrintingPolicy(Results.getSema()); // Adds a method result const auto AddMethod = [&](const ObjCMethodDecl *M) { - IdentifierInfo *Name = M->getSelector().getIdentifierInfoForSlot(0); + const IdentifierInfo *Name = M->getSelector().getIdentifierInfoForSlot(0); if (!Name) return; if (!AddedProperties.insert(Name).second) @@ -5859,10 +5859,10 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, } void Sema::CodeCompleteObjCClassPropertyRefExpr(Scope *S, - IdentifierInfo &ClassName, + const IdentifierInfo &ClassName, SourceLocation ClassNameLoc, bool IsBaseExprStatement) { - IdentifierInfo *ClassNamePtr = &ClassName; + const IdentifierInfo *ClassNamePtr = &ClassName; ObjCInterfaceDecl *IFace = getObjCInterfaceDecl(ClassNamePtr, ClassNameLoc); if (!IFace) return; @@ -7527,7 +7527,7 @@ enum ObjCMethodKind { }; static bool isAcceptableObjCSelector(Selector Sel, ObjCMethodKind WantKind, - ArrayRef SelIdents, + ArrayRef SelIdents, bool AllowSameLength = true) { unsigned NumSelIdents = SelIdents.size(); if (NumSelIdents > Sel.getNumArgs()) @@ -7554,7 +7554,7 @@ static bool isAcceptableObjCSelector(Selector Sel, ObjCMethodKind WantKind, static bool isAcceptableObjCMethod(ObjCMethodDecl *Method, ObjCMethodKind WantKind, - ArrayRef SelIdents, + ArrayRef SelIdents, bool AllowSameLength = true) { return isAcceptableObjCSelector(Method->getSelector(), WantKind, SelIdents, AllowSameLength); @@ -7586,7 +7586,7 @@ typedef llvm::SmallPtrSet VisitedSelectorSet; /// \param Results the structure into which we'll add results. static void AddObjCMethods(ObjCContainerDecl *Container, bool WantInstanceMethods, ObjCMethodKind WantKind, - ArrayRef SelIdents, + ArrayRef SelIdents, DeclContext *CurContext, VisitedSelectorSet &Selectors, bool AllowSameLength, ResultBuilder &Results, bool InOriginalClass = true, @@ -7819,7 +7819,7 @@ static ObjCInterfaceDecl *GetAssumedMessageSendExprType(Expr *E) { if (Sel.isNull()) return nullptr; - IdentifierInfo *Id = Sel.getIdentifierInfoForSlot(0); + const IdentifierInfo *Id = Sel.getIdentifierInfoForSlot(0); if (!Id) return nullptr; @@ -7895,7 +7895,7 @@ static ObjCInterfaceDecl *GetAssumedMessageSendExprType(Expr *E) { /// this "super" completion. If NULL, no completion was added. static ObjCMethodDecl * AddSuperSendCompletion(Sema &S, bool NeedSuperKeyword, - ArrayRef SelIdents, + ArrayRef SelIdents, ResultBuilder &Results) { ObjCMethodDecl *CurMethod = S.getCurMethodDecl(); if (!CurMethod) @@ -8032,9 +8032,9 @@ void Sema::CodeCompleteObjCMessageReceiver(Scope *S) { Results.data(), Results.size()); } -void Sema::CodeCompleteObjCSuperMessage(Scope *S, SourceLocation SuperLoc, - ArrayRef SelIdents, - bool AtArgumentExpression) { +void Sema::CodeCompleteObjCSuperMessage( + Scope *S, SourceLocation SuperLoc, + ArrayRef SelIdents, bool AtArgumentExpression) { ObjCInterfaceDecl *CDecl = nullptr; if (ObjCMethodDecl *CurMethod = getCurMethodDecl()) { // Figure out which interface we're in. @@ -8059,7 +8059,7 @@ void Sema::CodeCompleteObjCSuperMessage(Scope *S, SourceLocation SuperLoc, } else { // "super" may be the name of a type or variable. Figure out which // it is. 
- IdentifierInfo *Super = getSuperIdentifier(); + const IdentifierInfo *Super = getSuperIdentifier(); NamedDecl *ND = LookupSingleName(S, Super, SuperLoc, LookupOrdinaryName); if ((CDecl = dyn_cast_or_null(ND))) { // "super" names an interface. Use it. @@ -8127,11 +8127,11 @@ static QualType getPreferredArgumentTypeForMessageSend(ResultBuilder &Results, return PreferredType; } -static void AddClassMessageCompletions(Sema &SemaRef, Scope *S, - ParsedType Receiver, - ArrayRef SelIdents, - bool AtArgumentExpression, bool IsSuper, - ResultBuilder &Results) { +static void +AddClassMessageCompletions(Sema &SemaRef, Scope *S, ParsedType Receiver, + ArrayRef SelIdents, + bool AtArgumentExpression, bool IsSuper, + ResultBuilder &Results) { typedef CodeCompletionResult Result; ObjCInterfaceDecl *CDecl = nullptr; @@ -8202,10 +8202,9 @@ static void AddClassMessageCompletions(Sema &SemaRef, Scope *S, Results.ExitScope(); } -void Sema::CodeCompleteObjCClassMessage(Scope *S, ParsedType Receiver, - ArrayRef SelIdents, - bool AtArgumentExpression, - bool IsSuper) { +void Sema::CodeCompleteObjCClassMessage( + Scope *S, ParsedType Receiver, ArrayRef SelIdents, + bool AtArgumentExpression, bool IsSuper) { QualType T = this->GetTypeFromParser(Receiver); @@ -8237,10 +8236,9 @@ void Sema::CodeCompleteObjCClassMessage(Scope *S, ParsedType Receiver, Results.data(), Results.size()); } -void Sema::CodeCompleteObjCInstanceMessage(Scope *S, Expr *Receiver, - ArrayRef SelIdents, - bool AtArgumentExpression, - ObjCInterfaceDecl *Super) { +void Sema::CodeCompleteObjCInstanceMessage( + Scope *S, Expr *Receiver, ArrayRef SelIdents, + bool AtArgumentExpression, ObjCInterfaceDecl *Super) { typedef CodeCompletionResult Result; Expr *RecExpr = static_cast(Receiver); @@ -8410,8 +8408,8 @@ void Sema::CodeCompleteObjCForCollection(Scope *S, CodeCompleteExpression(S, Data); } -void Sema::CodeCompleteObjCSelector(Scope *S, - ArrayRef SelIdents) { +void Sema::CodeCompleteObjCSelector( + Scope *S, ArrayRef SelIdents) { // If we have an external source, load the entire class method // pool from the AST file. 
if (ExternalSource) { @@ -9166,8 +9164,8 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // Add -(void)getKey:(type **)buffer range:(NSRange)inRange if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("get") + UpperKey).str(); - IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), - &Context.Idents.get("range")}; + const IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), + &Context.Idents.get("range")}; if (KnownSelectors.insert(Selectors.getSelector(2, SelectorIds)).second) { if (ReturnType.isNull()) { @@ -9198,8 +9196,8 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // - (void)insertObject:(type *)object inKeyAtIndex:(NSUInteger)index if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("in") + UpperKey + "AtIndex").str(); - IdentifierInfo *SelectorIds[2] = {&Context.Idents.get("insertObject"), - &Context.Idents.get(SelectorName)}; + const IdentifierInfo *SelectorIds[2] = {&Context.Idents.get("insertObject"), + &Context.Idents.get(SelectorName)}; if (KnownSelectors.insert(Selectors.getSelector(2, SelectorIds)).second) { if (ReturnType.isNull()) { @@ -9228,8 +9226,8 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // - (void)insertKey:(NSArray *)array atIndexes:(NSIndexSet *)indexes if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("insert") + UpperKey).str(); - IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), - &Context.Idents.get("atIndexes")}; + const IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), + &Context.Idents.get("atIndexes")}; if (KnownSelectors.insert(Selectors.getSelector(2, SelectorIds)).second) { if (ReturnType.isNull()) { @@ -9258,7 +9256,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("removeObjectFrom") + UpperKey + "AtIndex").str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9279,7 +9277,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // -(void)removeKeyAtIndexes:(NSIndexSet *)indexes if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("remove") + UpperKey + "AtIndexes").str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9301,8 +9299,8 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("replaceObjectIn") + UpperKey + "AtIndex").str(); - IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), - &Context.Idents.get("withObject")}; + const IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName), + &Context.Idents.get("withObject")}; if (KnownSelectors.insert(Selectors.getSelector(2, SelectorIds)).second) { if (ReturnType.isNull()) { @@ -9332,8 +9330,8 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, std::string SelectorName1 = 
(Twine("replace") + UpperKey + "AtIndexes").str(); std::string SelectorName2 = (Twine("with") + UpperKey).str(); - IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName1), - &Context.Idents.get(SelectorName2)}; + const IdentifierInfo *SelectorIds[2] = {&Context.Idents.get(SelectorName1), + &Context.Idents.get(SelectorName2)}; if (KnownSelectors.insert(Selectors.getSelector(2, SelectorIds)).second) { if (ReturnType.isNull()) { @@ -9368,7 +9366,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, ->getInterfaceDecl() ->getName() == "NSEnumerator"))) { std::string SelectorName = (Twine("enumeratorOf") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getNullarySelector(SelectorId)) .second) { if (ReturnType.isNull()) { @@ -9387,7 +9385,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, if (IsInstanceMethod && (ReturnType.isNull() || ReturnType->isObjCObjectPointerType())) { std::string SelectorName = (Twine("memberOf") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9417,7 +9415,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("add") + UpperKey + Twine("Object")).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9439,7 +9437,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // - (void)addKey:(NSSet *)objects if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("add") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9461,7 +9459,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("remove") + UpperKey + Twine("Object")).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9483,7 +9481,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // - (void)removeKey:(NSSet *)objects if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("remove") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9504,7 +9502,7 @@ 
static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, // - (void)intersectKey:(NSSet *)objects if (IsInstanceMethod && ReturnTypeMatchesVoid) { std::string SelectorName = (Twine("intersect") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getUnarySelector(SelectorId)).second) { if (ReturnType.isNull()) { Builder.AddChunk(CodeCompletionString::CK_LeftParen); @@ -9533,7 +9531,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, ->getName() == "NSSet"))) { std::string SelectorName = (Twine("keyPathsForValuesAffecting") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getNullarySelector(SelectorId)) .second) { if (ReturnType.isNull()) { @@ -9554,7 +9552,7 @@ static void AddObjCKeyValueCompletions(ObjCPropertyDecl *Property, ReturnType->isBooleanType())) { std::string SelectorName = (Twine("automaticallyNotifiesObserversOf") + UpperKey).str(); - IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); + const IdentifierInfo *SelectorId = &Context.Idents.get(SelectorName); if (KnownSelectors.insert(Selectors.getNullarySelector(SelectorId)) .second) { if (ReturnType.isNull()) { @@ -9749,7 +9747,7 @@ void Sema::CodeCompleteObjCMethodDecl(Scope *S, void Sema::CodeCompleteObjCMethodDeclSelector( Scope *S, bool IsInstanceMethod, bool AtParameterName, ParsedType ReturnTy, - ArrayRef SelIdents) { + ArrayRef SelIdents) { // If we have an external source, load the entire class method // pool from the AST file. if (ExternalSource) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8472aaeb6bad9..5a23179dfbbf4 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2318,7 +2318,7 @@ void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) { /// /// \returns The declaration of the named Objective-C class, or NULL if the /// class could not be found. -ObjCInterfaceDecl *Sema::getObjCInterfaceDecl(IdentifierInfo *&Id, +ObjCInterfaceDecl *Sema::getObjCInterfaceDecl(const IdentifierInfo *&Id, SourceLocation IdLoc, bool DoTypoCorrection) { // The third "scope" argument is 0 since we aren't enabling lazy built-in @@ -15307,7 +15307,7 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D, QualType parmDeclType = TInfo->getType(); // Check for redeclaration of parameters, e.g. int foo(int x, int x); - IdentifierInfo *II = D.getIdentifier(); + const IdentifierInfo *II = D.getIdentifier(); if (II) { LookupResult R(*this, II, D.getIdentifierLoc(), LookupOrdinaryName, ForVisibleRedeclaration); @@ -15459,9 +15459,9 @@ QualType Sema::AdjustParameterTypeForObjCAutoRefCount(QualType T, } ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc, - SourceLocation NameLoc, IdentifierInfo *Name, - QualType T, TypeSourceInfo *TSInfo, - StorageClass SC) { + SourceLocation NameLoc, + const IdentifierInfo *Name, QualType T, + TypeSourceInfo *TSInfo, StorageClass SC) { // In ARC, infer a lifetime qualifier for appropriate parameter types. if (getLangOpts().ObjCAutoRefCount && T.getObjCLifetime() == Qualifiers::OCL_None && @@ -18551,8 +18551,9 @@ void Sema::ActOnTagDefinitionError(Scope *S, Decl *TagD) { // Note that FieldName may be null for anonymous bitfields. 
ExprResult Sema::VerifyBitField(SourceLocation FieldLoc, - IdentifierInfo *FieldName, QualType FieldTy, - bool IsMsStruct, Expr *BitWidth) { + const IdentifierInfo *FieldName, + QualType FieldTy, bool IsMsStruct, + Expr *BitWidth) { assert(BitWidth); if (BitWidth->containsErrors()) return ExprError(); @@ -18661,7 +18662,7 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record, return nullptr; } - IdentifierInfo *II = D.getIdentifier(); + const IdentifierInfo *II = D.getIdentifier(); SourceLocation Loc = DeclStart; if (II) Loc = D.getIdentifierLoc(); @@ -18762,7 +18763,7 @@ FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T, SourceLocation TSSL, AccessSpecifier AS, NamedDecl *PrevDecl, Declarator *D) { - IdentifierInfo *II = Name.getAsIdentifierInfo(); + const IdentifierInfo *II = Name.getAsIdentifierInfo(); bool InvalidDecl = false; if (D) InvalidDecl = D->isInvalidType(); @@ -19022,7 +19023,7 @@ TranslateIvarVisibility(tok::ObjCKeywordKind ivarVisibility) { Decl *Sema::ActOnIvar(Scope *S, SourceLocation DeclStart, Declarator &D, Expr *BitWidth, tok::ObjCKeywordKind Visibility) { - IdentifierInfo *II = D.getIdentifier(); + const IdentifierInfo *II = D.getIdentifier(); SourceLocation Loc = DeclStart; if (II) Loc = D.getIdentifierLoc(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 068a2e4f04fa5..858951580ea45 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -16913,11 +16913,10 @@ Decl *Sema::ActOnEmptyDeclaration(Scope *S, /// Perform semantic analysis for the variable declaration that /// occurs within a C++ catch clause, returning the newly-created /// variable. -VarDecl *Sema::BuildExceptionDeclaration(Scope *S, - TypeSourceInfo *TInfo, +VarDecl *Sema::BuildExceptionDeclaration(Scope *S, TypeSourceInfo *TInfo, SourceLocation StartLoc, SourceLocation Loc, - IdentifierInfo *Name) { + const IdentifierInfo *Name) { bool Invalid = false; QualType ExDeclType = TInfo->getType(); @@ -17062,7 +17061,7 @@ Decl *Sema::ActOnExceptionDeclarator(Scope *S, Declarator &D) { Invalid = true; } - IdentifierInfo *II = D.getIdentifier(); + const IdentifierInfo *II = D.getIdentifier(); if (NamedDecl *PrevDecl = LookupSingleName(S, II, D.getIdentifierLoc(), LookupOrdinaryName, ForVisibleRedeclaration)) { @@ -19158,7 +19157,7 @@ MSPropertyDecl *Sema::HandleMSProperty(Scope *S, RecordDecl *Record, InClassInitStyle InitStyle, AccessSpecifier AS, const ParsedAttr &MSPropertyAttr) { - IdentifierInfo *II = D.getIdentifier(); + const IdentifierInfo *II = D.getIdentifier(); if (!II) { Diag(DeclStart, diag::err_anonymous_property); return nullptr; diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp index 94a245f0f905f..74d6f0700b0e4 100644 --- a/clang/lib/Sema/SemaDeclObjC.cpp +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -1818,9 +1818,9 @@ Sema::ActOnForwardProtocolDeclaration(SourceLocation AtProtocolLoc, } ObjCCategoryDecl *Sema::ActOnStartCategoryInterface( - SourceLocation AtInterfaceLoc, IdentifierInfo *ClassName, + SourceLocation AtInterfaceLoc, const IdentifierInfo *ClassName, SourceLocation ClassLoc, ObjCTypeParamList *typeParamList, - IdentifierInfo *CategoryName, SourceLocation CategoryLoc, + const IdentifierInfo *CategoryName, SourceLocation CategoryLoc, Decl *const *ProtoRefs, unsigned NumProtoRefs, const SourceLocation *ProtoLocs, SourceLocation EndProtoLoc, const ParsedAttributesView &AttrList) { @@ -1916,9 +1916,9 @@ ObjCCategoryDecl *Sema::ActOnStartCategoryInterface( /// category 
implementation declaration and build an ObjCCategoryImplDecl /// object. ObjCCategoryImplDecl *Sema::ActOnStartCategoryImplementation( - SourceLocation AtCatImplLoc, IdentifierInfo *ClassName, - SourceLocation ClassLoc, IdentifierInfo *CatName, SourceLocation CatLoc, - const ParsedAttributesView &Attrs) { + SourceLocation AtCatImplLoc, const IdentifierInfo *ClassName, + SourceLocation ClassLoc, const IdentifierInfo *CatName, + SourceLocation CatLoc, const ParsedAttributesView &Attrs) { ObjCInterfaceDecl *IDecl = getObjCInterfaceDecl(ClassName, ClassLoc, true); ObjCCategoryDecl *CatIDecl = nullptr; if (IDecl && IDecl->hasDefinition()) { @@ -1982,8 +1982,8 @@ ObjCCategoryImplDecl *Sema::ActOnStartCategoryImplementation( } ObjCImplementationDecl *Sema::ActOnStartClassImplementation( - SourceLocation AtClassImplLoc, IdentifierInfo *ClassName, - SourceLocation ClassLoc, IdentifierInfo *SuperClassname, + SourceLocation AtClassImplLoc, const IdentifierInfo *ClassName, + SourceLocation ClassLoc, const IdentifierInfo *SuperClassname, SourceLocation SuperClassLoc, const ParsedAttributesView &Attrs) { ObjCInterfaceDecl *IDecl = nullptr; // Check for another declaration kind with the same name. @@ -2751,7 +2751,7 @@ static void CheckProtocolMethodDefs( // implemented in the class, we should not issue "Method definition not // found" warnings. // FIXME: Use a general GetUnarySelector method for this. - IdentifierInfo* II = &S.Context.Idents.get("forwardInvocation"); + const IdentifierInfo *II = &S.Context.Idents.get("forwardInvocation"); Selector fISelector = S.Context.Selectors.getSelector(1, &II); if (InsMap.count(fISelector)) // Is IDecl derived from 'NSProxy'? If so, no instance methods @@ -5105,8 +5105,8 @@ bool Sema::CheckObjCDeclScope(Decl *D) { /// Called whenever \@defs(ClassName) is encountered in the source. Inserts the /// instance variables of ClassName into Decls. void Sema::ActOnDefs(Scope *S, Decl *TagD, SourceLocation DeclStart, - IdentifierInfo *ClassName, - SmallVectorImpl &Decls) { + const IdentifierInfo *ClassName, + SmallVectorImpl &Decls) { // Check that ClassName is a valid class ObjCInterfaceDecl *Class = getObjCInterfaceDecl(ClassName, DeclStart); if (!Class) { @@ -5148,8 +5148,7 @@ void Sema::ActOnDefs(Scope *S, Decl *TagD, SourceLocation DeclStart, VarDecl *Sema::BuildObjCExceptionDecl(TypeSourceInfo *TInfo, QualType T, SourceLocation StartLoc, SourceLocation IdLoc, - IdentifierInfo *Id, - bool Invalid) { + const IdentifierInfo *Id, bool Invalid) { // ISO/IEC TR 18037 S6.7.3: "The type of an object with automatic storage // duration shall not be qualified by an address-space qualifier." // Since all parameters have automatic store duration, they can not have diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 7b9b8f149d9ed..12f42f66e5e21 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -57,7 +57,7 @@ using namespace sema; /// name of the corresponding type. ParsedType Sema::getInheritingConstructorName(CXXScopeSpec &SS, SourceLocation NameLoc, - IdentifierInfo &Name) { + const IdentifierInfo &Name) { NestedNameSpecifier *NNS = SS.getScopeRep(); // Convert the nested-name-specifier into a type. 
@@ -89,10 +89,9 @@ ParsedType Sema::getInheritingConstructorName(CXXScopeSpec &SS, Context.getTrivialTypeSourceInfo(Type, NameLoc)); } -ParsedType Sema::getConstructorName(IdentifierInfo &II, - SourceLocation NameLoc, - Scope *S, CXXScopeSpec &SS, - bool EnteringContext) { +ParsedType Sema::getConstructorName(const IdentifierInfo &II, + SourceLocation NameLoc, Scope *S, + CXXScopeSpec &SS, bool EnteringContext) { CXXRecordDecl *CurClass = getCurrentClass(S, &SS); assert(CurClass && &II == CurClass->getIdentifier() && "not a constructor name"); @@ -140,9 +139,9 @@ ParsedType Sema::getConstructorName(IdentifierInfo &II, return ParsedType::make(T); } -ParsedType Sema::getDestructorName(IdentifierInfo &II, SourceLocation NameLoc, - Scope *S, CXXScopeSpec &SS, - ParsedType ObjectTypePtr, +ParsedType Sema::getDestructorName(const IdentifierInfo &II, + SourceLocation NameLoc, Scope *S, + CXXScopeSpec &SS, ParsedType ObjectTypePtr, bool EnteringContext) { // Determine where to perform name lookup. @@ -500,7 +499,7 @@ bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS, // // double operator""_Bq(long double); // OK: not a reserved identifier // double operator"" _Bq(long double); // ill-formed, no diagnostic required - IdentifierInfo *II = Name.Identifier; + const IdentifierInfo *II = Name.Identifier; ReservedIdentifierStatus Status = II->isReserved(PP.getLangOpts()); SourceLocation Loc = Name.getEndLoc(); if (!PP.getSourceManager().isInSystemHeader(Loc)) { @@ -9178,10 +9177,9 @@ concepts::Requirement *Sema::ActOnSimpleRequirement(Expr *E) { /*ReturnTypeRequirement=*/{}); } -concepts::Requirement * -Sema::ActOnTypeRequirement(SourceLocation TypenameKWLoc, CXXScopeSpec &SS, - SourceLocation NameLoc, IdentifierInfo *TypeName, - TemplateIdAnnotation *TemplateId) { +concepts::Requirement *Sema::ActOnTypeRequirement( + SourceLocation TypenameKWLoc, CXXScopeSpec &SS, SourceLocation NameLoc, + const IdentifierInfo *TypeName, TemplateIdAnnotation *TemplateId) { assert(((!TypeName && TemplateId) || (TypeName && !TemplateId)) && "Exactly one of TypeName and TemplateId must be specified."); TypeSourceInfo *TSI = nullptr; diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index a8853f634c9cc..3148f0db6e20c 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -663,10 +663,8 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) { } if (!ValueWithBytesObjCTypeMethod) { - IdentifierInfo *II[] = { - &Context.Idents.get("valueWithBytes"), - &Context.Idents.get("objCType") - }; + const IdentifierInfo *II[] = {&Context.Idents.get("valueWithBytes"), + &Context.Idents.get("objCType")}; Selector ValueWithBytesObjCType = Context.Selectors.getSelector(2, II); // Look for the appropriate method within NSValue. 
@@ -2155,13 +2153,12 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT, return ExprError(); } -ExprResult Sema:: -ActOnClassPropertyRefExpr(IdentifierInfo &receiverName, - IdentifierInfo &propertyName, - SourceLocation receiverNameLoc, - SourceLocation propertyNameLoc) { +ExprResult Sema::ActOnClassPropertyRefExpr(const IdentifierInfo &receiverName, + const IdentifierInfo &propertyName, + SourceLocation receiverNameLoc, + SourceLocation propertyNameLoc) { - IdentifierInfo *receiverNamePtr = &receiverName; + const IdentifierInfo *receiverNamePtr = &receiverName; ObjCInterfaceDecl *IFace = getObjCInterfaceDecl(receiverNamePtr, receiverNameLoc); diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp index f9e1ad0121e2a..222a65a13dd0b 100644 --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -419,7 +419,7 @@ Sema::HandlePropertyInClassExtension(Scope *S, ObjCCategoryDecl *CDecl = cast(CurContext); // Diagnose if this property is already in continuation class. DeclContext *DC = CurContext; - IdentifierInfo *PropertyId = FD.D.getIdentifier(); + const IdentifierInfo *PropertyId = FD.D.getIdentifier(); ObjCInterfaceDecl *CCPrimary = CDecl->getClassInterface(); // We need to look in the @interface to see if the @property was @@ -571,7 +571,7 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S, TypeSourceInfo *TInfo, tok::ObjCKeywordKind MethodImplKind, DeclContext *lexicalDC){ - IdentifierInfo *PropertyId = FD.D.getIdentifier(); + const IdentifierInfo *PropertyId = FD.D.getIdentifier(); // Property defaults to 'assign' if it is readwrite, unless this is ARC // and the type is retainable. diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index c814535ad6bdb..e9efb4721133f 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -7375,7 +7375,7 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( llvm::omp::TraitProperty::implementation_extension_allow_templates)) return; - IdentifierInfo *BaseII = D.getIdentifier(); + const IdentifierInfo *BaseII = D.getIdentifier(); LookupResult Lookup(*this, DeclarationName(BaseII), D.getIdentifierLoc(), LookupOrdinaryName); LookupParsedName(Lookup, S, &D.getCXXScopeSpec()); diff --git a/clang/lib/Sema/SemaPseudoObject.cpp b/clang/lib/Sema/SemaPseudoObject.cpp index 528c261c4a297..82774760b34d4 100644 --- a/clang/lib/Sema/SemaPseudoObject.cpp +++ b/clang/lib/Sema/SemaPseudoObject.cpp @@ -613,9 +613,9 @@ bool ObjCPropertyOpBuilder::findGetter() { // Must build the getter selector the hard way. 
ObjCMethodDecl *setter = RefExpr->getImplicitPropertySetter(); assert(setter && "both setter and getter are null - cannot happen"); - IdentifierInfo *setterName = - setter->getSelector().getIdentifierInfoForSlot(0); - IdentifierInfo *getterName = + const IdentifierInfo *setterName = + setter->getSelector().getIdentifierInfoForSlot(0); + const IdentifierInfo *getterName = &S.Context.Idents.get(setterName->getName().substr(3)); GetterSelector = S.PP.getSelectorTable().getNullarySelector(getterName); @@ -640,9 +640,9 @@ bool ObjCPropertyOpBuilder::findSetter(bool warn) { SetterSelector = setter->getSelector(); return true; } else { - IdentifierInfo *getterName = - RefExpr->getImplicitPropertyGetter()->getSelector() - .getIdentifierInfoForSlot(0); + const IdentifierInfo *getterName = RefExpr->getImplicitPropertyGetter() + ->getSelector() + .getIdentifierInfoForSlot(0); SetterSelector = SelectorTable::constructSetterSelector(S.PP.getIdentifierTable(), S.PP.getSelectorTable(), @@ -667,7 +667,8 @@ bool ObjCPropertyOpBuilder::findSetter(bool warn) { front = isLowercase(front) ? toUppercase(front) : toLowercase(front); SmallString<100> PropertyName = thisPropertyName; PropertyName[0] = front; - IdentifierInfo *AltMember = &S.PP.getIdentifierTable().get(PropertyName); + const IdentifierInfo *AltMember = + &S.PP.getIdentifierTable().get(PropertyName); if (ObjCPropertyDecl *prop1 = IFace->FindPropertyDeclaration( AltMember, prop->getQueryKind())) if (prop != prop1 && (prop1->getSetterMethodDecl() == setter)) { @@ -1126,9 +1127,8 @@ static void CheckKeyForObjCARCConversion(Sema &S, QualType ContainerT, return; // dictionary subscripting. // - (id)objectForKeyedSubscript:(id)key; - IdentifierInfo *KeyIdents[] = { - &S.Context.Idents.get("objectForKeyedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("objectForKeyedSubscript")}; Selector GetterSelector = S.Context.Selectors.getSelector(1, KeyIdents); ObjCMethodDecl *Getter = S.LookupMethodInObjectType(GetterSelector, ContainerT, true /*instance*/); @@ -1169,16 +1169,14 @@ bool ObjCSubscriptOpBuilder::findAtIndexGetter() { if (!arrayRef) { // dictionary subscripting. // - (id)objectForKeyedSubscript:(id)key; - IdentifierInfo *KeyIdents[] = { - &S.Context.Idents.get("objectForKeyedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("objectForKeyedSubscript")}; AtIndexGetterSelector = S.Context.Selectors.getSelector(1, KeyIdents); } else { // - (id)objectAtIndexedSubscript:(size_t)index; - IdentifierInfo *KeyIdents[] = { - &S.Context.Idents.get("objectAtIndexedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("objectAtIndexedSubscript")}; AtIndexGetterSelector = S.Context.Selectors.getSelector(1, KeyIdents); } @@ -1274,18 +1272,16 @@ bool ObjCSubscriptOpBuilder::findAtIndexSetter() { if (!arrayRef) { // dictionary subscripting. 
// - (void)setObject:(id)object forKeyedSubscript:(id)key; - IdentifierInfo *KeyIdents[] = { - &S.Context.Idents.get("setObject"), - &S.Context.Idents.get("forKeyedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("setObject"), + &S.Context.Idents.get("forKeyedSubscript")}; AtIndexSetterSelector = S.Context.Selectors.getSelector(2, KeyIdents); } else { // - (void)setObject:(id)object atIndexedSubscript:(NSInteger)index; - IdentifierInfo *KeyIdents[] = { - &S.Context.Idents.get("setObject"), - &S.Context.Idents.get("atIndexedSubscript") - }; + const IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("setObject"), + &S.Context.Idents.get("atIndexedSubscript")}; AtIndexSetterSelector = S.Context.Selectors.getSelector(2, KeyIdents); } AtIndexSetter = S.LookupMethodInObjectType(AtIndexSetterSelector, ResultType, @@ -1474,7 +1470,7 @@ ExprResult MSPropertyOpBuilder::buildGet() { } UnqualifiedId GetterName; - IdentifierInfo *II = RefExpr->getPropertyDecl()->getGetterId(); + const IdentifierInfo *II = RefExpr->getPropertyDecl()->getGetterId(); GetterName.setIdentifier(II, RefExpr->getMemberLoc()); CXXScopeSpec SS; SS.Adopt(RefExpr->getQualifierLoc()); @@ -1503,7 +1499,7 @@ ExprResult MSPropertyOpBuilder::buildSet(Expr *op, SourceLocation sl, } UnqualifiedId SetterName; - IdentifierInfo *II = RefExpr->getPropertyDecl()->getSetterId(); + const IdentifierInfo *II = RefExpr->getPropertyDecl()->getSetterId(); SetterName.setIdentifier(II, RefExpr->getMemberLoc()); CXXScopeSpec SS; SS.Adopt(RefExpr->getQualifierLoc()); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index e72397adec24f..e53c76e65b03d 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -2275,11 +2275,9 @@ Sema::CheckObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) { // Otherwise, if we have any useful type information, check that // the type declares the appropriate method. 
} else if (iface || !objectType->qual_empty()) { - IdentifierInfo *selectorIdents[] = { - &Context.Idents.get("countByEnumeratingWithState"), - &Context.Idents.get("objects"), - &Context.Idents.get("count") - }; + const IdentifierInfo *selectorIdents[] = { + &Context.Idents.get("countByEnumeratingWithState"), + &Context.Idents.get("objects"), &Context.Idents.get("count")}; Selector selector = Context.Selectors.getSelector(3, &selectorIdents[0]); ObjCMethodDecl *method = nullptr; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 2013799b5eb81..951e5a31cab3b 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -970,7 +970,7 @@ void Sema::translateTemplateArguments(const ASTTemplateArgsPtr &TemplateArgsIn, static void maybeDiagnoseTemplateParameterShadow(Sema &SemaRef, Scope *S, SourceLocation Loc, - IdentifierInfo *Name) { + const IdentifierInfo *Name) { NamedDecl *PrevDecl = SemaRef.LookupSingleName( S, Name, Loc, Sema::LookupOrdinaryName, Sema::ForVisibleRedeclaration); if (PrevDecl && PrevDecl->isTemplateParameter()) @@ -1578,7 +1578,7 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D, CheckFunctionOrTemplateParamDeclarator(S, D); - IdentifierInfo *ParamName = D.getIdentifier(); + const IdentifierInfo *ParamName = D.getIdentifier(); bool IsParameterPack = D.hasEllipsis(); NonTypeTemplateParmDecl *Param = NonTypeTemplateParmDecl::Create( Context, Context.getTranslationUnitDecl(), D.getBeginLoc(), @@ -4702,7 +4702,7 @@ bool Sema::resolveAssumedTemplateNameAsType(Scope *S, TemplateName &Name, TypeResult Sema::ActOnTemplateIdType( Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, - TemplateTy TemplateD, IdentifierInfo *TemplateII, + TemplateTy TemplateD, const IdentifierInfo *TemplateII, SourceLocation TemplateIILoc, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc, bool IsCtorOrDtorName, bool IsClassName, @@ -9684,10 +9684,9 @@ Decl *Sema::ActOnTemplateDeclarator(Scope *S, return NewDecl; } -Decl *Sema::ActOnConceptDefinition(Scope *S, - MultiTemplateParamsArg TemplateParameterLists, - IdentifierInfo *Name, SourceLocation NameLoc, - Expr *ConstraintExpr) { +Decl *Sema::ActOnConceptDefinition( + Scope *S, MultiTemplateParamsArg TemplateParameterLists, + const IdentifierInfo *Name, SourceLocation NameLoc, Expr *ConstraintExpr) { DeclContext *DC = CurContext; if (!DC->getRedeclContext()->isFileContext()) { @@ -11511,10 +11510,11 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S, return (Decl*) nullptr; } -TypeResult -Sema::ActOnDependentTag(Scope *S, unsigned TagSpec, TagUseKind TUK, - const CXXScopeSpec &SS, IdentifierInfo *Name, - SourceLocation TagLoc, SourceLocation NameLoc) { +TypeResult Sema::ActOnDependentTag(Scope *S, unsigned TagSpec, TagUseKind TUK, + const CXXScopeSpec &SS, + const IdentifierInfo *Name, + SourceLocation TagLoc, + SourceLocation NameLoc) { // This has to hold, because SS is expected to be defined. 
assert(Name && "Expected a name in a dependent tag"); @@ -11574,14 +11574,10 @@ TypeResult Sema::ActOnTypenameType(Scope *S, SourceLocation TypenameLoc, } TypeResult -Sema::ActOnTypenameType(Scope *S, - SourceLocation TypenameLoc, - const CXXScopeSpec &SS, - SourceLocation TemplateKWLoc, - TemplateTy TemplateIn, - IdentifierInfo *TemplateII, - SourceLocation TemplateIILoc, - SourceLocation LAngleLoc, +Sema::ActOnTypenameType(Scope *S, SourceLocation TypenameLoc, + const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, + TemplateTy TemplateIn, const IdentifierInfo *TemplateII, + SourceLocation TemplateIILoc, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc) { if (TypenameLoc.isValid() && S && !S->getTemplateParamParent()) @@ -11657,7 +11653,6 @@ Sema::ActOnTypenameType(Scope *S, return CreateParsedType(T, TSI); } - /// Determine whether this failed name lookup should be treated as being /// disabled by a usage of std::enable_if. static bool isEnableIf(NestedNameSpecifierLoc NNS, const IdentifierInfo &II, diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index 6110e287b7fb5..f8d54c0c39890 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -284,7 +284,7 @@ unsigned serialization::ComputeHash(Selector Sel) { ++N; unsigned R = 5381; for (unsigned I = 0; I != N; ++I) - if (IdentifierInfo *II = Sel.getIdentifierInfoForSlot(I)) + if (const IdentifierInfo *II = Sel.getIdentifierInfoForSlot(I)) R = llvm::djbHash(II->getName(), R); return R; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0ca7f6600eee3..4f6987f92fc82 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -916,14 +916,14 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { SelectorTable &SelTable = Reader.getContext().Selectors; unsigned N = endian::readNext(d); - IdentifierInfo *FirstII = Reader.getLocalIdentifier( F, endian::readNext(d)); if (N == 0) return SelTable.getNullarySelector(FirstII); else if (N == 1) return SelTable.getUnarySelector(FirstII); - SmallVector<IdentifierInfo *, 16> Args; + SmallVector<const IdentifierInfo *, 16> Args; Args.push_back(FirstII); for (unsigned I = 1; I != N; ++I) Args.push_back(Reader.getLocalIdentifier( @@ -987,7 +987,7 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) { } /// Whether the given identifier is "interesting". -static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II, +static bool isInterestingIdentifier(ASTReader &Reader, const IdentifierInfo &II, bool IsModule) { bool IsInteresting = II.getNotableIdentifierID() != tok::NotableIdentifierKind::not_notable || @@ -2229,7 +2229,7 @@ namespace { } // namespace -void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) { // Note that we are loading an identifier. Deserializing AnIdentifier(this); @@ -2254,11 +2254,11 @@ void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) { markIdentifierUpToDate(&II); } -void ASTReader::markIdentifierUpToDate(IdentifierInfo *II) { if (!II) return; - II->setOutOfDate(false); +void ASTReader::markIdentifierUpToDate(const IdentifierInfo *II) { if (!II) return; + const_cast<IdentifierInfo *>(II)->setOutOfDate(false); // Update the generation for this identifier.
  if (getContext().getLangOpts().Modules)
@@ -10168,7 +10168,7 @@ void ASTReader::FinishedDeserializing() {
 }
 
 void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
-  if (IdentifierInfo *II = Name.getAsIdentifierInfo()) {
+  if (const IdentifierInfo *II = Name.getAsIdentifierInfo()) {
     // Remove any fake results before adding any real ones.
     auto It = PendingFakeLookupResults.find(II);
     if (It != PendingFakeLookupResults.end()) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index d2afe378bb0c3..ffc53292e3912 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3629,7 +3629,7 @@ class ASTIdentifierTableTrait {
   }
 
 public:
-  using key_type = IdentifierInfo *;
+  using key_type = const IdentifierInfo *;
   using key_type_ref = key_type;
 
   using data_type = IdentID;
@@ -3661,7 +3661,7 @@ class ASTIdentifierTableTrait {
   }
 
   std::pair<unsigned, unsigned>
-  EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
+  EmitKeyDataLength(raw_ostream &Out, const IdentifierInfo *II, IdentID ID) {
     // Record the location of the identifier data. This is used when generating
     // the mapping from persistent IDs to strings.
     Writer.SetIdentifierOffset(II, Out.tell());
@@ -3688,13 +3688,12 @@ class ASTIdentifierTableTrait {
     return emitULEBKeyDataLength(KeyLen, DataLen, Out);
   }
 
-  void EmitKey(raw_ostream& Out, const IdentifierInfo* II,
-               unsigned KeyLen) {
+  void EmitKey(raw_ostream &Out, const IdentifierInfo *II, unsigned KeyLen) {
     Out.write(II->getNameStart(), KeyLen);
   }
 
-  void EmitData(raw_ostream& Out, IdentifierInfo* II,
-                IdentID ID, unsigned) {
+  void EmitData(raw_ostream &Out, const IdentifierInfo *II, IdentID ID,
+                unsigned) {
     using namespace llvm::support;
 
     endian::Writer LE(Out, llvm::endianness::little);
@@ -3776,13 +3775,14 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
   // for identifiers that appear here for the first time.
   IdentifierOffsets.resize(NextIdentID - FirstIdentID);
   for (auto IdentIDPair : IdentifierIDs) {
-    auto *II = const_cast<IdentifierInfo *>(IdentIDPair.first);
+    const IdentifierInfo *II = IdentIDPair.first;
    IdentID ID = IdentIDPair.second;
    assert(II && "NULL identifier in identifier table");
+
    // Write out identifiers if either the ID is local or the identifier has
    // changed since it was loaded.
- if (ID >= FirstIdentID || !Chain || !II->isFromAST() - || II->hasChangedSinceDeserialization() || + if (ID >= FirstIdentID || !Chain || !II->isFromAST() || + II->hasChangedSinceDeserialization() || (Trait.needDecls() && II->hasFETokenInfoChangedSinceDeserialization())) Generator.insert(II, ID, Trait); diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp index 978bc0bb082f8..b4390f0b85bbe 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp @@ -768,8 +768,8 @@ void ObjCDeallocChecker::initIdentifierInfoAndSelectors( Block_releaseII = &Ctx.Idents.get("_Block_release"); CIFilterII = &Ctx.Idents.get("CIFilter"); - IdentifierInfo *DeallocII = &Ctx.Idents.get("dealloc"); - IdentifierInfo *ReleaseII = &Ctx.Idents.get("release"); + const IdentifierInfo *DeallocII = &Ctx.Idents.get("dealloc"); + const IdentifierInfo *ReleaseII = &Ctx.Idents.get("release"); DeallocSel = Ctx.Selectors.getSelector(0, &DeallocII); ReleaseSel = Ctx.Selectors.getSelector(0, &ReleaseII); } diff --git a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp index 812d787e2e37c..882eb0236a189 100644 --- a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp @@ -154,11 +154,11 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UISearchDisplayController, setSearchResultsTitle, 0) NEW_RECEIVER(UITabBarItem) - IdentifierInfo *initWithTitleUITabBarItemTag[] = { + const IdentifierInfo *initWithTitleUITabBarItemTag[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("image"), &Ctx.Idents.get("tag")}; ADD_METHOD(UITabBarItem, initWithTitleUITabBarItemTag, 3, 0) - IdentifierInfo *initWithTitleUITabBarItemImage[] = { + const IdentifierInfo *initWithTitleUITabBarItemImage[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("image"), &Ctx.Idents.get("selectedImage")}; ADD_METHOD(UITabBarItem, initWithTitleUITabBarItemImage, 3, 0) @@ -171,7 +171,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSStatusItem, setToolTip, 0) NEW_RECEIVER(UITableViewRowAction) - IdentifierInfo *rowActionWithStyleUITableViewRowAction[] = { + const IdentifierInfo *rowActionWithStyleUITableViewRowAction[] = { &Ctx.Idents.get("rowActionWithStyle"), &Ctx.Idents.get("title"), &Ctx.Idents.get("handler")}; ADD_METHOD(UITableViewRowAction, rowActionWithStyleUITableViewRowAction, 3, 1) @@ -183,19 +183,19 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { NEW_RECEIVER(NSButton) ADD_UNARY_METHOD(NSButton, setTitle, 0) ADD_UNARY_METHOD(NSButton, setAlternateTitle, 0) - IdentifierInfo *radioButtonWithTitleNSButton[] = { + const IdentifierInfo *radioButtonWithTitleNSButton[] = { &Ctx.Idents.get("radioButtonWithTitle"), &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; ADD_METHOD(NSButton, radioButtonWithTitleNSButton, 3, 0) - IdentifierInfo *buttonWithTitleNSButtonImage[] = { + const IdentifierInfo *buttonWithTitleNSButtonImage[] = { &Ctx.Idents.get("buttonWithTitle"), &Ctx.Idents.get("image"), &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; ADD_METHOD(NSButton, buttonWithTitleNSButtonImage, 4, 0) - IdentifierInfo *checkboxWithTitleNSButton[] = { + const IdentifierInfo *checkboxWithTitleNSButton[] = { &Ctx.Idents.get("checkboxWithTitle"), &Ctx.Idents.get("target"), 
&Ctx.Idents.get("action")}; ADD_METHOD(NSButton, checkboxWithTitleNSButton, 3, 0) - IdentifierInfo *buttonWithTitleNSButtonTarget[] = { + const IdentifierInfo *buttonWithTitleNSButtonTarget[] = { &Ctx.Idents.get("buttonWithTitle"), &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; ADD_METHOD(NSButton, buttonWithTitleNSButtonTarget, 3, 0) @@ -215,8 +215,8 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSTabViewItem, setToolTip, 0) NEW_RECEIVER(NSBrowser) - IdentifierInfo *setTitleNSBrowser[] = {&Ctx.Idents.get("setTitle"), - &Ctx.Idents.get("ofColumn")}; + const IdentifierInfo *setTitleNSBrowser[] = {&Ctx.Idents.get("setTitle"), + &Ctx.Idents.get("ofColumn")}; ADD_METHOD(NSBrowser, setTitleNSBrowser, 2, 0) NEW_RECEIVER(UIAccessibilityElement) @@ -225,14 +225,14 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIAccessibilityElement, setAccessibilityValue, 0) NEW_RECEIVER(UIAlertAction) - IdentifierInfo *actionWithTitleUIAlertAction[] = { + const IdentifierInfo *actionWithTitleUIAlertAction[] = { &Ctx.Idents.get("actionWithTitle"), &Ctx.Idents.get("style"), &Ctx.Idents.get("handler")}; ADD_METHOD(UIAlertAction, actionWithTitleUIAlertAction, 3, 0) NEW_RECEIVER(NSPopUpButton) ADD_UNARY_METHOD(NSPopUpButton, addItemWithTitle, 0) - IdentifierInfo *insertItemWithTitleNSPopUpButton[] = { + const IdentifierInfo *insertItemWithTitleNSPopUpButton[] = { &Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("atIndex")}; ADD_METHOD(NSPopUpButton, insertItemWithTitleNSPopUpButton, 2, 0) ADD_UNARY_METHOD(NSPopUpButton, removeItemWithTitle, 0) @@ -240,7 +240,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSPopUpButton, setTitle, 0) NEW_RECEIVER(NSTableViewRowAction) - IdentifierInfo *rowActionWithStyleNSTableViewRowAction[] = { + const IdentifierInfo *rowActionWithStyleNSTableViewRowAction[] = { &Ctx.Idents.get("rowActionWithStyle"), &Ctx.Idents.get("title"), &Ctx.Idents.get("handler")}; ADD_METHOD(NSTableViewRowAction, rowActionWithStyleNSTableViewRowAction, 3, 1) @@ -273,10 +273,10 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSTableColumn, setHeaderToolTip, 0) NEW_RECEIVER(NSSegmentedControl) - IdentifierInfo *setLabelNSSegmentedControl[] = { + const IdentifierInfo *setLabelNSSegmentedControl[] = { &Ctx.Idents.get("setLabel"), &Ctx.Idents.get("forSegment")}; ADD_METHOD(NSSegmentedControl, setLabelNSSegmentedControl, 2, 0) - IdentifierInfo *setToolTipNSSegmentedControl[] = { + const IdentifierInfo *setToolTipNSSegmentedControl[] = { &Ctx.Idents.get("setToolTip"), &Ctx.Idents.get("forSegment")}; ADD_METHOD(NSSegmentedControl, setToolTipNSSegmentedControl, 2, 0) @@ -301,8 +301,8 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSAccessibility, setAccessibilityHelp, 0) NEW_RECEIVER(NSMatrix) - IdentifierInfo *setToolTipNSMatrix[] = {&Ctx.Idents.get("setToolTip"), - &Ctx.Idents.get("forCell")}; + const IdentifierInfo *setToolTipNSMatrix[] = {&Ctx.Idents.get("setToolTip"), + &Ctx.Idents.get("forCell")}; ADD_METHOD(NSMatrix, setToolTipNSMatrix, 2, 0) NEW_RECEIVER(NSPrintPanel) @@ -317,13 +317,13 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSSlider, setTitle, 0) NEW_RECEIVER(UIMenuItem) - IdentifierInfo *initWithTitleUIMenuItem[] = {&Ctx.Idents.get("initWithTitle"), - &Ctx.Idents.get("action")}; + const IdentifierInfo 
*initWithTitleUIMenuItem[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("action")}; ADD_METHOD(UIMenuItem, initWithTitleUIMenuItem, 2, 0) ADD_UNARY_METHOD(UIMenuItem, setTitle, 0) NEW_RECEIVER(UIAlertController) - IdentifierInfo *alertControllerWithTitleUIAlertController[] = { + const IdentifierInfo *alertControllerWithTitleUIAlertController[] = { &Ctx.Idents.get("alertControllerWithTitle"), &Ctx.Idents.get("message"), &Ctx.Idents.get("preferredStyle")}; ADD_METHOD(UIAlertController, alertControllerWithTitleUIAlertController, 3, 1) @@ -331,19 +331,19 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIAlertController, setMessage, 0) NEW_RECEIVER(UIApplicationShortcutItem) - IdentifierInfo *initWithTypeUIApplicationShortcutItemIcon[] = { + const IdentifierInfo *initWithTypeUIApplicationShortcutItemIcon[] = { &Ctx.Idents.get("initWithType"), &Ctx.Idents.get("localizedTitle"), &Ctx.Idents.get("localizedSubtitle"), &Ctx.Idents.get("icon"), &Ctx.Idents.get("userInfo")}; ADD_METHOD(UIApplicationShortcutItem, initWithTypeUIApplicationShortcutItemIcon, 5, 1) - IdentifierInfo *initWithTypeUIApplicationShortcutItem[] = { + const IdentifierInfo *initWithTypeUIApplicationShortcutItem[] = { &Ctx.Idents.get("initWithType"), &Ctx.Idents.get("localizedTitle")}; ADD_METHOD(UIApplicationShortcutItem, initWithTypeUIApplicationShortcutItem, 2, 1) NEW_RECEIVER(UIActionSheet) - IdentifierInfo *initWithTitleUIActionSheet[] = { + const IdentifierInfo *initWithTitleUIActionSheet[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("delegate"), &Ctx.Idents.get("cancelButtonTitle"), &Ctx.Idents.get("destructiveButtonTitle"), @@ -353,7 +353,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIActionSheet, setTitle, 0) NEW_RECEIVER(UIAccessibilityCustomAction) - IdentifierInfo *initWithNameUIAccessibilityCustomAction[] = { + const IdentifierInfo *initWithNameUIAccessibilityCustomAction[] = { &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("target"), &Ctx.Idents.get("selector")}; ADD_METHOD(UIAccessibilityCustomAction, @@ -382,7 +382,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { NEW_RECEIVER(NSAttributedString) ADD_UNARY_METHOD(NSAttributedString, initWithString, 0) - IdentifierInfo *initWithStringNSAttributedString[] = { + const IdentifierInfo *initWithStringNSAttributedString[] = { &Ctx.Idents.get("initWithString"), &Ctx.Idents.get("attributes")}; ADD_METHOD(NSAttributedString, initWithStringNSAttributedString, 2, 0) @@ -390,7 +390,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSText, setString, 0) NEW_RECEIVER(UIKeyCommand) - IdentifierInfo *keyCommandWithInputUIKeyCommand[] = { + const IdentifierInfo *keyCommandWithInputUIKeyCommand[] = { &Ctx.Idents.get("keyCommandWithInput"), &Ctx.Idents.get("modifierFlags"), &Ctx.Idents.get("action"), &Ctx.Idents.get("discoverabilityTitle")}; ADD_METHOD(UIKeyCommand, keyCommandWithInputUIKeyCommand, 4, 3) @@ -400,7 +400,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UILabel, setText, 0) NEW_RECEIVER(NSAlert) - IdentifierInfo *alertWithMessageTextNSAlert[] = { + const IdentifierInfo *alertWithMessageTextNSAlert[] = { &Ctx.Idents.get("alertWithMessageText"), &Ctx.Idents.get("defaultButton"), &Ctx.Idents.get("alternateButton"), &Ctx.Idents.get("otherButton"), &Ctx.Idents.get("informativeTextWithFormat")}; @@ -415,13 +415,13 @@ void 
NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIMutableApplicationShortcutItem, setLocalizedSubtitle, 0) NEW_RECEIVER(UIButton) - IdentifierInfo *setTitleUIButton[] = {&Ctx.Idents.get("setTitle"), - &Ctx.Idents.get("forState")}; + const IdentifierInfo *setTitleUIButton[] = {&Ctx.Idents.get("setTitle"), + &Ctx.Idents.get("forState")}; ADD_METHOD(UIButton, setTitleUIButton, 2, 0) NEW_RECEIVER(NSWindow) ADD_UNARY_METHOD(NSWindow, setTitle, 0) - IdentifierInfo *minFrameWidthWithTitleNSWindow[] = { + const IdentifierInfo *minFrameWidthWithTitleNSWindow[] = { &Ctx.Idents.get("minFrameWidthWithTitle"), &Ctx.Idents.get("styleMask")}; ADD_METHOD(NSWindow, minFrameWidthWithTitleNSWindow, 2, 0) ADD_UNARY_METHOD(NSWindow, setMiniwindowTitle, 0) @@ -430,7 +430,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSPathCell, setPlaceholderString, 0) NEW_RECEIVER(UIDocumentMenuViewController) - IdentifierInfo *addOptionWithTitleUIDocumentMenuViewController[] = { + const IdentifierInfo *addOptionWithTitleUIDocumentMenuViewController[] = { &Ctx.Idents.get("addOptionWithTitle"), &Ctx.Idents.get("image"), &Ctx.Idents.get("order"), &Ctx.Idents.get("handler")}; ADD_METHOD(UIDocumentMenuViewController, @@ -442,7 +442,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UINavigationItem, setPrompt, 0) NEW_RECEIVER(UIAlertView) - IdentifierInfo *initWithTitleUIAlertView[] = { + const IdentifierInfo *initWithTitleUIAlertView[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("message"), &Ctx.Idents.get("delegate"), &Ctx.Idents.get("cancelButtonTitle"), &Ctx.Idents.get("otherButtonTitles")}; @@ -474,11 +474,11 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSProgress, setLocalizedAdditionalDescription, 0) NEW_RECEIVER(NSSegmentedCell) - IdentifierInfo *setLabelNSSegmentedCell[] = {&Ctx.Idents.get("setLabel"), - &Ctx.Idents.get("forSegment")}; + const IdentifierInfo *setLabelNSSegmentedCell[] = { + &Ctx.Idents.get("setLabel"), &Ctx.Idents.get("forSegment")}; ADD_METHOD(NSSegmentedCell, setLabelNSSegmentedCell, 2, 0) - IdentifierInfo *setToolTipNSSegmentedCell[] = {&Ctx.Idents.get("setToolTip"), - &Ctx.Idents.get("forSegment")}; + const IdentifierInfo *setToolTipNSSegmentedCell[] = { + &Ctx.Idents.get("setToolTip"), &Ctx.Idents.get("forSegment")}; ADD_METHOD(NSSegmentedCell, setToolTipNSSegmentedCell, 2, 0) NEW_RECEIVER(NSUndoManager) @@ -487,7 +487,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSUndoManager, redoMenuTitleForUndoActionName, 0) NEW_RECEIVER(NSMenuItem) - IdentifierInfo *initWithTitleNSMenuItem[] = { + const IdentifierInfo *initWithTitleNSMenuItem[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("action"), &Ctx.Idents.get("keyEquivalent")}; ADD_METHOD(NSMenuItem, initWithTitleNSMenuItem, 3, 0) @@ -495,11 +495,11 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSMenuItem, setToolTip, 0) NEW_RECEIVER(NSPopUpButtonCell) - IdentifierInfo *initTextCellNSPopUpButtonCell[] = { + const IdentifierInfo *initTextCellNSPopUpButtonCell[] = { &Ctx.Idents.get("initTextCell"), &Ctx.Idents.get("pullsDown")}; ADD_METHOD(NSPopUpButtonCell, initTextCellNSPopUpButtonCell, 2, 0) ADD_UNARY_METHOD(NSPopUpButtonCell, addItemWithTitle, 0) - IdentifierInfo *insertItemWithTitleNSPopUpButtonCell[] = { + const IdentifierInfo *insertItemWithTitleNSPopUpButtonCell[] = { 
&Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("atIndex")}; ADD_METHOD(NSPopUpButtonCell, insertItemWithTitleNSPopUpButtonCell, 2, 0) ADD_UNARY_METHOD(NSPopUpButtonCell, removeItemWithTitle, 0) @@ -511,11 +511,11 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { NEW_RECEIVER(NSMenu) ADD_UNARY_METHOD(NSMenu, initWithTitle, 0) - IdentifierInfo *insertItemWithTitleNSMenu[] = { + const IdentifierInfo *insertItemWithTitleNSMenu[] = { &Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("action"), &Ctx.Idents.get("keyEquivalent"), &Ctx.Idents.get("atIndex")}; ADD_METHOD(NSMenu, insertItemWithTitleNSMenu, 4, 0) - IdentifierInfo *addItemWithTitleNSMenu[] = { + const IdentifierInfo *addItemWithTitleNSMenu[] = { &Ctx.Idents.get("addItemWithTitle"), &Ctx.Idents.get("action"), &Ctx.Idents.get("keyEquivalent")}; ADD_METHOD(NSMenu, addItemWithTitleNSMenu, 3, 0) @@ -526,15 +526,15 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { NEW_RECEIVER(NSForm) ADD_UNARY_METHOD(NSForm, addEntry, 0) - IdentifierInfo *insertEntryNSForm[] = {&Ctx.Idents.get("insertEntry"), - &Ctx.Idents.get("atIndex")}; + const IdentifierInfo *insertEntryNSForm[] = {&Ctx.Idents.get("insertEntry"), + &Ctx.Idents.get("atIndex")}; ADD_METHOD(NSForm, insertEntryNSForm, 2, 0) NEW_RECEIVER(NSTextFieldCell) ADD_UNARY_METHOD(NSTextFieldCell, setPlaceholderString, 0) NEW_RECEIVER(NSUserNotificationAction) - IdentifierInfo *actionWithIdentifierNSUserNotificationAction[] = { + const IdentifierInfo *actionWithIdentifierNSUserNotificationAction[] = { &Ctx.Idents.get("actionWithIdentifier"), &Ctx.Idents.get("title")}; ADD_METHOD(NSUserNotificationAction, actionWithIdentifierNSUserNotificationAction, 2, 1) @@ -544,7 +544,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UITextField, setPlaceholder, 0) NEW_RECEIVER(UIBarButtonItem) - IdentifierInfo *initWithTitleUIBarButtonItem[] = { + const IdentifierInfo *initWithTitleUIBarButtonItem[] = { &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("style"), &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; ADD_METHOD(UIBarButtonItem, initWithTitleUIBarButtonItem, 4, 0) @@ -553,16 +553,16 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIViewController, setTitle, 0) NEW_RECEIVER(UISegmentedControl) - IdentifierInfo *insertSegmentWithTitleUISegmentedControl[] = { + const IdentifierInfo *insertSegmentWithTitleUISegmentedControl[] = { &Ctx.Idents.get("insertSegmentWithTitle"), &Ctx.Idents.get("atIndex"), &Ctx.Idents.get("animated")}; ADD_METHOD(UISegmentedControl, insertSegmentWithTitleUISegmentedControl, 3, 0) - IdentifierInfo *setTitleUISegmentedControl[] = { + const IdentifierInfo *setTitleUISegmentedControl[] = { &Ctx.Idents.get("setTitle"), &Ctx.Idents.get("forSegmentAtIndex")}; ADD_METHOD(UISegmentedControl, setTitleUISegmentedControl, 2, 0) NEW_RECEIVER(NSAccessibilityCustomRotorItemResult) - IdentifierInfo + const IdentifierInfo *initWithItemLoadingTokenNSAccessibilityCustomRotorItemResult[] = { &Ctx.Idents.get("initWithItemLoadingToken"), &Ctx.Idents.get("customLabel")}; @@ -571,7 +571,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSAccessibilityCustomRotorItemResult, setCustomLabel, 0) NEW_RECEIVER(UIContextualAction) - IdentifierInfo *contextualActionWithStyleUIContextualAction[] = { + const IdentifierInfo *contextualActionWithStyleUIContextualAction[] = { &Ctx.Idents.get("contextualActionWithStyle"), 
&Ctx.Idents.get("title"), &Ctx.Idents.get("handler")}; ADD_METHOD(UIContextualAction, contextualActionWithStyleUIContextualAction, 3, @@ -579,7 +579,7 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(UIContextualAction, setTitle, 0) NEW_RECEIVER(NSAccessibilityCustomRotor) - IdentifierInfo *initWithLabelNSAccessibilityCustomRotor[] = { + const IdentifierInfo *initWithLabelNSAccessibilityCustomRotor[] = { &Ctx.Idents.get("initWithLabel"), &Ctx.Idents.get("itemSearchDelegate")}; ADD_METHOD(NSAccessibilityCustomRotor, initWithLabelNSAccessibilityCustomRotor, 2, 0) @@ -590,11 +590,11 @@ void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { ADD_UNARY_METHOD(NSWindowTab, setToolTip, 0) NEW_RECEIVER(NSAccessibilityCustomAction) - IdentifierInfo *initWithNameNSAccessibilityCustomAction[] = { + const IdentifierInfo *initWithNameNSAccessibilityCustomAction[] = { &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("handler")}; ADD_METHOD(NSAccessibilityCustomAction, initWithNameNSAccessibilityCustomAction, 2, 0) - IdentifierInfo *initWithNameTargetNSAccessibilityCustomAction[] = { + const IdentifierInfo *initWithNameTargetNSAccessibilityCustomAction[] = { &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("target"), &Ctx.Idents.get("selector")}; ADD_METHOD(NSAccessibilityCustomAction, @@ -618,12 +618,12 @@ void NonLocalizedStringChecker::initLocStringsMethods(ASTContext &Ctx) const { if (!LSM.empty()) return; - IdentifierInfo *LocalizedStringMacro[] = { + const IdentifierInfo *LocalizedStringMacro[] = { &Ctx.Idents.get("localizedStringForKey"), &Ctx.Idents.get("value"), &Ctx.Idents.get("table")}; LSM_INSERT_SELECTOR("NSBundle", LocalizedStringMacro, 3) LSM_INSERT_UNARY("NSDateFormatter", "stringFromDate") - IdentifierInfo *LocalizedStringFromDate[] = { + const IdentifierInfo *LocalizedStringFromDate[] = { &Ctx.Idents.get("localizedStringFromDate"), &Ctx.Idents.get("dateStyle"), &Ctx.Idents.get("timeStyle")}; LSM_INSERT_SELECTOR("NSDateFormatter", LocalizedStringFromDate, 3) @@ -903,7 +903,7 @@ static inline bool isNSStringType(QualType T, ASTContext &Ctx) { if (!Cls) return false; - IdentifierInfo *ClsName = Cls->getIdentifier(); + const IdentifierInfo *ClsName = Cls->getIdentifier(); // FIXME: Should we walk the chain of classes? 
return ClsName == &Ctx.Idents.get("NSString") || diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 06f1ad00eaf20..60934e51febe8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -1082,7 +1082,8 @@ void NullabilityChecker::checkPostObjCMessage(const ObjCMethodCall &M, M.getMessageKind() == OCM_PropertyAccess && !C.wasInlined) { bool LookupResolved = false; if (const MemRegion *ReceiverRegion = getTrackRegion(M.getReceiverSVal())) { - if (IdentifierInfo *Ident = M.getSelector().getIdentifierInfoForSlot(0)) { + if (const IdentifierInfo *Ident = + M.getSelector().getIdentifierInfoForSlot(0)) { LookupResolved = true; ObjectPropPair Key = std::make_pair(ReceiverRegion, Ident); const ConstrainedPropertyVal *PrevPropVal = diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp index 598b368e74d47..03dab4f7ada7a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp @@ -107,7 +107,7 @@ void ObjCSuperCallChecker::fillSelectors(ASTContext &Ctx, assert(Descriptor.ArgumentCount <= 1); // No multi-argument selectors yet. // Get the selector. - IdentifierInfo *II = &Ctx.Idents.get(Descriptor.SelectorName); + const IdentifierInfo *II = &Ctx.Idents.get(Descriptor.SelectorName); Selector Sel = Ctx.Selectors.getSelector(Descriptor.ArgumentCount, &II); ClassSelectors.insert(Sel); diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp index eb40711812e16..a6c4186cb15bb 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp @@ -26,8 +26,8 @@ namespace { class ObjCSuperDeallocChecker : public Checker { - mutable IdentifierInfo *IIdealloc = nullptr; - mutable IdentifierInfo *IINSObject = nullptr; + mutable const IdentifierInfo *IIdealloc = nullptr; + mutable const IdentifierInfo *IINSObject = nullptr; mutable Selector SELdealloc; const BugType DoubleSuperDeallocBugType{ diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp index 3c5f390f6d888..850c004680fd9 100644 --- a/clang/tools/libclang/CIndexCodeCompletion.cpp +++ b/clang/tools/libclang/CIndexCodeCompletion.cpp @@ -601,15 +601,15 @@ namespace { AllocatedResults.Contexts = getContextsForContextKind(contextKind, S); AllocatedResults.Selector = ""; - ArrayRef SelIdents = Context.getSelIdents(); - for (ArrayRef::iterator I = SelIdents.begin(), - E = SelIdents.end(); + ArrayRef SelIdents = Context.getSelIdents(); + for (ArrayRef::iterator I = SelIdents.begin(), + E = SelIdents.end(); I != E; ++I) { - if (IdentifierInfo *selIdent = *I) + if (const IdentifierInfo *selIdent = *I) AllocatedResults.Selector += selIdent->getName(); AllocatedResults.Selector += ":"; } - + QualType baseType = Context.getBaseType(); NamedDecl *D = nullptr; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h index 95e8a600f8382..cefec15a79809 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h @@ -56,7 +56,7 @@ class ExternalASTSourceWrapper : public 
clang::ExternalSemaSource { return m_Source->GetExternalCXXBaseSpecifiers(Offset); } - void updateOutOfDateIdentifier(clang::IdentifierInfo &II) override { + void updateOutOfDateIdentifier(const clang::IdentifierInfo &II) override { m_Source->updateOutOfDateIdentifier(II); } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index a95a9e9f01e3f..75493eb10d731 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -713,17 +713,18 @@ bool ClangASTSource::FindObjCMethodDeclsWithOrigin( Selector original_selector; if (decl_name.isObjCZeroArgSelector()) { - IdentifierInfo *ident = &original_ctx->Idents.get(decl_name.getAsString()); + const IdentifierInfo *ident = + &original_ctx->Idents.get(decl_name.getAsString()); original_selector = original_ctx->Selectors.getSelector(0, &ident); } else if (decl_name.isObjCOneArgSelector()) { const std::string &decl_name_string = decl_name.getAsString(); std::string decl_name_string_without_colon(decl_name_string.c_str(), decl_name_string.length() - 1); - IdentifierInfo *ident = + const IdentifierInfo *ident = &original_ctx->Idents.get(decl_name_string_without_colon); original_selector = original_ctx->Selectors.getSelector(1, &ident); } else { - SmallVector idents; + SmallVector idents; clang::Selector sel = decl_name.getObjCSelector(); diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index 5d7c5f38d1805..6894cdccaf95a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -316,7 +316,7 @@ class ObjCRuntimeMethodType { const bool HasRelatedResultType = false; const bool for_expression = true; - std::vector selector_components; + std::vector selector_components; const char *name_cursor = name; bool is_zero_argument = true; @@ -335,7 +335,7 @@ class ObjCRuntimeMethodType { } } - clang::IdentifierInfo **identifier_infos = selector_components.data(); + const clang::IdentifierInfo **identifier_infos = selector_components.data(); if (!identifier_infos) { return nullptr; } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 4a1c8d5765521..ee634d12b3c41 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7910,14 +7910,14 @@ bool TypeSystemClang::AddObjCClassProperty( if (property_setter_name) { std::string property_setter_no_colon(property_setter_name, strlen(property_setter_name) - 1); - clang::IdentifierInfo *setter_ident = + const clang::IdentifierInfo *setter_ident = &clang_ast.Idents.get(property_setter_no_colon); setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident); } else if (!(property_attributes & DW_APPLE_PROPERTY_readonly)) { std::string setter_sel_string("set"); setter_sel_string.push_back(::toupper(property_name[0])); setter_sel_string.append(&property_name[1]); - clang::IdentifierInfo *setter_ident = + const clang::IdentifierInfo *setter_ident = &clang_ast.Idents.get(setter_sel_string); setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident); } @@ -7925,11 +7925,12 @@ bool TypeSystemClang::AddObjCClassProperty( 
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_setter); if (property_getter_name != nullptr) { - clang::IdentifierInfo *getter_ident = + const clang::IdentifierInfo *getter_ident = &clang_ast.Idents.get(property_getter_name); getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident); } else { - clang::IdentifierInfo *getter_ident = &clang_ast.Idents.get(property_name); + const clang::IdentifierInfo *getter_ident = + &clang_ast.Idents.get(property_name); getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident); } property_decl->setGetterName(getter_sel); @@ -8091,7 +8092,7 @@ clang::ObjCMethodDecl *TypeSystemClang::AddMethodToObjCObjectType( return nullptr; selector_start++; - llvm::SmallVector selector_idents; + llvm::SmallVector selector_idents; size_t len = 0; const char *start; From 51f1681424f1a8ccf1e3432d71c341e799597171 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 10 Apr 2024 19:06:29 -0700 Subject: [PATCH 086/886] [clang-format] Don't merge a short block for SBS_Never (#88238) Also fix unit tests. Fixes #87484. --- clang/lib/Format/FormatToken.h | 2 + clang/lib/Format/UnwrappedLineFormatter.cpp | 8 +- clang/lib/Format/UnwrappedLineParser.cpp | 7 +- clang/unittests/Format/BracesRemoverTest.cpp | 4 +- clang/unittests/Format/FormatTest.cpp | 84 +++++++++++++++---- .../Format/FormatTestMacroExpansion.cpp | 7 +- clang/unittests/Format/TokenAnnotatorTest.cpp | 24 ++++++ 7 files changed, 110 insertions(+), 26 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 48b6a9092a8c0..f651e6228c206 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -35,6 +35,8 @@ namespace format { TYPE(BinaryOperator) \ TYPE(BitFieldColon) \ TYPE(BlockComment) \ + /* l_brace of a block that is not the body of a (e.g. loop) statement. */ \ + TYPE(BlockLBrace) \ TYPE(BracedListLBrace) \ /* The colon at the end of a case label. 
*/ \ TYPE(CaseLabelColon) \ diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index fb31980ab9f49..4ae54e56331bd 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -796,8 +796,12 @@ class LineJoiner { } } - if (const auto *LastNonComment = Line.getLastNonComment(); - LastNonComment && LastNonComment->is(tok::l_brace)) { + if (Line.endsWith(tok::l_brace)) { + if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never && + Line.First->is(TT_BlockLBrace)) { + return 0; + } + if (IsSplitBlock && Line.First == Line.Last && I > AnnotatedLines.begin() && (I[-1]->endsWith(tok::kw_else) || IsCtrlStmt(*I[-1]))) { diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c1f7e2874beb2..603268f771ac5 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -395,9 +395,10 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, ParseDefault(); continue; } - if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) && - tryToParseBracedList()) { - continue; + if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) { + if (tryToParseBracedList()) + continue; + FormatTok->setFinalizedType(TT_BlockLBrace); } parseBlock(); ++StatementCount; diff --git a/clang/unittests/Format/BracesRemoverTest.cpp b/clang/unittests/Format/BracesRemoverTest.cpp index 5155eefb9e08c..2e983b887ffcb 100644 --- a/clang/unittests/Format/BracesRemoverTest.cpp +++ b/clang/unittests/Format/BracesRemoverTest.cpp @@ -209,7 +209,9 @@ TEST_F(BracesRemoverTest, RemoveBraces) { verifyFormat("if (a) {\n" " b;\n" "} else {\n" - " { c; }\n" + " {\n" + " c;\n" + " }\n" "}", Style); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f312a9e21158a..4906b3350b5b2 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -52,7 +52,13 @@ TEST_F(FormatTest, FormatsUnwrappedLinesAtFirstFormat) { } TEST_F(FormatTest, FormatsNestedBlockStatements) { - verifyFormat("{\n {\n {}\n }\n}", "{{{}}}"); + verifyFormat("{\n" + " {\n" + " {\n" + " }\n" + " }\n" + "}", + "{{{}}}"); } TEST_F(FormatTest, FormatsNestedCall) { @@ -5669,7 +5675,10 @@ TEST_F(FormatTest, LayoutCodeInMacroDefinitions) { getLLVMStyleWithColumns(14)); } -TEST_F(FormatTest, LayoutRemainingTokens) { verifyFormat("{}"); } +TEST_F(FormatTest, LayoutRemainingTokens) { + verifyFormat("{\n" + "}"); +} TEST_F(FormatTest, MacroDefinitionInsideStatement) { verifyFormat("int x,\n" @@ -6577,7 +6586,11 @@ TEST_F(FormatTest, FormatAlignInsidePreprocessorElseBlock) { } TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) { - verifyFormat("{\n { a #c; }\n}"); + verifyFormat("{\n" + " {\n" + " a #c;\n" + " }\n" + "}"); } TEST_F(FormatTest, FormatUnbalancedStructuralElements) { @@ -6937,13 +6950,13 @@ TEST_F(FormatTest, FormatNestedBlocksInMacros) { } TEST_F(FormatTest, PutEmptyBlocksIntoOneLine) { - verifyFormat("{}"); verifyFormat("enum E {};"); verifyFormat("enum E {}"); FormatStyle Style = getLLVMStyle(); Style.SpaceInEmptyBlock = true; verifyFormat("void f() { }", "void f() {}", Style); Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Empty; + verifyFormat("{ }", Style); verifyFormat("while (true) { }", "while (true) {}", Style); Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.BeforeElse = false; @@ -11527,10 +11540,18 @@ TEST_F(FormatTest, 
UnderstandsNewAndDelete) { "void new (link p);\n" "void delete (link p);"); - verifyFormat("{ p->new(); }\n" - "{ p->delete(); }", - "{ p->new (); }\n" - "{ p->delete (); }"); + verifyFormat("{\n" + " p->new();\n" + "}\n" + "{\n" + " p->delete();\n" + "}", + "{\n" + " p->new ();\n" + "}\n" + "{\n" + " p->delete ();\n" + "}"); FormatStyle AfterPlacementOperator = getLLVMStyle(); AfterPlacementOperator.SpaceBeforeParens = FormatStyle::SBPO_Custom; @@ -12352,7 +12373,9 @@ TEST_F(FormatTest, FormatsCasts) { // FIXME: single value wrapped with paren will be treated as cast. verifyFormat("void f(int i = (kValue)*kMask) {}"); - verifyFormat("{ (void)F; }"); + verifyFormat("{\n" + " (void)F;\n" + "}"); // Don't break after a cast's verifyFormat("int aaaaaaaaaaaaaaaaaaaaaaaaaaa =\n" @@ -13575,7 +13598,8 @@ TEST_F(FormatTest, IncorrectAccessSpecifier) { verifyFormat("public\n" "B {}"); verifyFormat("public\n" - "{}"); + "{\n" + "}"); verifyFormat("public\n" "B { int x; }"); } @@ -13632,10 +13656,31 @@ TEST_F(FormatTest, DoesNotTouchUnwrappedLinesWithErrors) { } TEST_F(FormatTest, IncorrectCodeErrorDetection) { - verifyFormat("{\n {}", "{\n{\n}"); - verifyFormat("{\n {}", "{\n {\n}"); - verifyFormat("{\n {}", "{\n {\n }"); - verifyFormat("{\n {}\n}\n}", "{\n {\n }\n }\n}"); + verifyFormat("{\n" + " {\n" + " }", + "{\n" + "{\n" + "}"); + verifyFormat("{\n" + " {\n" + " }", + "{\n" + " {\n" + "}"); + verifyFormat("{\n" + " {\n" + " }"); + verifyFormat("{\n" + " {\n" + " }\n" + "}\n" + "}", + "{\n" + " {\n" + " }\n" + " }\n" + "}"); verifyFormat("{\n" " {\n" @@ -14080,10 +14125,14 @@ TEST_F(FormatTest, FormatsBracedListsInColumnLayout) { "}"); verifyFormat("void foo() {\n" " { // asdf\n" - " { int a; }\n" + " {\n" + " int a;\n" + " }\n" " }\n" " {\n" - " { int b; }\n" + " {\n" + " int b;\n" + " }\n" " }\n" "}"); verifyFormat("namespace n {\n" @@ -14095,7 +14144,8 @@ TEST_F(FormatTest, FormatsBracedListsInColumnLayout) { " }\n" " }\n" " }\n" - " {}\n" + " {\n" + " }\n" "}\n" "} // namespace n"); diff --git a/clang/unittests/Format/FormatTestMacroExpansion.cpp b/clang/unittests/Format/FormatTestMacroExpansion.cpp index 85ab6ea3794e8..d391fe3d715c3 100644 --- a/clang/unittests/Format/FormatTestMacroExpansion.cpp +++ b/clang/unittests/Format/FormatTestMacroExpansion.cpp @@ -43,9 +43,10 @@ TEST_F(FormatTestMacroExpansion, UnexpandConfiguredMacros) { "STMT", Style); verifyFormat("void f() { ID(a *b); }", Style); - verifyFormat(R"(ID( - { ID(a *b); }); -)", + verifyFormat("ID(\n" + " {\n" + " ID(a *b);\n" + " });", Style); verifyIncompleteFormat("ID3({, ID(a *b),\n" " ;\n" diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index c3153cf6b16f0..da02ced8c7a94 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2856,6 +2856,30 @@ TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) { EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_FunctionLBrace); } +TEST_F(TokenAnnotatorTest, BlockLBrace) { + auto Tokens = annotate("{\n" + " {\n" + " foo();\n" + " }\n" + "}"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[0], BK_Block); + EXPECT_TOKEN(Tokens[1], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[1], BK_Block); + + Tokens = annotate("void bar() {\n" + " {\n" + " foo();\n" + " }\n" + "}"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace); + 
EXPECT_BRACE_KIND(Tokens[4], BK_Block);
+  EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_BlockLBrace);
+  EXPECT_BRACE_KIND(Tokens[5], BK_Block);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

From 6b46166ef2612d2a58767447b3db8f0343afb552 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht
Date: Thu, 11 Apr 2024 02:14:07 +0000
Subject: [PATCH 087/886] [llvm][NFC] Suppress `-Wunused-result` call to
 `write`

Commit 87e6f87fe7e343eb656e9b49d30cbb065c086651 adds a call to `::write()`,
which may be annotated with `warn_unused_result`, leading to
`-Wunused-result` failures.
---
 llvm/lib/Support/raw_socket_stream.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/raw_socket_stream.cpp b/llvm/lib/Support/raw_socket_stream.cpp
index 1dcf6352f2cc2..14e2308df4d7e 100644
--- a/llvm/lib/Support/raw_socket_stream.cpp
+++ b/llvm/lib/Support/raw_socket_stream.cpp
@@ -265,7 +265,10 @@ void ListeningSocket::shutdown() {
 
   // Ensure ::poll returns if shutdown is called by a separate thread
   char Byte = 'A';
-  ::write(PipeFD[1], &Byte, 1);
+  ssize_t written = ::write(PipeFD[1], &Byte, 1);
+
+  // Ignore any write() error
+  (void)written;
 }
 
 ListeningSocket::~ListeningSocket() {

From f4509cf284ced95f31dc7eb63144b4bc47899c43 Mon Sep 17 00:00:00 2001
From: Freddy Ye
Date: Thu, 11 Apr 2024 10:18:29 +0800
Subject: [PATCH 088/886] [X86][MC] Support enc/dec for SETZUCC and promoted
 SETCC. (#86473)

apx-spec: https://cdrdv2.intel.com/v1/dl/getContent/784266

apx-syntax-recommendation: https://cdrdv2.intel.com/v1/dl/getContent/817241
---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp |   1 +
 .../X86/MCTargetDesc/X86MCCodeEmitter.cpp     |   2 +
 llvm/lib/Target/X86/X86InstrAsmAlias.td       |   8 +-
 llvm/lib/Target/X86/X86InstrCMovSetCC.td      |  19 +++
 llvm/lib/Target/X86/X86InstrControl.td        |  24 ++--
 .../MC/Disassembler/X86/apx/evex-format.txt   |  10 ++
 llvm/test/MC/Disassembler/X86/apx/setcc.txt   | 130 ++++++++++++++++++
 llvm/test/MC/Disassembler/X86/apx/setzucc.txt | 130 ++++++++++++++++++
 llvm/test/MC/X86/apx/evex-format-att.s        |  10 ++
 llvm/test/MC/X86/apx/evex-format-intel.s      |  10 ++
 llvm/test/MC/X86/apx/setcc-att.s              | 101 ++++++++++++++
 llvm/test/MC/X86/apx/setcc-intel.s            |  98 +++++++++++++
 llvm/test/MC/X86/apx/setzucc-att.s            | 101 ++++++++++++++
 llvm/test/MC/X86/apx/setzucc-intel.s          |  98 +++++++++++++
 llvm/test/TableGen/x86-fold-tables.inc        |   1 +
 llvm/utils/TableGen/AsmMatcherEmitter.cpp     |  13 +-
 16 files changed, 736 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/setcc.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/apx/setzucc.txt
 create mode 100644 llvm/test/MC/X86/apx/setcc-att.s
 create mode 100644 llvm/test/MC/X86/apx/setcc-intel.s
 create mode 100644 llvm/test/MC/X86/apx/setzucc-att.s
 create mode 100644 llvm/test/MC/X86/apx/setzucc-intel.s

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6401df9f49f03..b05a036fb2f06 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3287,6 +3287,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
   // FIXME: Hack to recognize setneb as setne.
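  // (SETcc only ever takes a byte operand in AT&T syntax, so some assemblers
  // accept a redundant 'b' size suffix, e.g. "setneb" for "setne". The check
  // below strips that trailing 'b' unless it is part of the condition name
  // itself (setb/setnb) or of the new APX setzub/setzunb mnemonics.)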
if (PatchedName.starts_with("set") && PatchedName.ends_with("b") && + PatchedName != "setzub" && PatchedName != "setzunb" && PatchedName != "setb" && PatchedName != "setnb") PatchedName = PatchedName.substr(0, Name.size()-1); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 92a14226a0dc0..a5859f98bae02 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1155,6 +1155,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg); break; } + case X86II::MRMXmCC: case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: @@ -1282,6 +1283,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, Prefix.setRR2(MI, CurOp++); break; } + case X86II::MRMXrCC: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: diff --git a/llvm/lib/Target/X86/X86InstrAsmAlias.td b/llvm/lib/Target/X86/X86InstrAsmAlias.td index 6b15213a2e683..d06a0c79b46bb 100644 --- a/llvm/lib/Target/X86/X86InstrAsmAlias.td +++ b/llvm/lib/Target/X86/X86InstrAsmAlias.td @@ -760,7 +760,7 @@ def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}", (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>; -// CMOV SETCC Aliases +// CMOV SETCC SETZUCC Aliases multiclass CMOV_SETCC_Aliases { def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}", (CMOV16rr GR16:$dst, GR16:$src, CC), 0>; @@ -787,8 +787,12 @@ let Predicates = [In64BitMode] in { (CMOV64rr_ND GR64:$dst, GR64:$src1, GR64:$src2, CC), 0>; def : InstAlias<"cmov"#Cond#"{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", (CMOV64rm_ND GR64:$dst, GR64:$src1, i64mem:$src2, CC), 0>; -} + def : InstAlias<"setzu"#Cond#"\t$dst", (SETZUCCr GR8:$dst, CC), 0>; + def : InstAlias<"setzu"#Cond#"\t$dst", (SETZUCCm i8mem:$dst, CC), 0>; + def : InstAlias<"set"#Cond#"\t$dst", (SETCCr_EVEX GR8:$dst, CC), 0>; + def : InstAlias<"set"#Cond#"\t$dst", (SETCCm_EVEX i8mem:$dst, CC), 0>; +} def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>; def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>; } diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td index d41591f68a605..27a0c889a4da3 100644 --- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td +++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td @@ -127,6 +127,25 @@ let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in { TB, Sched<[WriteSETCCStore]>; } // Uses = [EFLAGS] +// SetZUCC and promoted SetCC instructions. +let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1, + hasSideEffects = 0, Predicates = [In64BitMode], Predicates = [HasNDD] in { + def SETZUCCr : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond), + "setzu${cond}\t$dst", []>, + XD, ZU, NoCD8, Sched<[WriteSETCC]>; + def SETCCr_EVEX : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond), + "set${cond}\t$dst", []>, + XD, PL, Sched<[WriteSETCC]>; + let mayStore = 1 in { + def SETZUCCm : I<0x40, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond), + "setzu${cond}\t$dst", []>, + XD, ZU, NoCD8, Sched<[WriteSETCCStore]>; + def SETCCm_EVEX : I<0x40, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond), + "set${cond}\t$dst", []>, + XD, PL, Sched<[WriteSETCCStore]>; + } +} + // SALC is an undocumented instruction. Information for this instruction can be found // here http://www.rcollins.org/secrets/opcodes/SALC.html // Set AL if carry. 
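A minimal sketch of what the new mnemonics mean, assuming the ZU ("zero
upper") semantics described in the APX spec linked above; this patch only adds
encoding/decoding support, not codegen. With APX,

    setzue %al         # al = (ZF ? 1 : 0), and bits 63:8 of rax are cleared

subsumes the classic zero-extension idiom

    sete   %al
    movzbl %al, %eax   # manual zero-extension of the flag byte

while with a memory destination the ZU form behaves like an ordinary one-byte
SETcc store.
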
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 5171c2249dee9..62cc758cc594b 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -167,24 +167,24 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { } let Predicates = [Not64BitMode], AsmVariantName = "att" in { - def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t$seg, $off", []>, - OpSize16, Sched<[WriteJump]>; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "ljmp{l}\t$seg, $off", []>, OpSize32, Sched<[WriteJump]>; + def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "ljmp{w}\t$seg, $off", []>, + OpSize16, Sched<[WriteJump]>; } let mayLoad = 1 in { def FARJMP64m : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst), "ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>; + def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst), + "{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>; let AsmVariantName = "att" in def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst), "ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>; - def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst), - "{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>; } } @@ -253,21 +253,21 @@ let isCall = 1 in } let Predicates = [Not64BitMode], AsmVariantName = "att" in { - def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "lcall{w}\t$seg, $off", []>, - OpSize16, Sched<[WriteJump]>; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "lcall{l}\t$seg, $off", []>, OpSize32, Sched<[WriteJump]>; + def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "lcall{w}\t$seg, $off", []>, + OpSize16, Sched<[WriteJump]>; } let mayLoad = 1 in { - def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst), - "lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst), "{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>; + def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst), + "lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>; } } diff --git a/llvm/test/MC/Disassembler/X86/apx/evex-format.txt b/llvm/test/MC/Disassembler/X86/apx/evex-format.txt index 1156f5c409922..e9a9f1327a17e 100644 --- a/llvm/test/MC/Disassembler/X86/apx/evex-format.txt +++ b/llvm/test/MC/Disassembler/X86/apx/evex-format.txt @@ -215,6 +215,16 @@ # INTEL: sar r17, r16, 123 0x62,0xfc,0xf4,0x10,0xc1,0xf8,0x7b +## MRMXrCC +# ATT: setzuo %r16b +# INTEL: setzuo r16b +0x62,0xfc,0x7f,0x18,0x40,0xc0 + +## MRMXmCC +# ATT: setzuo (%r16,%r17) +# INTEL: setzuo byte ptr [r16 + r17] +0x62,0xfc,0x7b,0x18,0x40,0x04,0x08 + ## NoCD8 # ATT: {nf} negq 123(%r16) diff --git a/llvm/test/MC/Disassembler/X86/apx/setcc.txt b/llvm/test/MC/Disassembler/X86/apx/setcc.txt new file mode 100644 index 0000000000000..1c00acfb76672 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/setcc.txt @@ -0,0 +1,130 @@ +# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT +# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL + +# ATT: {evex} seto %al +# INTEL: {evex} seto al +0x62,0xf4,0x7f,0x08,0x40,0xc0 + +# ATT: {evex} setno %al +# INTEL: {evex} setno al +0x62,0xf4,0x7f,0x08,0x41,0xc0 + +# ATT: {evex} setb %al +# INTEL: {evex} setb al 
+0x62,0xf4,0x7f,0x08,0x42,0xc0 + +# ATT: {evex} setae %al +# INTEL: {evex} setae al +0x62,0xf4,0x7f,0x08,0x43,0xc0 + +# ATT: {evex} sete %al +# INTEL: {evex} sete al +0x62,0xf4,0x7f,0x08,0x44,0xc0 + +# ATT: {evex} setne %al +# INTEL: {evex} setne al +0x62,0xf4,0x7f,0x08,0x45,0xc0 + +# ATT: {evex} setbe %al +# INTEL: {evex} setbe al +0x62,0xf4,0x7f,0x08,0x46,0xc0 + +# ATT: {evex} seta %al +# INTEL: {evex} seta al +0x62,0xf4,0x7f,0x08,0x47,0xc0 + +# ATT: {evex} sets %al +# INTEL: {evex} sets al +0x62,0xf4,0x7f,0x08,0x48,0xc0 + +# ATT: {evex} setns %al +# INTEL: {evex} setns al +0x62,0xf4,0x7f,0x08,0x49,0xc0 + +# ATT: {evex} setp %al +# INTEL: {evex} setp al +0x62,0xf4,0x7f,0x08,0x4a,0xc0 + +# ATT: {evex} setnp %al +# INTEL: {evex} setnp al +0x62,0xf4,0x7f,0x08,0x4b,0xc0 + +# ATT: {evex} setl %al +# INTEL: {evex} setl al +0x62,0xf4,0x7f,0x08,0x4c,0xc0 + +# ATT: {evex} setge %al +# INTEL: {evex} setge al +0x62,0xf4,0x7f,0x08,0x4d,0xc0 + +# ATT: {evex} setle %al +# INTEL: {evex} setle al +0x62,0xf4,0x7f,0x08,0x4e,0xc0 + +# ATT: {evex} setg %al +# INTEL: {evex} setg al +0x62,0xf4,0x7f,0x08,0x4f,0xc0 + +# ATT: {evex} seto (%rax) +# INTEL: {evex} seto byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x40,0x00 + +# ATT: {evex} setno (%rax) +# INTEL: {evex} setno byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x41,0x00 + +# ATT: {evex} setb (%rax) +# INTEL: {evex} setb byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x42,0x00 + +# ATT: {evex} setae (%rax) +# INTEL: {evex} setae byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x43,0x00 + +# ATT: {evex} sete (%rax) +# INTEL: {evex} sete byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x44,0x00 + +# ATT: {evex} setne (%rax) +# INTEL: {evex} setne byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x45,0x00 + +# ATT: {evex} setbe (%rax) +# INTEL: {evex} setbe byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x46,0x00 + +# ATT: {evex} seta (%rax) +# INTEL: {evex} seta byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x47,0x00 + +# ATT: {evex} sets (%rax) +# INTEL: {evex} sets byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x48,0x00 + +# ATT: {evex} setns (%rax) +# INTEL: {evex} setns byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x49,0x00 + +# ATT: {evex} setp (%rax) +# INTEL: {evex} setp byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4a,0x00 + +# ATT: {evex} setnp (%rax) +# INTEL: {evex} setnp byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4b,0x00 + +# ATT: {evex} setl (%rax) +# INTEL: {evex} setl byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4c,0x00 + +# ATT: {evex} setge (%rax) +# INTEL: {evex} setge byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4d,0x00 + +# ATT: {evex} setle (%rax) +# INTEL: {evex} setle byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4e,0x00 + +# ATT: {evex} setg (%rax) +# INTEL: {evex} setg byte ptr [rax] +0x62,0xf4,0x7f,0x08,0x4f,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/setzucc.txt b/llvm/test/MC/Disassembler/X86/apx/setzucc.txt new file mode 100644 index 0000000000000..44aaa4b33cc85 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/setzucc.txt @@ -0,0 +1,130 @@ +# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT +# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL + +# ATT: setzuo %al +# INTEL: setzuo al +0x62,0xf4,0x7f,0x18,0x40,0xc0 + +# ATT: setzuno %al +# INTEL: setzuno al +0x62,0xf4,0x7f,0x18,0x41,0xc0 + +# ATT: setzub %al +# INTEL: setzub al +0x62,0xf4,0x7f,0x18,0x42,0xc0 + +# ATT: setzuae %al +# INTEL: setzuae al +0x62,0xf4,0x7f,0x18,0x43,0xc0 + +# ATT: setzue %al +# INTEL: setzue al +0x62,0xf4,0x7f,0x18,0x44,0xc0 + +# ATT: setzune %al +# INTEL: setzune al +0x62,0xf4,0x7f,0x18,0x45,0xc0 + +# 
ATT: setzube %al +# INTEL: setzube al +0x62,0xf4,0x7f,0x18,0x46,0xc0 + +# ATT: setzua %al +# INTEL: setzua al +0x62,0xf4,0x7f,0x18,0x47,0xc0 + +# ATT: setzus %al +# INTEL: setzus al +0x62,0xf4,0x7f,0x18,0x48,0xc0 + +# ATT: setzuns %al +# INTEL: setzuns al +0x62,0xf4,0x7f,0x18,0x49,0xc0 + +# ATT: setzup %al +# INTEL: setzup al +0x62,0xf4,0x7f,0x18,0x4a,0xc0 + +# ATT: setzunp %al +# INTEL: setzunp al +0x62,0xf4,0x7f,0x18,0x4b,0xc0 + +# ATT: setzul %al +# INTEL: setzul al +0x62,0xf4,0x7f,0x18,0x4c,0xc0 + +# ATT: setzuge %al +# INTEL: setzuge al +0x62,0xf4,0x7f,0x18,0x4d,0xc0 + +# ATT: setzule %al +# INTEL: setzule al +0x62,0xf4,0x7f,0x18,0x4e,0xc0 + +# ATT: setzug %al +# INTEL: setzug al +0x62,0xf4,0x7f,0x18,0x4f,0xc0 + +# ATT: setzuo (%rax) +# INTEL: setzuo byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x40,0x00 + +# ATT: setzuno (%rax) +# INTEL: setzuno byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x41,0x00 + +# ATT: setzub (%rax) +# INTEL: setzub byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x42,0x00 + +# ATT: setzuae (%rax) +# INTEL: setzuae byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x43,0x00 + +# ATT: setzue (%rax) +# INTEL: setzue byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x44,0x00 + +# ATT: setzune (%rax) +# INTEL: setzune byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x45,0x00 + +# ATT: setzube (%rax) +# INTEL: setzube byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x46,0x00 + +# ATT: setzua (%rax) +# INTEL: setzua byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x47,0x00 + +# ATT: setzus (%rax) +# INTEL: setzus byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x48,0x00 + +# ATT: setzuns (%rax) +# INTEL: setzuns byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x49,0x00 + +# ATT: setzup (%rax) +# INTEL: setzup byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4a,0x00 + +# ATT: setzunp (%rax) +# INTEL: setzunp byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4b,0x00 + +# ATT: setzul (%rax) +# INTEL: setzul byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4c,0x00 + +# ATT: setzuge (%rax) +# INTEL: setzuge byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4d,0x00 + +# ATT: setzule (%rax) +# INTEL: setzule byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4e,0x00 + +# ATT: setzug (%rax) +# INTEL: setzug byte ptr [rax] +0x62,0xf4,0x7f,0x18,0x4f,0x00 diff --git a/llvm/test/MC/X86/apx/evex-format-att.s b/llvm/test/MC/X86/apx/evex-format-att.s index 36df3f3757dc3..e59039ea2d822 100644 --- a/llvm/test/MC/X86/apx/evex-format-att.s +++ b/llvm/test/MC/X86/apx/evex-format-att.s @@ -210,6 +210,16 @@ # CHECK: encoding: [0x62,0xfc,0xf4,0x10,0xc1,0xf8,0x7b] sarq $123, %r16, %r17 +## MRMXrCC +# CHECK: setzuo %r16b +# CHECK: encoding: [0x62,0xfc,0x7f,0x18,0x40,0xc0] + setzuo %r16b + +## MRMXmCC +# CHECK: setzuo (%r16,%r17) +# CHECK: encoding: [0x62,0xfc,0x7b,0x18,0x40,0x04,0x08] + setzuo (%r16,%r17) + ## NoCD8 # CHECK: {nf} negq 123(%r16) diff --git a/llvm/test/MC/X86/apx/evex-format-intel.s b/llvm/test/MC/X86/apx/evex-format-intel.s index 2b346e0e85806..42d4c0c0081a7 100644 --- a/llvm/test/MC/X86/apx/evex-format-intel.s +++ b/llvm/test/MC/X86/apx/evex-format-intel.s @@ -210,6 +210,16 @@ # CHECK: encoding: [0x62,0xfc,0xf4,0x10,0xc1,0xf8,0x7b] sar r17, r16, 123 +## MRMXrCC +# CHECK: setzuo r16b +# CHECK: encoding: [0x62,0xfc,0x7f,0x18,0x40,0xc0] + setzuo r16b + +## MRMXmCC +# CHECK: setzuo byte ptr [r16 + r17] +# CHECK: encoding: [0x62,0xfc,0x7b,0x18,0x40,0x04,0x08] + setzuo byte ptr [r16 + r17] + ## NoCD8 # CHECK: {nf} neg qword ptr [r16 + 123] diff --git a/llvm/test/MC/X86/apx/setcc-att.s b/llvm/test/MC/X86/apx/setcc-att.s new file mode 100644 index 0000000000000..b5518081a8201 --- /dev/null +++ b/llvm/test/MC/X86/apx/setcc-att.s @@ -0,0 +1,101 @@ +# RUN: 
llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-32: error: +# ERROR-NOT: error: +# CHECK: {evex} seto %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x40,0xc0] + {evex} seto %al +# CHECK: {evex} setno %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x41,0xc0] + {evex} setno %al +# CHECK: {evex} setb %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x42,0xc0] + {evex} setb %al +# CHECK: {evex} setae %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x43,0xc0] + {evex} setae %al +# CHECK: {evex} sete %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x44,0xc0] + {evex} sete %al +# CHECK: {evex} setne %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x45,0xc0] + {evex} setne %al +# CHECK: {evex} setbe %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x46,0xc0] + {evex} setbe %al +# CHECK: {evex} seta %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x47,0xc0] + {evex} seta %al +# CHECK: {evex} sets %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x48,0xc0] + {evex} sets %al +# CHECK: {evex} setns %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x49,0xc0] + {evex} setns %al +# CHECK: {evex} setp %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4a,0xc0] + {evex} setp %al +# CHECK: {evex} setnp %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4b,0xc0] + {evex} setnp %al +# CHECK: {evex} setl %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4c,0xc0] + {evex} setl %al +# CHECK: {evex} setge %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4d,0xc0] + {evex} setge %al +# CHECK: {evex} setle %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4e,0xc0] + {evex} setle %al +# CHECK: {evex} setg %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4f,0xc0] + {evex} setg %al +# CHECK: {evex} seto (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x40,0x00] + {evex} seto (%rax) +# CHECK: {evex} setno (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x41,0x00] + {evex} setno (%rax) +# CHECK: {evex} setb (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x42,0x00] + {evex} setb (%rax) +# CHECK: {evex} setae (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x43,0x00] + {evex} setae (%rax) +# CHECK: {evex} sete (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x44,0x00] + {evex} sete (%rax) +# CHECK: {evex} setne (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x45,0x00] + {evex} setne (%rax) +# CHECK: {evex} setbe (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x46,0x00] + {evex} setbe (%rax) +# CHECK: {evex} seta (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x47,0x00] + {evex} seta (%rax) +# CHECK: {evex} sets (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x48,0x00] + {evex} sets (%rax) +# CHECK: {evex} setns (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x49,0x00] + {evex} setns (%rax) +# CHECK: {evex} setp (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4a,0x00] + {evex} setp (%rax) +# CHECK: {evex} setnp (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4b,0x00] + {evex} setnp (%rax) +# CHECK: {evex} setl (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4c,0x00] + {evex} setl (%rax) +# CHECK: {evex} setge (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4d,0x00] + {evex} setge (%rax) +# CHECK: {evex} setle (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4e,0x00] + {evex} setle (%rax) +# CHECK: {evex} setg (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4f,0x00] + {evex} setgb (%rax) diff --git a/llvm/test/MC/X86/apx/setcc-intel.s b/llvm/test/MC/X86/apx/setcc-intel.s new file mode 100644 index 0000000000000..e005c2edb95c4 --- /dev/null 
+++ b/llvm/test/MC/X86/apx/setcc-intel.s @@ -0,0 +1,98 @@ +# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s + +# CHECK: {evex} seto al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x40,0xc0] + {evex} seto al +# CHECK: {evex} setno al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x41,0xc0] + {evex} setno al +# CHECK: {evex} setb al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x42,0xc0] + {evex} setb al +# CHECK: {evex} setae al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x43,0xc0] + {evex} setae al +# CHECK: {evex} sete al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x44,0xc0] + {evex} sete al +# CHECK: {evex} setne al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x45,0xc0] + {evex} setne al +# CHECK: {evex} setbe al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x46,0xc0] + {evex} setbe al +# CHECK: {evex} seta al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x47,0xc0] + {evex} seta al +# CHECK: {evex} sets al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x48,0xc0] + {evex} sets al +# CHECK: {evex} setns al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x49,0xc0] + {evex} setns al +# CHECK: {evex} setp al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4a,0xc0] + {evex} setp al +# CHECK: {evex} setnp al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4b,0xc0] + {evex} setnp al +# CHECK: {evex} setl al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4c,0xc0] + {evex} setl al +# CHECK: {evex} setge al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4d,0xc0] + {evex} setge al +# CHECK: {evex} setle al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4e,0xc0] + {evex} setle al +# CHECK: {evex} setg al +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4f,0xc0] + {evex} setg al +# CHECK: {evex} seto byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x40,0x00] + {evex} seto byte ptr [rax] +# CHECK: {evex} setno byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x41,0x00] + {evex} setno byte ptr [rax] +# CHECK: {evex} setb byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x42,0x00] + {evex} setb byte ptr [rax] +# CHECK: {evex} setae byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x43,0x00] + {evex} setae byte ptr [rax] +# CHECK: {evex} sete byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x44,0x00] + {evex} sete byte ptr [rax] +# CHECK: {evex} setne byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x45,0x00] + {evex} setne byte ptr [rax] +# CHECK: {evex} setbe byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x46,0x00] + {evex} setbe byte ptr [rax] +# CHECK: {evex} seta byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x47,0x00] + {evex} seta byte ptr [rax] +# CHECK: {evex} sets byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x48,0x00] + {evex} sets byte ptr [rax] +# CHECK: {evex} setns byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x49,0x00] + {evex} setns byte ptr [rax] +# CHECK: {evex} setp byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4a,0x00] + {evex} setp byte ptr [rax] +# CHECK: {evex} setnp byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4b,0x00] + {evex} setnp byte ptr [rax] +# CHECK: {evex} setl byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4c,0x00] + {evex} setl byte ptr [rax] +# CHECK: {evex} setge byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4d,0x00] + {evex} setge byte ptr [rax] +# CHECK: {evex} setle byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x08,0x4e,0x00] + {evex} setle byte ptr [rax] +# CHECK: {evex} setg byte ptr [rax] +# CHECK: encoding: 
[0x62,0xf4,0x7f,0x08,0x4f,0x00] + {evex} setg byte ptr [rax] diff --git a/llvm/test/MC/X86/apx/setzucc-att.s b/llvm/test/MC/X86/apx/setzucc-att.s new file mode 100644 index 0000000000000..b4b7a633fa319 --- /dev/null +++ b/llvm/test/MC/X86/apx/setzucc-att.s @@ -0,0 +1,101 @@ +# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-32: error: +# ERROR-NOT: error: +# CHECK: setzuo %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x40,0xc0] + setzuo %al +# CHECK: setzuno %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x41,0xc0] + setzuno %al +# CHECK: setzub %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x42,0xc0] + setzub %al +# CHECK: setzuae %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x43,0xc0] + setzuae %al +# CHECK: setzue %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0] + setzue %al +# CHECK: setzune %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0] + setzune %al +# CHECK: setzube %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x46,0xc0] + setzube %al +# CHECK: setzua %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x47,0xc0] + setzua %al +# CHECK: setzus %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x48,0xc0] + setzus %al +# CHECK: setzuns %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x49,0xc0] + setzuns %al +# CHECK: setzup %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4a,0xc0] + setzup %al +# CHECK: setzunp %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4b,0xc0] + setzunp %al +# CHECK: setzul %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4c,0xc0] + setzul %al +# CHECK: setzuge %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4d,0xc0] + setzuge %al +# CHECK: setzule %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4e,0xc0] + setzule %al +# CHECK: setzug %al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4f,0xc0] + setzug %al +# CHECK: setzuo (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x40,0x00] + setzuo (%rax) +# CHECK: setzuno (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x41,0x00] + setzuno (%rax) +# CHECK: setzub (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x42,0x00] + setzub (%rax) +# CHECK: setzuae (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x43,0x00] + setzuae (%rax) +# CHECK: setzue (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x44,0x00] + setzue (%rax) +# CHECK: setzune (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x45,0x00] + setzune (%rax) +# CHECK: setzube (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x46,0x00] + setzube (%rax) +# CHECK: setzua (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x47,0x00] + setzua (%rax) +# CHECK: setzus (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x48,0x00] + setzus (%rax) +# CHECK: setzuns (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x49,0x00] + setzuns (%rax) +# CHECK: setzup (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4a,0x00] + setzup (%rax) +# CHECK: setzunp (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4b,0x00] + setzunp (%rax) +# CHECK: setzul (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4c,0x00] + setzul (%rax) +# CHECK: setzuge (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4d,0x00] + setzuge (%rax) +# CHECK: setzule (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4e,0x00] + setzule (%rax) +# CHECK: setzug (%rax) +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4f,0x00] + setzug (%rax) diff --git a/llvm/test/MC/X86/apx/setzucc-intel.s b/llvm/test/MC/X86/apx/setzucc-intel.s new file mode 100644 index 0000000000000..bdefba6ac8d30 --- /dev/null +++ 
b/llvm/test/MC/X86/apx/setzucc-intel.s @@ -0,0 +1,98 @@ +# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s + +# CHECK: setzuo al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x40,0xc0] + setzuo al +# CHECK: setzuno al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x41,0xc0] + setzuno al +# CHECK: setzub al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x42,0xc0] + setzub al +# CHECK: setzuae al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x43,0xc0] + setzuae al +# CHECK: setzue al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0] + setzue al +# CHECK: setzune al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0] + setzune al +# CHECK: setzube al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x46,0xc0] + setzube al +# CHECK: setzua al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x47,0xc0] + setzua al +# CHECK: setzus al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x48,0xc0] + setzus al +# CHECK: setzuns al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x49,0xc0] + setzuns al +# CHECK: setzup al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4a,0xc0] + setzup al +# CHECK: setzunp al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4b,0xc0] + setzunp al +# CHECK: setzul al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4c,0xc0] + setzul al +# CHECK: setzuge al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4d,0xc0] + setzuge al +# CHECK: setzule al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4e,0xc0] + setzule al +# CHECK: setzug al +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4f,0xc0] + setzug al +# CHECK: setzuo byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x40,0x00] + setzuo byte ptr [rax] +# CHECK: setzuno byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x41,0x00] + setzuno byte ptr [rax] +# CHECK: setzub byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x42,0x00] + setzub byte ptr [rax] +# CHECK: setzuae byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x43,0x00] + setzuae byte ptr [rax] +# CHECK: setzue byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x44,0x00] + setzue byte ptr [rax] +# CHECK: setzune byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x45,0x00] + setzune byte ptr [rax] +# CHECK: setzube byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x46,0x00] + setzube byte ptr [rax] +# CHECK: setzua byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x47,0x00] + setzua byte ptr [rax] +# CHECK: setzus byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x48,0x00] + setzus byte ptr [rax] +# CHECK: setzuns byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x49,0x00] + setzuns byte ptr [rax] +# CHECK: setzup byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4a,0x00] + setzup byte ptr [rax] +# CHECK: setzunp byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4b,0x00] + setzunp byte ptr [rax] +# CHECK: setzul byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4c,0x00] + setzul byte ptr [rax] +# CHECK: setzuge byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4d,0x00] + setzuge byte ptr [rax] +# CHECK: setzule byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4e,0x00] + setzule byte ptr [rax] +# CHECK: setzug byte ptr [rax] +# CHECK: encoding: [0x62,0xf4,0x7f,0x18,0x4f,0x00] + setzug byte ptr [rax] diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 493350d7bd630..6f983d2efabe9 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -468,6 +468,7 @@ static const X86FoldTableEntry Table0[] 
= { {X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD}, {X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD}, {X86::SETCCr, X86::SETCCm, TB_FOLDED_STORE}, + {X86::SETZUCCr, X86::SETZUCCm, TB_FOLDED_STORE}, {X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD}, {X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD}, {X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD}, diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 8b82ce899a48a..53d49a2900a15 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -645,12 +645,13 @@ struct MatchableInfo { // vex encoding size is smaller. Since X86InstrSSE.td is included ahead // of X86InstrAVX512.td, the AVX instruction ID is less than AVX512 ID. // We use the ID to sort AVX instruction before AVX512 instruction in - // matching table. - if (TheDef->isSubClassOf("Instruction") && - TheDef->getValueAsBit("HasPositionOrder") && - RHS.TheDef->isSubClassOf("Instruction") && - RHS.TheDef->getValueAsBit("HasPositionOrder")) - return TheDef->getID() < RHS.TheDef->getID(); + // matching table. As well as InstAlias. + if (getResultInst()->TheDef->isSubClassOf("Instruction") && + getResultInst()->TheDef->getValueAsBit("HasPositionOrder") && + RHS.getResultInst()->TheDef->isSubClassOf("Instruction") && + RHS.getResultInst()->TheDef->getValueAsBit("HasPositionOrder")) + return getResultInst()->TheDef->getID() < + RHS.getResultInst()->TheDef->getID(); // Give matches that require more features higher precedence. This is useful // because we cannot define AssemblerPredicates with the negation of From fd50151180498f0de4fe26ff21d3e3b8accc4de0 Mon Sep 17 00:00:00 2001 From: Jianjian Guan Date: Thu, 11 Apr 2024 10:23:26 +0800 Subject: [PATCH 089/886] [RISCV] Only support SPLAT_VECTOR for Zvfhmin when also enable the scalar extension of half fp (#88275) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +- llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll | 35 +++++++++++++++------ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 944d8b6de895d..1a3ef6feea3e3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1063,7 +1063,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + if (Subtarget.hasStdExtZfhminOrZhinxmin()) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); // load/store setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll index 707ef8a94432d..1aebd32cefca6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll @@ -1,19 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,OPTIMIZED +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,OPTIMIZED +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED ; RUN: llc -mtriple=riscv32 
-mattr=+f,+d,+zfh,+zvfh,+v,+no-optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED +; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED +; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v,+no-optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED define @vsplat_nxv8f16(half %f) { -; CHECK-LABEL: vsplat_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vsplat_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vsplat_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer ret %splat From 999b9e6ddb4324600a46c8f7006acec81fe3af0f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Apr 2024 19:36:31 -0700 Subject: [PATCH 090/886] [RISCV] Use vector getConstant instead of getSplatVector+getConstant. NFC --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1a3ef6feea3e3..357432081ddb0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4634,9 +4634,7 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, } else if (Subtarget.hasStdExtZvbb()) { // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. 
SDValue OffsetVec =
-        DAG.getSplatVector(VecContainerVT, DL,
-                           DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
-                                           Subtarget.getXLenVT()));
+        DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
     Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                               OffsetVec, Passthru, Mask, VL);
     if (!EvenV.isUndef())

From dda73336ad22bd0b5ecda17040c50fb10fcbe5fb Mon Sep 17 00:00:00 2001
From: Mingming Liu
Date: Wed, 10 Apr 2024 19:46:01 -0700
Subject: [PATCH 091/886] [ThinLTO]Record import type in GlobalValueSummary::GVFlags (#87597)

The motivating use case is to support importing the function declaration
across modules to construct call graph edges for indirect calls [1] when
importing the function definition costs too much compile time (e.g., the
function is too large and has no `noinline` attribute).

1. Currently, when the compiled IR module doesn't have a function
definition but its postlink combined summary contains the function
summary or a global alias summary with this function as aliasee, the
function definition will be imported from the source module by IRMover.
The implementation is in FunctionImporter::importFunctions [2].
2. In order for FunctionImporter to import a declaration of a function,
both the function summary and the alias summary need to carry the
def / decl state. Specifically, all existing summary fields don't differ
across import modules, but the def / decl state is decided by ``.

This change encodes the def/decl state in
`GlobalValueSummary::GVFlags`. In the subsequent changes:

1. The indexing step `computeImportForModule` [3] will compute the set of
definitions and the set of declarations for each module, and pass the
information on to the bitcode writer.
2. The bitcode writer will look up the def/decl state and set the state
when it writes out the flag value (see the sketch below). This is
demonstrated in https://github.com/llvm/llvm-project/pull/87600
3. The function importer will read the def/decl state when reading the
combined summary to figure out two sets of global values, and IRMover
will be updated to import the declaration (aka linkGlobalValuePrototype
[4]) into the destination module.
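To make the flag layout concrete, here is a minimal, self-contained sketch
of packing the one-bit import type into, and recovering it from, the raw
summary flag word, mirroring the shift-by-10 encoding used by
getEncodedGVSummaryFlags/getDecodedGVSummaryFlags in this patch. The helper
names `encodeImportType` and `decodeImportType` are illustrative only and
are not part of LLVM.

```cpp
#include <cassert>
#include <cstdint>

// Illustrative stand-in for GlobalValueSummary::ImportKind.
enum ImportKind : unsigned { Definition = 0, Declaration = 1 };

// Pack the import type into bit 10 of the raw flag word, above the
// 4 linkage bits (0-3), 4 boolean flags (4-7), and visibility (8-9).
uint64_t encodeImportType(uint64_t RawFlags, ImportKind IK) {
  return RawFlags | (static_cast<uint64_t>(IK) << 10); // 1 bit
}

// Recover the import type from bit 10 of the raw flag word.
ImportKind decodeImportType(uint64_t RawFlags) {
  return ImportKind((RawFlags >> 10) & 1);
}

int main() {
  uint64_t RawFlags = 0;
  RawFlags = encodeImportType(RawFlags, Declaration);
  assert(decodeImportType(RawFlags) == Declaration);
  return 0;
}
```

Because the bit defaults to 0, summaries written by older bitcode decode
as Definition, which keeps the combined-summary format backward compatible.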
- The next change is https://github.com/llvm/llvm-project/pull/87600 [1] mentioned in rfc https://discourse.llvm.org/t/rfc-for-better-call-graph-sort-build-a-more-complete-call-graph-by-adding-more-indirect-call-edges/74029#support-cross-module-function-declaration-import-5 [2] https://github.com/llvm/llvm-project/blob/3b337242ee165554f0017b00671381ec5b1ba855/llvm/lib/Transforms/IPO/FunctionImport.cpp#L1608-L1764 [3] https://github.com/llvm/llvm-project/blob/3b337242ee165554f0017b00671381ec5b1ba855/llvm/lib/Transforms/IPO/FunctionImport.cpp#L856 [4] https://github.com/llvm/llvm-project/blob/3b337242ee165554f0017b00671381ec5b1ba855/llvm/lib/Linker/IRMover.cpp#L605 --- .../CodeGen/thinlto-distributed-cfi-devirt.ll | 2 +- clang/test/CodeGen/thinlto-distributed-cfi.ll | 2 +- clang/test/CodeGen/thinlto-funcattr-prop.ll | 4 +- lld/test/ELF/lto/comdat-nodeduplicate.ll | 8 +- llvm/include/llvm/AsmParser/LLParser.h | 2 + llvm/include/llvm/AsmParser/LLToken.h | 3 + llvm/include/llvm/IR/ModuleSummaryIndex.h | 29 +++++- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 11 ++- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 12 ++- llvm/lib/AsmParser/LLLexer.cpp | 3 + llvm/lib/AsmParser/LLParser.cpp | 33 ++++++- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + llvm/lib/IR/AsmWriter.cpp | 12 +++ llvm/lib/IR/ModuleSummaryIndex.cpp | 4 + .../test/Assembler/thinlto-memprof-summary.ll | 20 ++-- .../thinlto-multiple-summaries-for-guid.ll | 4 +- .../Assembler/thinlto-summary-visibility.ll | 6 +- llvm/test/Assembler/thinlto-summary.ll | 92 ++++++++++--------- llvm/test/Assembler/thinlto-vtable-summary.ll | 4 +- llvm/test/Bitcode/thinlto-alias.ll | 10 +- .../thinlto-func-summary-vtableref-pgo.ll | 2 +- ...ction-summary-callgraph-profile-summary.ll | 18 ++-- ...hinlto-function-summary-callgraph-relbf.ll | 2 +- .../thinlto-function-summary-refgraph.ll | 14 +-- .../thinlto-index-disassembled-by-llvm-dis.ll | 2 +- llvm/test/Bitcode/thinlto-type-tests.ll | 12 +-- llvm/test/Bitcode/thinlto-type-vcalls.ll | 24 ++--- llvm/test/ThinLTO/X86/dot-dumper.ll | 12 +-- .../ThinLTO/X86/funcattrs-prop-maythrow.ll | 8 +- .../ThinLTO/X86/funcimport_alwaysinline.ll | 2 +- llvm/test/ThinLTO/X86/load-store-caching.ll | 4 +- .../Transforms/LowerTypeTests/import-unsat.ll | 1 + .../Inputs/import-indir.yaml | 4 + .../WholeProgramDevirt/import-indir.ll | 9 ++ 35 files changed, 241 insertions(+), 139 deletions(-) diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 2309ed717c2a2..433fd1fe20430 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -34,7 +34,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi-devirt.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) +; CHECK-DIS: ^1 = gv: 
(guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 18, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0)))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi"))))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ diff --git a/clang/test/CodeGen/thinlto-distributed-cfi.ll b/clang/test/CodeGen/thinlto-distributed-cfi.ll index f5dde2d32a42d..47e56c091a612 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi.ll @@ -24,7 +24,7 @@ ; Round trip it through llvm-as ; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS ; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}})) -; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), typeIdInfo: (typeTests: (^2))))) +; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 7, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), typeIdInfo: (typeTests: (^2))))) ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ diff --git a/clang/test/CodeGen/thinlto-funcattr-prop.ll b/clang/test/CodeGen/thinlto-funcattr-prop.ll index c1274776fe9ce..daaa6e2da8048 100644 --- a/clang/test/CodeGen/thinlto-funcattr-prop.ll +++ b/clang/test/CodeGen/thinlto-funcattr-prop.ll @@ -15,9 +15,9 @@ ; RUN: llvm-dis %t1.o.1.1.promote.bc -o - | FileCheck %s --check-prefix=CHECK-IR ;; Summary for call_extern. Note that llvm-lto2 writes out the index before propagation occurs so call_extern doesn't have its flags updated. 
-; CHECK-INDEX: ^2 = gv: (guid: 13959900437860518209, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0), insts: 2, calls: ((callee: ^3))))) +; CHECK-INDEX: ^2 = gv: (guid: 13959900437860518209, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, calls: ((callee: ^3))))) ;; Summary for extern -; CHECK-INDEX: ^3 = gv: (guid: 14959766916849974397, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) +; CHECK-INDEX: ^3 = gv: (guid: 14959766916849974397, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) ;--- a.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/lld/test/ELF/lto/comdat-nodeduplicate.ll b/lld/test/ELF/lto/comdat-nodeduplicate.ll index eef17d41c5a4d..13d4ab394813b 100644 --- a/lld/test/ELF/lto/comdat-nodeduplicate.ll +++ b/lld/test/ELF/lto/comdat-nodeduplicate.ll @@ -56,15 +56,15 @@ ; IR_AB-DAG: gv: (name: "__profc_foo", {{.*}} guid = [[PROFC:[0-9]+]] ;; Check extra attributes. b.bc:__profc_foo is prevailing, so it can be internalized. -; IR_AB-DAG: gv: (guid: [[PROFD]], summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0), -; IR_AB-DAG: gv: (guid: [[PROFC]], summaries: (variable: (module: ^0, flags: (linkage: internal, visibility: hidden, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) +; IR_AB-DAG: gv: (guid: [[PROFD]], summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0), +; IR_AB-DAG: gv: (guid: [[PROFC]], summaries: (variable: (module: ^0, flags: (linkage: internal, visibility: hidden, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; IR_ABC-DAG: gv: (name: "__profd_foo", {{.*}} guid = [[PROFD:[0-9]+]] ; IR_ABC-DAG: gv: (name: "__profc_foo", {{.*}} guid = [[PROFC:[0-9]+]] ;; b.bc:__profc_foo prevails c.bc:__profc_foo, so it is exported and therefore not internalized. 
-; IR_ABC-DAG: gv: (guid: [[PROFD]], summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0), -; IR_ABC-DAG: gv: (guid: [[PROFC]], summaries: (variable: (module: ^0, flags: (linkage: weak, visibility: hidden, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) +; IR_ABC-DAG: gv: (guid: [[PROFD]], summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0), +; IR_ABC-DAG: gv: (guid: [[PROFC]], summaries: (variable: (module: ^0, flags: (linkage: weak, visibility: hidden, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ;--- a.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index b4e971fea1a13..b2dcdfad0a04b 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -301,6 +301,8 @@ namespace llvm { bool &DSOLocal); void parseOptionalDSOLocal(bool &DSOLocal); void parseOptionalVisibility(unsigned &Res); + bool parseOptionalImportType(lltok::Kind Kind, + GlobalValueSummary::ImportKind &Res); void parseOptionalDLLStorageClass(unsigned &Res); bool parseOptionalCallingConv(unsigned &CC); bool parseOptionalAlignment(MaybeAlign &Alignment, diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 65ccb1b81b3a8..0cbcdcd9ffac7 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -370,6 +370,9 @@ enum Kind { kw_live, kw_dsoLocal, kw_canAutoHide, + kw_importType, + kw_definition, + kw_declaration, kw_function, kw_insts, kw_funcFlags, diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index d5ff15063671d..5d137d4b3553c 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -432,6 +432,18 @@ class GlobalValueSummary { /// Sububclass discriminator (for dyn_cast<> et al.) enum SummaryKind : unsigned { AliasKind, FunctionKind, GlobalVarKind }; + enum ImportKind : unsigned { + // The global value definition corresponding to the summary should be + // imported from source module + Definition = 0, + + // When its definition doesn't exist in the destination module and not + // imported (e.g., function is too large to be inlined), the global value + // declaration corresponding to the summary should be imported, or the + // attributes from summary should be annotated on the function declaration. + Declaration = 1, + }; + /// Group flags (Linkage, NotEligibleToImport, etc.) as a bitfield. struct GVFlags { /// The linkage type of the associated global value. @@ -472,14 +484,19 @@ class GlobalValueSummary { /// means the symbol was externally visible. unsigned CanAutoHide : 1; + /// This field is written by the ThinLTO indexing step to postlink combined + /// summary. The value is interpreted as 'ImportKind' enum defined above. 
+ unsigned ImportType : 1; + /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, GlobalValue::VisibilityTypes Visibility, bool NotEligibleToImport, bool Live, bool IsLocal, - bool CanAutoHide) + bool CanAutoHide, ImportKind ImportType) : Linkage(Linkage), Visibility(Visibility), NotEligibleToImport(NotEligibleToImport), Live(Live), - DSOLocal(IsLocal), CanAutoHide(CanAutoHide) {} + DSOLocal(IsLocal), CanAutoHide(CanAutoHide), + ImportType(static_cast(ImportType)) {} }; private: @@ -564,6 +581,12 @@ class GlobalValueSummary { bool canAutoHide() const { return Flags.CanAutoHide; } + bool shouldImportAsDecl() const { + return Flags.ImportType == GlobalValueSummary::ImportKind::Declaration; + } + + void setImportKind(ImportKind IK) { Flags.ImportType = IK; } + GlobalValue::VisibilityTypes getVisibility() const { return (GlobalValue::VisibilityTypes)Flags.Visibility; } @@ -813,7 +836,7 @@ class FunctionSummary : public GlobalValueSummary { GlobalValue::LinkageTypes::AvailableExternallyLinkage, GlobalValue::DefaultVisibility, /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false, - /*CanAutoHide=*/false), + /*CanAutoHide=*/false, GlobalValueSummary::ImportKind::Definition), /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, std::vector(), std::move(Edges), std::vector(), diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index 33e57e5f2102f..b2747d24c5396 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -138,6 +138,7 @@ template <> struct MappingTraits { struct FunctionSummaryYaml { unsigned Linkage, Visibility; bool NotEligibleToImport, Live, IsLocal, CanAutoHide; + unsigned ImportType; std::vector Refs; std::vector TypeTests; std::vector TypeTestAssumeVCalls, @@ -183,6 +184,7 @@ template <> struct MappingTraits { io.mapOptional("Live", summary.Live); io.mapOptional("Local", summary.IsLocal); io.mapOptional("CanAutoHide", summary.CanAutoHide); + io.mapOptional("ImportType", summary.ImportType); io.mapOptional("Refs", summary.Refs); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); @@ -227,7 +229,8 @@ template <> struct CustomMappingTraits { static_cast(FSum.Linkage), static_cast(FSum.Visibility), FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, - FSum.CanAutoHide), + FSum.CanAutoHide, + static_cast(FSum.ImportType)), /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs, ArrayRef{}, std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls), @@ -251,9 +254,9 @@ template <> struct CustomMappingTraits { static_cast(FSum->flags().NotEligibleToImport), static_cast(FSum->flags().Live), static_cast(FSum->flags().DSOLocal), - static_cast(FSum->flags().CanAutoHide), Refs, - FSum->type_tests(), FSum->type_test_assume_vcalls(), - FSum->type_checked_load_vcalls(), + static_cast(FSum->flags().CanAutoHide), + FSum->flags().ImportType, Refs, FSum->type_tests(), + FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 3ad0bab827a51..deda1eebb3b57 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -635,7 +635,8 @@ static void computeFunctionSummary( HasIndirBranchToBlockAddress || HasIFuncCall; 
GlobalValueSummary::GVFlags Flags( F.getLinkage(), F.getVisibility(), NotEligibleForImport, - /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable()); + /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable(), + GlobalValueSummary::ImportKind::Definition); FunctionSummary::FFlags FunFlags{ F.doesNotAccessMemory(), F.onlyReadsMemory() && !F.doesNotAccessMemory(), F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(), @@ -761,7 +762,8 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags( V.getLinkage(), V.getVisibility(), NonRenamableLocal, - /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable()); + /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable(), + GlobalValueSummary::Definition); VTableFuncList VTableFuncs; // If splitting is not enabled, then we compute the summary information @@ -807,7 +809,8 @@ static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags( A.getLinkage(), A.getVisibility(), NonRenamableLocal, - /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable()); + /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable(), + GlobalValueSummary::Definition); auto AS = std::make_unique(Flags); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); @@ -887,7 +890,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility, /* NotEligibleToImport = */ true, /* Live = */ true, - /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable()); + /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable(), + GlobalValueSummary::Definition); CantBePromoted.insert(GV->getGUID()); // Create the appropriate summary type. if (Function *F = dyn_cast(GV)) { diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 2301a27731eaf..8ded07ffd8bd2 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -737,6 +737,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(live); KEYWORD(dsoLocal); KEYWORD(canAutoHide); + KEYWORD(importType); + KEYWORD(definition); + KEYWORD(declaration); KEYWORD(function); KEYWORD(insts); KEYWORD(funcFlags); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index f546e05a5d37d..63104129f8c2d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2083,6 +2083,20 @@ void LLParser::parseOptionalVisibility(unsigned &Res) { Lex.Lex(); } +bool LLParser::parseOptionalImportType(lltok::Kind Kind, + GlobalValueSummary::ImportKind &Res) { + switch (Kind) { + default: + return tokError("unknown import kind. 
Expect definition or declaration."); + case lltok::kw_definition: + Res = GlobalValueSummary::Definition; + return false; + case lltok::kw_declaration: + Res = GlobalValueSummary::Declaration; + return false; + } +} + /// parseOptionalDLLStorageClass /// ::= /*empty*/ /// ::= 'dllimport' @@ -9230,7 +9244,8 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID, GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags( GlobalValue::ExternalLinkage, GlobalValue::DefaultVisibility, /*NotEligibleToImport=*/false, - /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false); + /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false, + GlobalValueSummary::Definition); unsigned InstCount; std::vector Calls; FunctionSummary::TypeIdInfo TypeIdInfo; @@ -9317,7 +9332,8 @@ bool LLParser::parseVariableSummary(std::string Name, GlobalValue::GUID GUID, GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags( GlobalValue::ExternalLinkage, GlobalValue::DefaultVisibility, /*NotEligibleToImport=*/false, - /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false); + /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false, + GlobalValueSummary::Definition); GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false, /* WriteOnly */ false, /* Constant */ false, @@ -9375,7 +9391,8 @@ bool LLParser::parseAliasSummary(std::string Name, GlobalValue::GUID GUID, GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags( GlobalValue::ExternalLinkage, GlobalValue::DefaultVisibility, /*NotEligibleToImport=*/false, - /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false); + /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false, + GlobalValueSummary::Definition); if (parseToken(lltok::colon, "expected ':' here") || parseToken(lltok::lparen, "expected '(' here") || parseModuleReference(ModulePath) || @@ -10161,6 +10178,16 @@ bool LLParser::parseGVFlags(GlobalValueSummary::GVFlags &GVFlags) { return true; GVFlags.CanAutoHide = Flag; break; + case lltok::kw_importType: + Lex.Lex(); + if (parseToken(lltok::colon, "expected ':'")) + return true; + GlobalValueSummary::ImportKind IK; + if (parseOptionalImportType(Lex.getKind(), IK)) + return true; + GVFlags.ImportType = static_cast(IK); + Lex.Lex(); + break; default: return error(Lex.getLoc(), "expected gv flag type"); } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 92c349525aff5..fe4f0d6dca6c0 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1141,6 +1141,7 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, // to getDecodedLinkage() will need to be taken into account here as above. auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits auto Visibility = GlobalValue::VisibilityTypes((RawFlags >> 8) & 3); // 2 bits + auto IK = GlobalValueSummary::ImportKind((RawFlags >> 10) & 1); // 1 bit RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; // The Live flag wasn't introduced until version 3. 
For dead stripping
@@ -1151,7 +1152,7 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
   bool AutoHide = (RawFlags & 0x8);
 
   return GlobalValueSummary::GVFlags(Linkage, Visibility, NotEligibleToImport,
-                                     Live, Local, AutoHide);
+                                     Live, Local, AutoHide, IK);
 }
 
 // Decode the flags for GlobalVariable in the summary
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index dd554e422516f..6d01e3b4d8218 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1217,6 +1217,8 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
 
   RawFlags |= (Flags.Visibility << 8); // 2 bits
 
+  RawFlags |= (Flags.ImportType << 10); // 1 bit
+
   return RawFlags;
 }
 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index b778a14158ef2..609de920ba7dd 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3306,6 +3306,16 @@ static const char *getVisibilityName(GlobalValue::VisibilityTypes Vis) {
   llvm_unreachable("invalid visibility");
 }
 
+static const char *getImportTypeName(GlobalValueSummary::ImportKind IK) {
+  switch (IK) {
+  case GlobalValueSummary::Definition:
+    return "definition";
+  case GlobalValueSummary::Declaration:
+    return "declaration";
+  }
+  llvm_unreachable("invalid import kind");
+}
+
 void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
   Out << ", insts: " << FS->instCount();
   if (FS->fflags().anyFlagSet())
@@ -3545,6 +3555,8 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
     Out << ", live: " << GVFlags.Live;
     Out << ", dsoLocal: " << GVFlags.DSOLocal;
     Out << ", canAutoHide: " << GVFlags.CanAutoHide;
+    Out << ", importType: "
+        << getImportTypeName(GlobalValueSummary::ImportKind(GVFlags.ImportType));
     Out << ")";
 
   if (Summary.getSummaryKind() == GlobalValueSummary::AliasKind)
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 198c730418c72..6713d32fb787c 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -644,6 +644,10 @@ void ModuleSummaryIndex::exportToDot(
       A.addComment("dsoLocal");
     if (Flags.CanAutoHide)
       A.addComment("canAutoHide");
+    if (Flags.ImportType == GlobalValueSummary::ImportKind::Definition)
+      A.addComment("definition");
+    else if (Flags.ImportType == GlobalValueSummary::ImportKind::Declaration)
+      A.addComment("declaration");
 
     if (GUIDPreservedSymbols.count(SummaryIt.first))
       A.addComment("preserved");
diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll
index b72271bb401fd..69eafc967c2a3 100644
--- a/llvm/test/Assembler/thinlto-memprof-summary.ll
+++ b/llvm/test/Assembler/thinlto-memprof-summary.ll
@@ -5,20 +5,20 @@
 ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
 
 ;; Function with single alloc, multiple memprof MIBs, no versioning
-^1 = gv:
(guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321)))))))) ;; Function with callsite stack ids calling above function, no versioning -^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) +^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) ;; Function with multiple allocs, multiple memprof MIBs, multiple versions -^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789)))))))) +^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789)))))))) ;; Function with callsite stack ids calling above function, multiple versions -^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), 
stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789)))))) +^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789)))))) ;; Function with null callsite stack id (can happen in distributed indexes if callsite not imported) -^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414)))))) +^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414)))))) ; Make sure we get back from llvm-dis what we put in via llvm-as. ; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049)) -; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321)))))))) -; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) -; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789)))))))) -; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: 
(linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789)))))) -; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414)))))) +; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321)))))))) +; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) +; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789)))))))) +; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789)))))) +; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, 
mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
diff --git a/llvm/test/Assembler/thinlto-multiple-summaries-for-guid.ll b/llvm/test/Assembler/thinlto-multiple-summaries-for-guid.ll
index 4f849fa6e6adc..117280a279d09 100644
--- a/llvm/test/Assembler/thinlto-multiple-summaries-for-guid.ll
+++ b/llvm/test/Assembler/thinlto-multiple-summaries-for-guid.ll
@@ -8,5 +8,5 @@ source_filename = "index.bc"
 ; CHECK: ^0 = module: (path: "[Regular LTO]", hash: (0, 0, 0, 0, 0))
 ^1 = module: (path: "main.bc", hash: (3499594384, 1671013073, 3271036935, 1830411232, 59290952))
 ; CHECK-NEXT: ^1 = module: (path: "main.bc", hash: (3499594384, 1671013073, 3271036935, 1830411232, 59290952))
-^2 = gv: (guid: 13351721993301222997, summaries: (function: (module: ^1, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 1), insts: 1), function: (module: ^1, flags: (linkage: available_externally, notEligibleToImport: 1, live: 1, dsoLocal: 1, canAutoHide: 0), insts: 1)))
-; CHECK-NEXT: ^2 = gv: (guid: 13351721993301222997, summaries: (function: (module: ^1, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 1), insts: 1), function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 1, live: 1, dsoLocal: 1, canAutoHide: 0), insts: 1)))
+^2 = gv: (guid: 13351721993301222997, summaries: (function: (module: ^1, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 1, importType: definition), insts: 1), function: (module: ^1, flags: (linkage: available_externally, notEligibleToImport: 1, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 1)))
+; CHECK-NEXT: ^2 = gv: (guid: 13351721993301222997, summaries: (function: (module: ^1, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 1, importType: definition), insts: 1), function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 1, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 1)))
diff --git a/llvm/test/Assembler/thinlto-summary-visibility.ll b/llvm/test/Assembler/thinlto-summary-visibility.ll
index 77f652a9e97dc..67ddcb961d7ae 100644
--- a/llvm/test/Assembler/thinlto-summary-visibility.ll
+++ b/llvm/test/Assembler/thinlto-summary-visibility.ll
@@ -4,9 +4,9 @@
 ^0 = module: (path: "thinlto-summary-visibility1.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
 ^1 = module: (path: "thinlto-summary-visibility2.o", hash: (2998369023, 4283347029, 1195487472, 2757298015, 1852134156))
 
-; CHECK: ^2 = gv: (guid: 2, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 10)))
-; CHECK-NEXT: ^3 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: external, visibility: protected, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 10)))
-; CHECK-NEXT: ^4 = gv: (guid: 4, summaries: (function: (module: ^0, flags: (linkage: external, visibility: hidden, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 10)))
+; CHECK: ^2 = gv: (guid: 2, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 10)))
+; CHECK-NEXT: ^3 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: external, visibility: protected, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 10))) +; CHECK-NEXT: ^4 = gv: (guid: 4, summaries: (function: (module: ^0, flags: (linkage: external, visibility: hidden, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 10))) ^2 = gv: (guid: 2, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default), insts: 10))) ^3 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: external, visibility: protected), insts: 10))) diff --git a/llvm/test/Assembler/thinlto-summary.ll b/llvm/test/Assembler/thinlto-summary.ll index 9eb3c6669780d..05dad2c7acad4 100644 --- a/llvm/test/Assembler/thinlto-summary.ll +++ b/llvm/test/Assembler/thinlto-summary.ll @@ -16,13 +16,13 @@ ^3 = gv: (guid: 2, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 10, calls: ((callee: ^15, relbf: 256, tail: 1))))) ; Summaries with different linkage types. -^4 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1), insts: 1))) +^4 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, importType: definition), insts: 1))) ; Make this one an alias with a forward reference to aliasee. -^5 = gv: (guid: 4, summaries: (alias: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1), aliasee: ^14))) +^5 = gv: (guid: 4, summaries: (alias: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, importType: definition), aliasee: ^14))) ^6 = gv: (guid: 5, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 1))) ^7 = gv: (guid: 6, summaries: (function: (module: ^0, flags: (linkage: linkonce, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 1))) ^8 = gv: (guid: 7, summaries: (function: (module: ^0, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 1))) -^9 = gv: (guid: 8, summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 1), insts: 1))) +^9 = gv: (guid: 8, summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 1, importType: definition), insts: 1))) ^10 = gv: (guid: 9, summaries: (function: (module: ^0, flags: (linkage: weak, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 1))) ^11 = gv: (guid: 10, summaries: (variable: (module: ^0, flags: (linkage: common, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), varFlags: (readonly: 0)))) ; Test appending global variable with reference (tests backward reference on @@ -46,60 +46,64 @@ ^18 = gv: (guid: 17, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1), aliasee: ^14))) ; Test all types of TypeIdInfo on function summaries.
-^19 = gv: (guid: 18, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 4, typeIdInfo: (typeTests: (^24, ^26))))) -^20 = gv: (guid: 19, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 8, typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (^27, offset: 16)))))) -^21 = gv: (guid: 20, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 5, typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (^25, offset: 16)))))) -^22 = gv: (guid: 21, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 15, typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (^27, offset: 16), args: (42)), (vFuncId: (^27, offset: 24))))))) -^23 = gv: (guid: 22, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 5, typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (^28, offset: 16), args: (42))))))) +^19 = gv: (guid: 18, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 4, typeIdInfo: (typeTests: (^25, ^27))))) +^20 = gv: (guid: 19, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 8, typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (^28, offset: 16)))))) +^21 = gv: (guid: 20, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 5, typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (^26, offset: 16)))))) +^22 = gv: (guid: 21, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 15, typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (^28, offset: 16), args: (42)), (vFuncId: (^28, offset: 24))))))) +^23 = gv: (guid: 22, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 5, typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (^29, offset: 16), args: (42))))))) + +; Function summary with an import type of declaration +^24 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, importType: declaration), insts: 5))) ; Test TypeId summaries: -^24 = typeid: (name: "_ZTS1C", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) +^25 = typeid: (name: "_ZTS1C", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; Test TypeId with other optional fields (alignLog2/sizeM1/bitMask/inlineBits) -^25 = typeid: (name: "_ZTS1B", summary: (typeTestRes: (kind: inline, sizeM1BitWidth: 0, alignLog2: 1, sizeM1: 2, bitMask: 3, inlineBits: 4))) +^26 = typeid: (name: "_ZTS1B", summary: (typeTestRes: (kind: inline, sizeM1BitWidth: 0, alignLog2: 1, sizeM1: 2, bitMask: 3, inlineBits: 4))) ; Test the AllOnes resolution, and all kinds of WholeProgramDevirtResolution ; types, including all optional resolution by argument kinds. 
-^26 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi")), (offset: 16, wpdRes: (kind: indir, resByArg: (args: (1, 2), byArg: (kind: indir, byte: 2, bit: 3), args: (3), byArg: (kind: uniformRetVal, info: 1), args: (4), byArg: (kind: uniqueRetVal, info: 1), args: (5), byArg: (kind: virtualConstProp))))))) +^27 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi")), (offset: 16, wpdRes: (kind: indir, resByArg: (args: (1, 2), byArg: (kind: indir, byte: 2, bit: 3), args: (3), byArg: (kind: uniformRetVal, info: 1), args: (4), byArg: (kind: uniqueRetVal, info: 1), args: (5), byArg: (kind: virtualConstProp))))))) ; Test the other kinds of type test resolutions -^27 = typeid: (name: "_ZTS1D", summary: (typeTestRes: (kind: byteArray, sizeM1BitWidth: 0))) -^28 = typeid: (name: "_ZTS1E", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0))) -^29 = flags: 8 -^30 = blockcount: 1888 +^28 = typeid: (name: "_ZTS1D", summary: (typeTestRes: (kind: byteArray, sizeM1BitWidth: 0))) +^29 = typeid: (name: "_ZTS1E", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0))) +^30 = flags: 8 +^31 = blockcount: 1888 ; Make sure we get back from llvm-dis essentially what we put in via llvm-as. ; CHECK: ^0 = module: (path: "thinlto-summary1.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049)) ; CHECK: ^1 = module: (path: "thinlto-summary2.o", hash: (2998369023, 4283347029, 1195487472, 2757298015, 1852134156)) -; CHECK: ^2 = gv: (guid: 1, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 10, calls: ((callee: ^15, hotness: hot), (callee: ^17, hotness: cold), (callee: ^16, hotness: none, tail: 1)), refs: (^11, readonly ^13, writeonly ^14)))) +; CHECK: ^2 = gv: (guid: 1, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 10, calls: ((callee: ^15, hotness: hot), (callee: ^17, hotness: cold), (callee: ^16, hotness: none, tail: 1)), refs: (^11, readonly ^13, writeonly ^14)))) ;; relbf is not emitted since this is a combined summary, and that is only ;; emitted for per-module summaries.
-; CHECK: ^3 = gv: (guid: 2, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 10, calls: ((callee: ^15, tail: 1))))) -; CHECK: ^4 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) -; CHECK: ^5 = gv: (guid: 4, summaries: (alias: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), aliasee: ^14))) -; CHECK: ^6 = gv: (guid: 5, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; CHECK: ^7 = gv: (guid: 6, summaries: (function: (module: ^0, flags: (linkage: linkonce, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; CHECK: ^8 = gv: (guid: 7, summaries: (function: (module: ^0, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; CHECK: ^9 = gv: (guid: 8, summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 1), insts: 1))) -; CHECK: ^10 = gv: (guid: 9, summaries: (function: (module: ^0, flags: (linkage: weak, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; CHECK: ^11 = gv: (guid: 10, summaries: (variable: (module: ^0, flags: (linkage: common, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) -; CHECK: ^12 = gv: (guid: 11, summaries: (variable: (module: ^0, flags: (linkage: appending, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0), refs: (^4)))) -; CHECK: ^13 = gv: (guid: 12, summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 0, constant: 0)))) -; CHECK: ^14 = gv: (guid: 13, summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) -; CHECK: ^15 = gv: (guid: 14, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) -; CHECK: ^16 = gv: (guid: 15, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 1, noUnwind: 1, mayThrow: 1, hasUnknownCall: 1, mustBeUnreachable: 0)))) -; CHECK: ^17 = gv: (guid: 16, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1, funcFlags: (readNone: 0, readOnly: 1, noRecurse: 0, returnDoesNotAlias: 1, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, 
mustBeUnreachable: 1), calls: ((callee: ^15))))) -; CHECK: ^18 = gv: (guid: 17, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), aliasee: ^14))) -; CHECK: ^19 = gv: (guid: 18, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, typeIdInfo: (typeTests: (^24, ^26))))) -; CHECK: ^20 = gv: (guid: 19, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 8, typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (^27, offset: 16)))))) -; CHECK: ^21 = gv: (guid: 20, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (^25, offset: 16)))))) -; CHECK: ^22 = gv: (guid: 21, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 15, typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (^27, offset: 16), args: (42)), (vFuncId: (^27, offset: 24))))))) -; CHECK: ^23 = gv: (guid: 22, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (^28, offset: 16), args: (42))))))) -; CHECK: ^24 = typeid: (name: "_ZTS1C", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 1884921850105019584 -; CHECK: ^25 = typeid: (name: "_ZTS1B", summary: (typeTestRes: (kind: inline, sizeM1BitWidth: 0, alignLog2: 1, sizeM1: 2, bitMask: 3, inlineBits: 4))) ; guid = 6203814149063363976 -; CHECK: ^26 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi")), (offset: 16, wpdRes: (kind: indir, resByArg: (args: (1, 2), byArg: (kind: indir, byte: 2, bit: 3), args: (3), byArg: (kind: uniformRetVal, info: 1), args: (4), byArg: (kind: uniqueRetVal, info: 1), args: (5), byArg: (kind: virtualConstProp))))))) ; guid = 7004155349499253778 -; CHECK: ^27 = typeid: (name: "_ZTS1D", summary: (typeTestRes: (kind: byteArray, sizeM1BitWidth: 0))) ; guid = 9614786172484273522 -; CHECK: ^28 = typeid: (name: "_ZTS1E", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0))) ; guid = 17437243864166745132 -; CHECK: ^29 = flags: 8 -; CHECK: ^30 = blockcount: 1888 +; CHECK: ^3 = gv: (guid: 2, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 10, calls: ((callee: ^15, tail: 1))))) +; CHECK: ^4 = gv: (guid: 3, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 1))) +; CHECK: ^5 = gv: (guid: 4, summaries: (alias: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^14))) +; CHECK: ^6 = gv: (guid: 5, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, 
importType: definition), insts: 1))) +; CHECK: ^7 = gv: (guid: 6, summaries: (function: (module: ^0, flags: (linkage: linkonce, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; CHECK: ^8 = gv: (guid: 7, summaries: (function: (module: ^0, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; CHECK: ^9 = gv: (guid: 8, summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 1, importType: definition), insts: 1))) +; CHECK: ^10 = gv: (guid: 9, summaries: (function: (module: ^0, flags: (linkage: weak, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; CHECK: ^11 = gv: (guid: 10, summaries: (variable: (module: ^0, flags: (linkage: common, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) +; CHECK: ^12 = gv: (guid: 11, summaries: (variable: (module: ^0, flags: (linkage: appending, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0), refs: (^4)))) +; CHECK: ^13 = gv: (guid: 12, summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 1, writeonly: 0, constant: 0)))) +; CHECK: ^14 = gv: (guid: 13, summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) +; CHECK: ^15 = gv: (guid: 14, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) +; CHECK: ^16 = gv: (guid: 15, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 1, noUnwind: 1, mayThrow: 1, hasUnknownCall: 1, mustBeUnreachable: 0)))) +; CHECK: ^17 = gv: (guid: 16, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1, funcFlags: (readNone: 0, readOnly: 1, noRecurse: 0, returnDoesNotAlias: 1, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 1), calls: ((callee: ^15))))) +; CHECK: ^18 = gv: (guid: 17, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^14))) +; CHECK: ^19 = gv: (guid: 18, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, typeIdInfo: (typeTests: (^25, 
^27))))) +; CHECK: ^20 = gv: (guid: 19, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 8, typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (^28, offset: 16)))))) +; CHECK: ^21 = gv: (guid: 20, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (^26, offset: 16)))))) +; CHECK: ^22 = gv: (guid: 21, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 15, typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (^28, offset: 16), args: (42)), (vFuncId: (^28, offset: 24))))))) +; CHECK: ^23 = gv: (guid: 22, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (^29, offset: 16), args: (42))))))) +; CHECK: ^24 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: declaration), insts: 5))) +; CHECK: ^25 = typeid: (name: "_ZTS1C", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 1884921850105019584 +; CHECK: ^26 = typeid: (name: "_ZTS1B", summary: (typeTestRes: (kind: inline, sizeM1BitWidth: 0, alignLog2: 1, sizeM1: 2, bitMask: 3, inlineBits: 4))) ; guid = 6203814149063363976 +; CHECK: ^27 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi")), (offset: 16, wpdRes: (kind: indir, resByArg: (args: (1, 2), byArg: (kind: indir, byte: 2, bit: 3), args: (3), byArg: (kind: uniformRetVal, info: 1), args: (4), byArg: (kind: uniqueRetVal, info: 1), args: (5), byArg: (kind: virtualConstProp))))))) ; guid = 7004155349499253778 +; CHECK: ^28 = typeid: (name: "_ZTS1D", summary: (typeTestRes: (kind: byteArray, sizeM1BitWidth: 0))) ; guid = 9614786172484273522 +; CHECK: ^29 = typeid: (name: "_ZTS1E", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0))) ; guid = 17437243864166745132 +; CHECK: ^30 = flags: 8 +; CHECK: ^31 = blockcount: 1888 ; Make sure parsing of a non-summary entry containing a ":" does not fail ; after summary parsing, which handles colons differently. 
diff --git a/llvm/test/Assembler/thinlto-vtable-summary.ll b/llvm/test/Assembler/thinlto-vtable-summary.ll index 5a0ff32a83904..80720287f7a09 100644 --- a/llvm/test/Assembler/thinlto-vtable-summary.ll +++ b/llvm/test/Assembler/thinlto-vtable-summary.ll @@ -29,9 +29,9 @@ declare i32 @_ZN1C1fEi(ptr, i32) ^0 = module: (path: "", hash: (0, 0, 0, 0, 0)) ^1 = gv: (name: "_ZN1A1nEi") ; guid = 1621563287929432257 -^2 = gv: (name: "_ZTV1B", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0, vcall_visibility: 0), vTableFuncs: ((virtFunc: ^3, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^3, ^1)))) ; guid = 5283576821522790367 +^2 = gv: (name: "_ZTV1B", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0, vcall_visibility: 0), vTableFuncs: ((virtFunc: ^3, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^3, ^1)))) ; guid = 5283576821522790367 ^3 = gv: (name: "_ZN1B1fEi") ; guid = 7162046368816414394 -^4 = gv: (name: "_ZTV1C", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0, vcall_visibility: 0), vTableFuncs: ((virtFunc: ^5, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^1, ^5)))) ; guid = 13624023785555846296 +^4 = gv: (name: "_ZTV1C", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 0, writeonly: 0, constant: 0, vcall_visibility: 0), vTableFuncs: ((virtFunc: ^5, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^1, ^5)))) ; guid = 13624023785555846296 ^5 = gv: (name: "_ZN1C1fEi") ; guid = 14876272565662207556 ^6 = typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, ^2), (offset: 16, ^4))) ; guid = 7004155349499253778 ^7 = typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, ^2))) ; guid = 6203814149063363976 diff --git a/llvm/test/Bitcode/thinlto-alias.ll b/llvm/test/Bitcode/thinlto-alias.ll index eb794f4e631d1..5dfff0f796198 100644 --- a/llvm/test/Bitcode/thinlto-alias.ll +++ b/llvm/test/Bitcode/thinlto-alias.ll @@ -53,11 +53,11 @@ entry: declare void @analias(...) 
; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) -; DIS: ^1 = gv: (name: "analias", summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^2))) ; guid = 12695095382722328222 -; DIS: ^2 = gv: (name: "aliasee", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) ; guid = 17407585008595848568 +; DIS: ^1 = gv: (name: "analias", summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), aliasee: ^2))) ; guid = 12695095382722328222 +; DIS: ^2 = gv: (name: "aliasee", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) ; guid = 17407585008595848568 ; COMBINED-DIS: ^0 = module: (path: "{{.*}}thinlto-alias.ll.tmp.o", hash: (0, 0, 0, 0, 0)) ; COMBINED-DIS: ^1 = module: (path: "{{.*}}thinlto-alias.ll.tmp2.o", hash: (0, 0, 0, 0, 0)) -; COMBINED-DIS: ^2 = gv: (guid: 12695095382722328222, summaries: (alias: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^4))) -; COMBINED-DIS: ^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, calls: ((callee: ^2))))) -; COMBINED-DIS: ^4 = gv: (guid: 17407585008595848568, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) +; COMBINED-DIS: ^2 = gv: (guid: 12695095382722328222, summaries: (alias: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), aliasee: ^4))) +; COMBINED-DIS: ^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, calls: ((callee: ^2))))) +; COMBINED-DIS: ^4 = gv: (guid: 17407585008595848568, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll index ba3ce9a75ee83..19e228fd5355c 100644 --- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -70,5 +70,5 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 { ; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) ; DIS: ^1 = gv: (guid: 1960855528937986108) ; DIS: ^2 = gv: (guid: 5459407273543877811) -; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965 +; DIS: ^3 
= gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965 ; DIS: ^4 = blockcount: 0 diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll index d7679b6f5af20..563fb18107d35 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -150,16 +150,16 @@ declare void @none3() #1 ; DIS: ^6 = gv: (name: "cold") ; guid = 11668175513417606517 ; DIS: ^7 = gv: (name: "hot4") ; guid = 13161834114071272798 ; DIS: ^8 = gv: (name: "none3") ; guid = 16213681105727317812 -; DIS: ^9 = gv: (name: "hot_function", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 16, calls: ((callee: ^5, hotness: hot), (callee: ^6, hotness: cold), (callee: ^4, hotness: hot), (callee: ^7, hotness: cold), (callee: ^10, hotness: none), (callee: ^3, hotness: hot), (callee: ^2, hotness: none), (callee: ^8, hotness: none), (callee: ^1, hotness: critical))))) ; guid = 17381606045411660303 +; DIS: ^9 = gv: (name: "hot_function", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 16, calls: ((callee: ^5, hotness: hot), (callee: ^6, hotness: cold), (callee: ^4, hotness: hot), (callee: ^7, hotness: cold), (callee: ^10, hotness: none), (callee: ^3, hotness: hot), (callee: ^2, hotness: none), (callee: ^8, hotness: none), (callee: ^1, hotness: critical))))) ; guid = 17381606045411660303 ; DIS: ^10 = gv: (name: "none1") ; guid = 17712061229457633252 ; COMBINED-DIS: ^0 = module: (path: "{{.*}}thinlto-function-summary-callgraph-profile-summary.ll.tmp.o", hash: (0, 0, 0, 0, 0)) ; COMBINED-DIS: ^1 = module: (path: "{{.*}}thinlto-function-summary-callgraph-profile-summary.ll.tmp2.o", hash: (0, 0, 0, 0, 0)) -; COMBINED-DIS: ^2 = gv: (guid: 3741006263754194003, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^3 = gv: (guid: 5026609803865204483, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^4 = gv: (guid: 8117347573235780485, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^5 = gv: (guid: 9453975128311291976, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^6 = gv: (guid: 11668175513417606517, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^7 = gv: (guid: 16213681105727317812, 
summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) -; COMBINED-DIS: ^8 = gv: (guid: 17381606045411660303, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 16, calls: ((callee: ^5, hotness: hot), (callee: ^6, hotness: cold), (callee: ^4, hotness: hot), (callee: ^9, hotness: none), (callee: ^3, hotness: hot), (callee: ^2, hotness: none), (callee: ^7, hotness: none))))) -; COMBINED-DIS: ^9 = gv: (guid: 17712061229457633252, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 1))) +; COMBINED-DIS: ^2 = gv: (guid: 3741006263754194003, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^3 = gv: (guid: 5026609803865204483, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^4 = gv: (guid: 8117347573235780485, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^5 = gv: (guid: 9453975128311291976, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^6 = gv: (guid: 11668175513417606517, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^7 = gv: (guid: 16213681105727317812, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) +; COMBINED-DIS: ^8 = gv: (guid: 17381606045411660303, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 16, calls: ((callee: ^5, hotness: hot), (callee: ^6, hotness: cold), (callee: ^4, hotness: hot), (callee: ^9, hotness: none), (callee: ^3, hotness: hot), (callee: ^2, hotness: none), (callee: ^7, hotness: none))))) +; COMBINED-DIS: ^9 = gv: (guid: 17712061229457633252, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 1))) diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll index ca4f62907cb0a..3c827247a6c9f 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -39,5 +39,5 @@ declare void @func(...) 
#1 ; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) ; DIS: ^1 = gv: (name: "func") ; guid = 7289175272376759421 -; DIS: ^2 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 3, calls: ((callee: ^1, relbf: 256)), refs: (readonly ^3)))) ; guid = 15822663052811949562 +; DIS: ^2 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 3, calls: ((callee: ^1, relbf: 256)), refs: (readonly ^3)))) ; guid = 15822663052811949562 ; DIS: ^3 = gv: (name: "undefinedglob") ; guid = 18036901804029949403 diff --git a/llvm/test/Bitcode/thinlto-function-summary-refgraph.ll b/llvm/test/Bitcode/thinlto-function-summary-refgraph.ll index fc42b5369644c..c76d70b8c4cc0 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-refgraph.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -148,18 +148,18 @@ entry: ; order, which depends on GUID, and the private function Y GUID will depend ; on the path to the test. ; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) -; DIS-DAG: = gv: (name: "Z", summaries: (function: (module: ^0, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, calls: ((callee: ^{{.*}}, tail: 1))))) ; guid = 104084381700047393 -; DIS-DAG: = gv: (name: "X", summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) ; guid = 1881667236089500162 -; DIS-DAG: = gv: (name: "W", summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, calls: ((callee: ^{{.*}}, tail: 1)), refs: (^{{.*}})))) ; guid = 5790125716599269729 +; DIS-DAG: = gv: (name: "Z", summaries: (function: (module: ^0, flags: (linkage: linkonce_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, calls: ((callee: ^{{.*}}, tail: 1))))) ; guid = 104084381700047393 +; DIS-DAG: = gv: (name: "X", summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) ; guid = 1881667236089500162 +; DIS-DAG: = gv: (name: "W", summaries: (function: (module: ^0, flags: (linkage: weak_odr, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, calls: ((callee: ^{{.*}}, tail: 1)), refs: (^{{.*}})))) ; guid = 5790125716599269729 ; DIS-DAG: = gv: (name: "foo") ; guid = 6699318081062747564 ; DIS-DAG: = gv: (name: "func") ; guid = 7289175272376759421 ; DIS-DAG: = gv: (name: "func3") ; guid = 11517462787082255043 ; Check that default value of writeonly attribute is zero for constant variables -; DIS-DAG: = gv: (name: "globalvar", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 0, constant: 1)))) ; guid = 12887606300320728018 +; DIS-DAG: = gv: (name: "globalvar", summaries: (variable: (module: ^0, flags: (linkage: 
external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 1, writeonly: 0, constant: 1)))) ; guid = 12887606300320728018 ; DIS-DAG: = gv: (name: "func2") ; guid = 14069196320850861797 ; DIS-DAG: = gv: (name: "llvm.ctpop.i8") ; guid = 15254915475081819833 -; DIS-DAG: = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 9, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) ; guid = 15822663052811949562 -; DIS-DAG: = gv: (name: "bar", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1, constant: 0), refs: (^{{.*}})))) ; guid = 16434608426314478903 +; DIS-DAG: = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 9, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) ; guid = 15822663052811949562 +; DIS-DAG: = gv: (name: "bar", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), varFlags: (readonly: 1, writeonly: 1, constant: 0), refs: (^{{.*}})))) ; guid = 16434608426314478903 ; Don't try to match the exact GUID. Since it is private, the file path ; will get hashed, and that will be test dependent. 
-; DIS-DAG: = gv: (name: "Y", summaries: (function: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 14, calls: ((callee: ^{{.*}}, tail: 1))))) ; guid = +; DIS-DAG: = gv: (name: "Y", summaries: (function: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 14, calls: ((callee: ^{{.*}}, tail: 1))))) ; guid = diff --git a/llvm/test/Bitcode/thinlto-index-disassembled-by-llvm-dis.ll b/llvm/test/Bitcode/thinlto-index-disassembled-by-llvm-dis.ll index 0d6a8e3b4b8d5..3a121c2d5d426 100644 --- a/llvm/test/Bitcode/thinlto-index-disassembled-by-llvm-dis.ll +++ b/llvm/test/Bitcode/thinlto-index-disassembled-by-llvm-dis.ll @@ -18,7 +18,7 @@ ; RUN: llvm-dis --print-thinlto-index-only %t.o -o - | FileCheck %s --check-prefix=DIS ; DIS: ^0 = module: (path: "{{.*}}thinlto-index-disassembled-by-llvm-dis.ll.tmp -; DIS: ^1 = gv: (name: "aplusb", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2))) ; guid = +; DIS: ^1 = gv: (name: "aplusb", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2))) ; guid = source_filename = "add.cpp" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Bitcode/thinlto-type-tests.ll b/llvm/test/Bitcode/thinlto-type-tests.ll index 76021a87c52c1..6dc49b849c2fe 100644 --- a/llvm/test/Bitcode/thinlto-type-tests.ll +++ b/llvm/test/Bitcode/thinlto-type-tests.ll @@ -37,11 +37,11 @@ declare i1 @llvm.type.test(i8*, metadata) nounwind readnone ; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) ; DIS: ^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 -; DIS: ^2 = gv: (name: "h", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (16434608426314478903))))) ; guid = 8124147457056772133 -; DIS: ^3 = gv: (name: "g", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, typeIdInfo: (typeTests: (6699318081062747564, 16434608426314478903))))) ; guid = 13146401226427987378 -; DIS: ^4 = gv: (name: "f", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (6699318081062747564))))) ; guid = 14740650423002898831 +; DIS: ^2 = gv: (name: "h", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (16434608426314478903))))) ; guid = 8124147457056772133 +; DIS: ^3 = gv: (name: "g", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, typeIdInfo: (typeTests: (6699318081062747564, 16434608426314478903))))) ; guid = 13146401226427987378 +; DIS: ^4 = gv: (name: "f", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, 
canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (6699318081062747564))))) ; guid = 14740650423002898831 ; COMBINED-DIS: ^0 = module: (path: "{{.*}}thinlto-type-tests.ll.tmp.o", hash: (0, 0, 0, 0, 0)) -; COMBINED-DIS: ^1 = gv: (guid: 8124147457056772133, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (16434608426314478903))))) -; COMBINED-DIS: ^2 = gv: (guid: 13146401226427987378, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, typeIdInfo: (typeTests: (6699318081062747564, 16434608426314478903))))) -; COMBINED-DIS: ^3 = gv: (guid: 14740650423002898831, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (6699318081062747564))))) +; COMBINED-DIS: ^1 = gv: (guid: 8124147457056772133, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (16434608426314478903))))) +; COMBINED-DIS: ^2 = gv: (guid: 13146401226427987378, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, typeIdInfo: (typeTests: (6699318081062747564, 16434608426314478903))))) +; COMBINED-DIS: ^3 = gv: (guid: 14740650423002898831, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (6699318081062747564))))) diff --git a/llvm/test/Bitcode/thinlto-type-vcalls.ll b/llvm/test/Bitcode/thinlto-type-vcalls.ll index ede5b483bfebf..16c93097101df 100644 --- a/llvm/test/Bitcode/thinlto-type-vcalls.ll +++ b/llvm/test/Bitcode/thinlto-type-vcalls.ll @@ -112,19 +112,19 @@ declare {i8*, i1} @llvm.type.checked.load(i8*, i32, metadata) ; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) ; DIS: ^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 -; DIS: ^2 = gv: (name: "f1", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 8, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) ; guid = 2072045998141807037 -; DIS: ^3 = gv: (name: "f3", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) ; guid = 4197650231481825559 +; DIS: ^2 = gv: (name: "f1", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 8, funcFlags: (readNone: 0, 
readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) ; guid = 2072045998141807037 +; DIS: ^3 = gv: (name: "f3", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) ; guid = 4197650231481825559 ; DIS: ^4 = gv: (name: "llvm.type.checked.load") ; guid = 5568222536364573403 ; DIS: ^5 = gv: (name: "llvm.assume") ; guid = 6385187066495850096 -; DIS: ^6 = gv: (name: "f2", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 24), vFuncId: (guid: 16434608426314478903, offset: 32)))))) ; guid = 8471399308421654326 -; DIS: ^7 = gv: (name: "f4", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42)), (vFuncId: (guid: 6699318081062747564, offset: 24), args: (43))))))) ; guid = 10064745020953272174 -; DIS: ^8 = gv: (name: "f5", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42))))))) ; guid = 11686717102184386164 -; DIS: ^9 = gv: (name: "f6", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (7546896869197086323))))) ; guid = 11834966808443348068 +; DIS: ^6 = gv: (name: "f2", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 24), vFuncId: (guid: 16434608426314478903, offset: 32)))))) ; guid = 8471399308421654326 +; DIS: ^7 = gv: (name: "f4", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 15, funcFlags: 
(readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42)), (vFuncId: (guid: 6699318081062747564, offset: 24), args: (43))))))) ; guid = 10064745020953272174 +; DIS: ^8 = gv: (name: "f5", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42))))))) ; guid = 11686717102184386164 +; DIS: ^9 = gv: (name: "f6", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (7546896869197086323))))) ; guid = 11834966808443348068 ; COMBINED-DIS: ^0 = module: (path: "{{.*}}thinlto-type-vcalls.ll.tmp.o", hash: (0, 0, 0, 0, 0)) -; COMBINED-DIS: ^1 = gv: (guid: 2072045998141807037, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 8, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) -; COMBINED-DIS: ^2 = gv: (guid: 4197650231481825559, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) -; COMBINED-DIS: ^3 = gv: (guid: 8471399308421654326, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 24), vFuncId: (guid: 16434608426314478903, offset: 32)))))) -; COMBINED-DIS: ^4 = gv: (guid: 10064745020953272174, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42)), (vFuncId: (guid: 6699318081062747564, offset: 24), args: (43))))))) -; COMBINED-DIS: ^5 = gv: (guid: 11686717102184386164, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, funcFlags: (readNone: 0, readOnly: 0, 
noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42))))))) -; COMBINED-DIS: ^6 = gv: (guid: 11834966808443348068, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2, typeIdInfo: (typeTests: (7546896869197086323))))) +; COMBINED-DIS: ^1 = gv: (guid: 2072045998141807037, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 8, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) +; COMBINED-DIS: ^2 = gv: (guid: 4197650231481825559, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadVCalls: (vFuncId: (guid: 6699318081062747564, offset: 16)))))) +; COMBINED-DIS: ^3 = gv: (guid: 8471399308421654326, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeVCalls: (vFuncId: (guid: 6699318081062747564, offset: 24), vFuncId: (guid: 16434608426314478903, offset: 32)))))) +; COMBINED-DIS: ^4 = gv: (guid: 10064745020953272174, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 15, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42)), (vFuncId: (guid: 6699318081062747564, offset: 24), args: (43))))))) +; COMBINED-DIS: ^5 = gv: (guid: 11686717102184386164, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), typeIdInfo: (typeCheckedLoadConstVCalls: ((vFuncId: (guid: 6699318081062747564, offset: 16), args: (42))))))) +; COMBINED-DIS: ^6 = gv: (guid: 11834966808443348068, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 2, typeIdInfo: (typeTests: (7546896869197086323))))) diff --git a/llvm/test/ThinLTO/X86/dot-dumper.ll b/llvm/test/ThinLTO/X86/dot-dumper.ll index 
53122160b1b25..149039203a68c 100644 --- a/llvm/test/ThinLTO/X86/dot-dumper.ll +++ b/llvm/test/ThinLTO/X86/dot-dumper.ll @@ -20,8 +20,8 @@ ; PERMODULE-NEXT: color = lightgrey; ; PERMODULE-NEXT: label = ""; ; PERMODULE-NEXT: node [style=filled,fillcolor=lightblue]; -; PERMODULE-NEXT: M0_[[MAIN_ALIAS:[0-9]+]] [style="dotted,filled",shape="box",label="main_alias",fillcolor="red"]; // alias, dead -; PERMODULE-NEXT: M0_[[MAIN:[0-9]+]] [shape="record",label="main|extern (inst: 4, ffl: 0000000000)}",fillcolor="red"]; // function, dead +; PERMODULE-NEXT: M0_[[MAIN_ALIAS:[0-9]+]] [style="dotted,filled",shape="box",label="main_alias",fillcolor="red"]; // alias, definition, dead +; PERMODULE-NEXT: M0_[[MAIN:[0-9]+]] [shape="record",label="main|extern (inst: 4, ffl: 0000000000)}",fillcolor="red"]; // function, definition, dead ; PERMODULE-NEXT: // Edges: ; PERMODULE-NEXT: M0_[[MAIN_ALIAS]] -> M0_[[MAIN]] [style=dotted]; // alias ; PERMODULE-NEXT: } @@ -39,8 +39,8 @@ ; COMBINED-NEXT: color = lightgrey; ; COMBINED-NEXT: label = "dot-dumper{{.*}}1.bc"; ; COMBINED-NEXT: node [style=filled,fillcolor=lightblue]; -; COMBINED-NEXT: M0_[[MAIN_ALIAS:[0-9]+]] [style="dotted,filled",shape="box",label="main_alias",fillcolor="red"]; // alias, dead -; COMBINED-NEXT: M0_[[MAIN:[0-9]+]] [shape="record",label="main|extern (inst: 4, ffl: 0000000000)}"]; // function, preserved +; COMBINED-NEXT: M0_[[MAIN_ALIAS:[0-9]+]] [style="dotted,filled",shape="box",label="main_alias",fillcolor="red"]; // alias, definition, dead +; COMBINED-NEXT: M0_[[MAIN:[0-9]+]] [shape="record",label="main|extern (inst: 4, ffl: 0000000000)}"]; // function, definition, preserved ; COMBINED-NEXT: // Edges: ; COMBINED-NEXT: M0_[[MAIN_ALIAS]] -> M0_[[MAIN]] [style=dotted]; // alias ; COMBINED-NEXT: } @@ -50,10 +50,10 @@ ; COMBINED-NEXT: color = lightgrey; ; COMBINED-NEXT: label = "dot-dumper{{.*}}2.bc"; ; COMBINED-NEXT: node [style=filled,fillcolor=lightblue]; -; COMBINED-NEXT: M1_[[FOO:[0-9]+]] [shape="record",label="foo|extern (inst: 4, ffl: 0000100000)}"]; // function +; COMBINED-NEXT: M1_[[FOO:[0-9]+]] [shape="record",label="foo|extern (inst: 4, ffl: 0000100000)}"]; // function, definition ; COMBINED-NEXT: M1_[[A:[0-9]+]] [shape="Mrecord",label="A|extern}"]; // variable, immutable ; COMBINED-NEXT: M1_[[B:[0-9]+]] [shape="Mrecord",label="B|extern}"]; // variable, immutable, constant -; COMBINED-NEXT: M1_{{[0-9]+}} [shape="record",label="bar|extern (inst: 1, ffl: 0000000000)}",fillcolor="red"]; // function, dead +; COMBINED-NEXT: M1_{{[0-9]+}} [shape="record",label="bar|extern (inst: 1, ffl: 0000000000)}",fillcolor="red"]; // function, definition, dead ; COMBINED-NEXT: // Edges: ; COMBINED-NEXT: M1_[[FOO]] -> M1_[[B]] [style=dashed,color=forestgreen]; // const-ref ; COMBINED-NEXT: M1_[[FOO]] -> M1_[[A]] [style=dashed,color=forestgreen]; // const-ref diff --git a/llvm/test/ThinLTO/X86/funcattrs-prop-maythrow.ll b/llvm/test/ThinLTO/X86/funcattrs-prop-maythrow.ll index 6489e952251d0..abfe820075bb9 100644 --- a/llvm/test/ThinLTO/X86/funcattrs-prop-maythrow.ll +++ b/llvm/test/ThinLTO/X86/funcattrs-prop-maythrow.ll @@ -48,9 +48,9 @@ define void @caller_nounwind() { ; CHECK-DAG: attributes [[ATTR_NOUNWIND]] = { norecurse nounwind } ; CHECK-DAG: attributes [[ATTR_MAYTHROW]] = { norecurse } -; SUMMARY-DAG: = gv: (name: "cleanupret", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, 
noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) -; SUMMARY-DAG: = gv: (name: "resume", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) -; SUMMARY-DAG: = gv: (name: "catchret", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) +; SUMMARY-DAG: = gv: (name: "cleanupret", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) +; SUMMARY-DAG: = gv: (name: "resume", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) +; SUMMARY-DAG: = gv: (name: "catchret", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 5, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 1, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^{{.*}})), refs: (^{{.*}})))) ;--- callees.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -112,4 +112,4 @@ exit: ret void } -attributes #0 = { nounwind } \ No newline at end of file +attributes #0 = { nounwind } diff --git a/llvm/test/ThinLTO/X86/funcimport_alwaysinline.ll b/llvm/test/ThinLTO/X86/funcimport_alwaysinline.ll index 90c708fd2d115..67acc2a2892db 100644 --- a/llvm/test/ThinLTO/X86/funcimport_alwaysinline.ll +++ b/llvm/test/ThinLTO/X86/funcimport_alwaysinline.ll @@ -23,4 +23,4 @@ entry: } attributes #0 = { alwaysinline nounwind uwtable } -; CHECK2: ^2 = gv: (guid: {{.*}}, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 1, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) +; CHECK2: ^2 = gv: (guid: {{.*}}, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, 
alwaysInline: 1, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) diff --git a/llvm/test/ThinLTO/X86/load-store-caching.ll b/llvm/test/ThinLTO/X86/load-store-caching.ll index 4fb2d4693042e..b25308bf17616 100644 --- a/llvm/test/ThinLTO/X86/load-store-caching.ll +++ b/llvm/test/ThinLTO/X86/load-store-caching.ll @@ -22,5 +22,5 @@ entry: } ; CHECK: ^0 = module: -; CHECK-NEXT: ^1 = gv: (name: "obj", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1, constant: 0)))) ; guid = -; CHECK-NEXT: ^2 = gv: (name: "foo", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, refs: (^1)))) ; guid = +; CHECK-NEXT: ^1 = gv: (name: "obj", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), varFlags: (readonly: 1, writeonly: 1, constant: 0)))) ; guid = +; CHECK-NEXT: ^2 = gv: (name: "foo", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, refs: (^1)))) ; guid = diff --git a/llvm/test/Transforms/LowerTypeTests/import-unsat.ll b/llvm/test/Transforms/LowerTypeTests/import-unsat.ll index 76afe68d21891..f766c2d324163 100644 --- a/llvm/test/Transforms/LowerTypeTests/import-unsat.ll +++ b/llvm/test/Transforms/LowerTypeTests/import-unsat.ll @@ -10,6 +10,7 @@ ; SUMMARY-NEXT: Live: true ; SUMMARY-NEXT: Local: false ; SUMMARY-NEXT: CanAutoHide: false +; SUMMARY-NEXT: ImportType: 0 ; SUMMARY-NEXT: TypeTests: [ 123 ] ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: diff --git a/llvm/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml b/llvm/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml index 30159c5012b08..22533ed636a50 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml +++ b/llvm/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml @@ -2,6 +2,7 @@ GlobalValueMap: 42: - Live: true + ImportType: 0 TypeTestAssumeVCalls: - GUID: 123 Offset: 0 @@ -22,6 +23,9 @@ GlobalValueMap: GUID: 456 Offset: 8 Args: [24, 12] + 43: + - Live: true + ImportType : 1 TypeIdMap: typeid1: WPDRes: diff --git a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll index 1d74a59769787..e4d6f1d52b540 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll @@ -10,6 +10,7 @@ ; SUMMARY-NEXT: Live: true ; SUMMARY-NEXT: Local: false ; SUMMARY-NEXT: CanAutoHide: false +; SUMMARY-NEXT: ImportType: 0 ; SUMMARY-NEXT: TypeTestAssumeVCalls: ; SUMMARY-NEXT: - GUID: 123 ; SUMMARY-NEXT: Offset: 0 @@ -30,6 +31,14 @@ ; SUMMARY-NEXT: GUID: 456 ; SUMMARY-NEXT: Offset: 8 ; SUMMARY-NEXT: Args: [ 24, 12 ] +; SUMMARY-NEXT: 43: +; SUMMARY-NEXT: - Linkage: 0 +; SUMMARY-NEXT: Visibility: 0 +; SUMMARY-NEXT: NotEligibleToImport: false +; SUMMARY-NEXT: Live: true +; SUMMARY-NEXT: Local: false +; SUMMARY-NEXT: CanAutoHide: false +; SUMMARY-NEXT: ImportType: 1 ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: From 75edf0c18c777d69df7cfc6462e5233649bd47d4 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 11 Apr 2024 11:05:55 +0800 Subject: [PATCH 092/886] [NFC] 
[Serialization] Avoid accessing PendingBodies as much as possible The `HasBody` parameter of isConsumerInterestedIn is only used for a function decl that doesn't already have a body. That case should be relatively infrequent compared to the calls to isConsumerInterestedIn themselves, so we can delay computing `HasBody` to make it more efficient. --- clang/include/clang/Serialization/ASTReader.h | 1 + clang/lib/Serialization/ASTReaderDecl.cpp | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 5fd55a519c6b0..e8b9f28690d9f 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1492,6 +1492,7 @@ class ASTReader getModuleFileLevelDecls(ModuleFile &Mod); private: + bool isConsumerInterestedIn(Decl *D); void PassInterestingDeclsToConsumer(); void PassInterestingDeclToConsumer(Decl *D); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 78448855fba09..9e49a3780ff41 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3198,7 +3198,7 @@ inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) { /// This routine should return true for anything that might affect /// code generation, e.g., inline function definitions, Objective-C /// declarations with metadata, etc. -static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) { +bool ASTReader::isConsumerInterestedIn(Decl *D) { // An ObjCMethodDecl is never considered as "interesting" because its // implementation container always is. @@ -3207,7 +3207,7 @@ static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) { if (isPartOfPerModuleInitializer(D)) { auto *M = D->getImportedOwningModule(); if (M && M->Kind == Module::ModuleMapModule && - Ctx.DeclMustBeEmitted(D)) + getContext().DeclMustBeEmitted(D)) return false; } @@ -3222,7 +3222,7 @@ static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) { (Var->isThisDeclarationADefinition() == VarDecl::Definition || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var)); if (const auto *Func = dyn_cast<FunctionDecl>(D)) - return Func->doesThisDeclarationHaveABody() || HasBody; + return Func->doesThisDeclarationHaveABody() || PendingBodies.count(D); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) @@ -4173,7 +4173,7 @@ void ASTReader::PassInterestingDeclsToConsumer() { while (!PotentiallyInterestingDecls.empty()) { Decl *D = PotentiallyInterestingDecls.front(); PotentiallyInterestingDecls.pop_front(); - if (isConsumerInterestedIn(getContext(), D, PendingBodies.count(D))) + if (isConsumerInterestedIn(D)) PassInterestingDeclToConsumer(D); } } @@ -4197,8 +4197,7 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // the declaration, then we know it was interesting and we skip the call // to isConsumerInterestedIn because it is unsafe to call in the // current ASTReader state. - bool WasInteresting = - Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false); + bool WasInteresting = Record.JustLoaded || isConsumerInterestedIn(D); for (auto &FileAndOffset : UpdateOffsets) { ModuleFile *F = FileAndOffset.first; uint64_t Offset = FileAndOffset.second; @@ -4230,8 +4229,7 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // We might have made this declaration interesting.
If so, remember that // we need to hand it off to the consumer. - if (!WasInteresting && - isConsumerInterestedIn(getContext(), D, PendingBodies.count(D))) { + if (!WasInteresting && isConsumerInterestedIn(D)) { PotentiallyInterestingDecls.push_back(D); WasInteresting = true; } From 026165fad70420d85defb5fc9109c138250058ee Mon Sep 17 00:00:00 2001 From: paperchalice Date: Thu, 11 Apr 2024 11:25:33 +0800 Subject: [PATCH 093/886] [Instrumentation] Support MachineFunction in ChangeReporter (#80946) --- .../llvm/Passes/StandardInstrumentations.h | 13 ++- llvm/lib/Passes/StandardInstrumentations.cpp | 82 ++++++++++++++++--- .../DotCfg/print-changed-dot-cfg.mir | 24 ++++++ llvm/test/Other/change-printer.mir | 21 +++++ 4 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir create mode 100644 llvm/test/Other/change-printer.mir diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 8c6a44876d545..b053e6307a653 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -18,6 +18,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" @@ -33,6 +35,7 @@ namespace llvm { class Module; class Function; +class MachineFunction; class PassInstrumentationCallbacks; /// Instrumentation to print IR before/after passes. @@ -313,6 +316,11 @@ template class BlockDataT { B.print(SS, nullptr, true, true); } + BlockDataT(const MachineBasicBlock &B) : Label(B.getName().str()), Data(B) { + raw_string_ostream SS(Body); + B.print(SS); + } + bool operator==(const BlockDataT &That) const { return Body == That.Body; } bool operator!=(const BlockDataT &That) const { return Body != That.Body; } @@ -364,6 +372,7 @@ template class OrderedChangedData { class EmptyData { public: EmptyData(const BasicBlock &) {} + EmptyData(const MachineBasicBlock &) {} }; // The data saved for comparing functions. @@ -405,7 +414,8 @@ template class IRComparer { protected: // Generate the data for \p F into \p Data. - static bool generateFunctionData(IRDataT &Data, const Function &F); + template + static bool generateFunctionData(IRDataT &Data, const FunctionT &F); const IRDataT &Before; const IRDataT &After; @@ -475,6 +485,7 @@ class DCData { public: // Fill the map with the transitions from basic block \p B. DCData(const BasicBlock &B); + DCData(const MachineBasicBlock &B); // Return an iterator to the names of the successor blocks. 
StringMap::const_iterator begin() const { diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 697988b3fc7c0..c5f0c14885d0c 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -19,7 +19,9 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -180,6 +182,12 @@ const Module *unwrapModule(Any IR, bool Force = false) { return F->getParent(); } + if (const auto *MF = unwrapIR(IR)) { + if (!Force && !isFunctionInPrintList(MF->getName())) + return nullptr; + return MF->getFunction().getParent(); + } + llvm_unreachable("Unknown IR unit"); } @@ -215,6 +223,12 @@ void printIR(raw_ostream &OS, const Loop *L) { printLoop(const_cast(*L), OS); } +void printIR(raw_ostream &OS, const MachineFunction *MF) { + if (!isFunctionInPrintList(MF->getName())) + return; + MF->print(OS); +} + std::string getIRName(Any IR) { if (unwrapIR(IR)) return "[module]"; @@ -262,6 +276,9 @@ bool shouldPrintIR(Any IR) { if (const auto *L = unwrapIR(IR)) return isFunctionInPrintList(L->getHeader()->getParent()->getName()); + + if (const auto *MF = unwrapIR(IR)) + return isFunctionInPrintList(MF->getName()); llvm_unreachable("Unknown wrapped IR type"); } @@ -275,6 +292,14 @@ void unwrapAndPrint(raw_ostream &OS, Any IR) { auto *M = unwrapModule(IR); assert(M && "should have unwrapped module"); printIR(OS, M); + + if (const auto *MF = unwrapIR(IR)) { + auto &MMI = MF->getMMI(); + for (const auto &F : *M) { + if (auto *MF = MMI.getMachineFunction(F)) + MF->print(OS); + } + } return; } @@ -297,6 +322,11 @@ void unwrapAndPrint(raw_ostream &OS, Any IR) { printIR(OS, L); return; } + + if (const auto *MF = unwrapIR(IR)) { + printIR(OS, MF); + return; + } llvm_unreachable("Unknown wrapped IR type"); } @@ -305,7 +335,8 @@ bool isIgnored(StringRef PassID) { return isSpecialPass(PassID, {"PassManager", "PassAdaptor", "AnalysisManagerProxy", "DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass", - "VerifierPass", "PrintModulePass"}); + "VerifierPass", "PrintModulePass", "PrintMIRPass", + "PrintMIRPreparePass"}); } std::string makeHTMLReady(StringRef SR) { @@ -664,20 +695,38 @@ template void IRComparer::analyzeIR(Any IR, IRDataT &Data) { return; } - const auto *F = unwrapIR(IR); - if (!F) { - const auto *L = unwrapIR(IR); - assert(L && "Unknown IR unit."); - F = L->getHeader()->getParent(); + if (const auto *F = unwrapIR(IR)) { + generateFunctionData(Data, *F); + return; + } + + if (const auto *L = unwrapIR(IR)) { + auto *F = L->getHeader()->getParent(); + generateFunctionData(Data, *F); + return; } - assert(F && "Unknown IR unit."); - generateFunctionData(Data, *F); + + if (const auto *MF = unwrapIR(IR)) { + generateFunctionData(Data, *MF); + return; + } + + llvm_unreachable("Unknown IR unit"); +} + +static bool shouldGenerateData(const Function &F) { + return !F.isDeclaration() && isFunctionInPrintList(F.getName()); +} + +static bool shouldGenerateData(const MachineFunction &MF) { + return isFunctionInPrintList(MF.getName()); } template -bool IRComparer::generateFunctionData(IRDataT &Data, const Function &F) { - if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) { - FuncDataT FD(F.getEntryBlock().getName().str()); +template +bool 
IRComparer::generateFunctionData(IRDataT &Data, const FunctionT &F) { + if (shouldGenerateData(F)) { + FuncDataT FD(F.front().getName().str()); int I = 0; for (const auto &B : F) { std::string BBName = B.getName().str(); @@ -722,6 +771,12 @@ static SmallString<32> getIRFileDisplayName(Any IR) { ResultStream << "-loop-"; stable_hash LoopNameHash = stable_hash_combine_string(L->getName()); write_hex(ResultStream, LoopNameHash, HexPrintStyle::Lower, MaxHashWidth); + } else if (const auto *MF = unwrapIR(IR)) { + ResultStream << "-machine-function-"; + stable_hash MachineFunctionNameHash = + stable_hash_combine_string(MF->getName()); + write_hex(ResultStream, MachineFunctionNameHash, HexPrintStyle::Lower, + MaxHashWidth); } else { llvm_unreachable("Unknown wrapped IR type"); } @@ -2122,6 +2177,11 @@ DCData::DCData(const BasicBlock &B) { addSuccessorLabel(Succ->getName().str(), ""); } +DCData::DCData(const MachineBasicBlock &B) { + for (const MachineBasicBlock *Succ : successors(&B)) + addSuccessorLabel(Succ->getName().str(), ""); +} + DotCfgChangeReporter::DotCfgChangeReporter(bool Verbose) : ChangeReporter>(Verbose) {} diff --git a/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir new file mode 100644 index 0000000000000..340ece93aa02b --- /dev/null +++ b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir @@ -0,0 +1,24 @@ +# REQUIRES: x86-registered-target +# Simple functionality check. +# RUN: rm -rf %t && mkdir -p %t +# RUN: llc -filetype=null -print-changed=dot-cfg -passes=no-op-machine-function -dot-cfg-dir=%t %s +# RUN: ls %t/*.pdf %t/passes.html | count 3 + +--- +name: g +body: | + bb.0.entry: + %0:gr32 = MOV32ri 5 + $eax = COPY %0 + RET 0, $eax + +... +--- +name: f +body: | + bb.0.entry: + %0:gr32 = MOV32ri 7 + $eax = COPY %0 + RET 0, $eax + +... diff --git a/llvm/test/Other/change-printer.mir b/llvm/test/Other/change-printer.mir new file mode 100644 index 0000000000000..5e57da50e625d --- /dev/null +++ b/llvm/test/Other/change-printer.mir @@ -0,0 +1,21 @@ +# REQUIRES: x86-registered-target +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -filetype=null %s \ +# RUN: -p no-op-machine-function -print-changed 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OP + +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -filetype=null %s \ +# RUN: -p dead-mi-elimination -print-changed 2>&1 | FileCheck %s --check-prefix=CHECK-SIMPLE + +--- +name: test +body: | + bb.0: + %1:gr64 = MOV64ri 0 + %2:gr64 = MOV64ri 0 + $eax = COPY %1 + RET64 implicit $eax +... + +# CHECK-NO-OP: *** IR Dump After NoOpMachineFunctionPass on test omitted because no change *** + +# CHECK-SIMPLE: *** IR Dump After DeadMachineInstructionElimPass on test *** +# CHECK-SIMPLE-NOT: %2:gr64 = MOV64ri 0 From 53003e36e9f4574d06c22611f61f68de32c89c6b Mon Sep 17 00:00:00 2001 From: Sacha Coppey Date: Thu, 11 Apr 2024 06:19:56 +0200 Subject: [PATCH 094/886] [RISCV] Implement Statepoint and Patchpoint lowering to call instructions (#77337) This patch adds stackmap support for RISC-V with call targets. Based on patch from https://reviews.llvm.org/D129848. 
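A minimal sketch of how a frontend could create such a statepoint through IRBuilder (this program is not part of the patch; it assumes LLVM's CreateGCStatepointCall helper, and the names "do_safepoint" and "caller" are illustrative only). With NumPatchBytes set to 0, the call target survives into the STATEPOINT pseudo instruction, which the lowering added below then emits as a real call on riscv64:

// Sketch only: emit a statepoint that wraps a real call target.
// Assumes IRBuilder's CreateGCStatepointCall; "do_safepoint" and
// "caller" are made-up names for this example.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("statepoint-demo", Ctx);

  // The actual callee wrapped by the statepoint.
  FunctionCallee Callee = M.getOrInsertFunction(
      "do_safepoint", FunctionType::get(Type::getVoidTy(Ctx), false));

  // A caller that uses the statepoint GC strategy, as in the tests below.
  Function *F =
      Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                       Function::ExternalLinkage, "caller", &M);
  F->setGC("statepoint-example");
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // ID 0 and zero patch bytes: the callee stays a real call target, so the
  // lowering in this patch can emit an actual call/jalr for it.
  B.CreateGCStatepointCall(/*ID=*/0, /*NumPatchBytes=*/0, Callee,
                           /*CallArgs=*/{}, /*DeoptArgs=*/std::nullopt,
                           /*GCArgs=*/{});
  B.CreateRetVoid();

  verifyModule(M, &errs());
  M.print(outs(), nullptr); // feed the printed IR to llc -mtriple=riscv64
}

Running the printed IR through llc -mtriple=riscv64 should now produce a plain call to do_safepoint at the statepoint, which is the behavior the new rv64-statepoint-call-lowering tests below check.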
--- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 57 ++++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 + llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 +- llvm/test/CodeGen/RISCV/rv64-patchpoint.ll | 46 ++- .../RISCV/rv64-statepoint-call-lowering-x1.ll | 16 ++ .../RISCV/rv64-statepoint-call-lowering-x2.ll | 23 ++ .../RISCV/rv64-statepoint-call-lowering.ll | 262 ++++++++++++++++++ 7 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x1.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x2.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering.ll diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 9982a73ee914d..779f179dff619 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVInstPrinter.h" #include "MCTargetDesc/RISCVMCExpr.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "MCTargetDesc/RISCVTargetStreamer.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" @@ -153,8 +154,35 @@ void RISCVAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, PatchPointOpers Opers(&MI); + const MachineOperand &CalleeMO = Opers.getCallTarget(); unsigned EncodedBytes = 0; + if (CalleeMO.isImm()) { + uint64_t CallTarget = CalleeMO.getImm(); + if (CallTarget) { + assert((CallTarget & 0xFFFF'FFFF'FFFF) == CallTarget && + "High 16 bits of call target should be zero."); + // Materialize the jump address: + SmallVector Seq; + RISCVMatInt::generateMCInstSeq(CallTarget, *STI, RISCV::X1, Seq); + for (MCInst &Inst : Seq) { + bool Compressed = EmitToStreamer(OutStreamer, Inst); + EncodedBytes += Compressed ? 2 : 4; + } + bool Compressed = EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JALR) + .addReg(RISCV::X1) + .addReg(RISCV::X1) + .addImm(0)); + EncodedBytes += Compressed ? 2 : 4; + } + } else if (CalleeMO.isGlobal()) { + MCOperand CallTargetMCOp; + lowerOperand(CalleeMO, CallTargetMCOp); + EmitToStreamer(OutStreamer, + MCInstBuilder(RISCV::PseudoCALL).addOperand(CallTargetMCOp)); + EncodedBytes += 8; + } + // Emit padding. 
unsigned NumBytes = Opers.getNumPatchBytes(); assert(NumBytes >= EncodedBytes && @@ -173,6 +201,35 @@ void RISCVAsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert(PatchBytes % NOPBytes == 0 && "Invalid number of NOP bytes requested!"); emitNops(PatchBytes / NOPBytes); + } else { + // Lower call target and choose correct opcode + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + lowerOperand(CallTarget, CallTargetMCOp); + EmitToStreamer( + OutStreamer, + MCInstBuilder(RISCV::PseudoCALL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JAL) + .addReg(RISCV::X1) + .addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JALR) + .addReg(RISCV::X1) + .addOperand(CallTargetMCOp) + .addImm(0)); + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } } auto &Ctx = OutStreamer.getContext(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 357432081ddb0..3e7bc8c2367de 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17910,6 +17910,17 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case RISCV::PseudoFROUND_D_IN32X: return emitFROUND(MI, BB, Subtarget); case TargetOpcode::STATEPOINT: + // STATEPOINT is a pseudo instruction which has no implicit defs/uses + // while jal call instruction (where statepoint will be lowered at the end) + // has implicit def. This def is early-clobber as it will be set at + // the moment of the call and earlier than any use is read. + // Add this implicit dead def here as a workaround. 
+ MI.addOperand(*MI.getMF(), + MachineOperand::CreateReg( + RISCV::X1, /*isDef*/ true, + /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, + /*isUndef*/ false, /*isEarlyClobber*/ true)); + [[fallthrough]]; case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: if (!Subtarget.is64Bit()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 6b75efe684d91..84d754e3cbcf3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1469,9 +1469,12 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { case TargetOpcode::PATCHPOINT: // The size of the patchpoint intrinsic is the number of bytes requested return PatchPointOpers(&MI).getNumPatchBytes(); - case TargetOpcode::STATEPOINT: + case TargetOpcode::STATEPOINT: { // The size of the statepoint intrinsic is the number of bytes requested - return StatepointOpers(&MI).getNumPatchBytes(); + unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + // No patch bytes means at most a PseudoCall is emitted + return std::max(NumBytes, 8U); + } default: return get(Opcode).getSize(); } diff --git a/llvm/test/CodeGen/RISCV/rv64-patchpoint.ll b/llvm/test/CodeGen/RISCV/rv64-patchpoint.ll index 51c2ae908e842..adf5f9863b79f 100644 --- a/llvm/test/CodeGen/RISCV/rv64-patchpoint.ll +++ b/llvm/test/CodeGen/RISCV/rv64-patchpoint.ll @@ -1,12 +1,56 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -debug-entry-values -enable-misched=0 < %s | FileCheck %s +; Trivial patchpoint codegen +; +define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +; CHECK-LABEL: trivial_patchpoint_codegen: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset s0, -8 +; CHECK-NEXT: .cfi_offset s1, -16 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: lui ra, 3563 +; CHECK-NEXT: addiw ra, ra, -577 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -259 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -1282 +; CHECK-NEXT: jalr ra +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: lui ra, 3563 +; CHECK-NEXT: addiw ra, ra, -577 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -259 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -1281 +; CHECK-NEXT: jalr ra +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %resolveCall2 = inttoptr i64 244837814094590 to i8* + %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 28, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + %resolveCall3 = inttoptr i64 244837814094591 to i8* + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 28, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + ret i64 %result +} + ; Test small patchpoints that don't emit calls. 
define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { ; CHECK-LABEL: small_patchpoint_codegen: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .Ltmp2: ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x1.ll b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x1.ll new file mode 100644 index 0000000000000..3ba49653cd01e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x1.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple riscv64 -verify-machineinstrs -stop-after=prologepilog < %s | FileCheck %s + +; Check that STATEPOINT instruction has an early clobber implicit def for LR. +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "riscv64" + +define void @test() "frame-pointer"="all" gc "statepoint-example" { +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" ()] +; CHECK: STATEPOINT 0, 0, 0, target-flags(riscv-call) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32_lp64, implicit-def $x2, implicit-def dead early-clobber $x1 + ret void +} + + +declare void @return_i1() +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) diff --git a/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x2.ll b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x2.ll new file mode 100644 index 0000000000000..9c99f64bcacc0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering-x2.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple riscv64 -verify-machineinstrs -stop-after=prologepilog < %s | FileCheck %s + +; Check that STATEPOINT instruction prefer to use x2 in presense of x8. +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "riscv64" + +declare void @consume(ptr addrspace(1) %obj) + +define i1 @test(ptr addrspace(1) %a) "frame-pointer"="all" gc "statepoint-example" { +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] +; CHECK: STATEPOINT 0, 0, 0, target-flags(riscv-call) @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, $x8, -32, 2, 0, 2, 1, 0, 0 + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(ptr addrspace(1) %call1) + ret i1 %call2 +} + + +declare i1 @return_i1() +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) +declare i1 @llvm.experimental.gc.result.i1(token) diff --git a/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering.ll b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering.ll new file mode 100644 index 0000000000000..2fa344d4d79a7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering.ll @@ -0,0 +1,262 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; A collection of basic functionality tests for statepoint lowering - most +; interesting cornercases are exercised through the x86 tests. 
+ +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "riscv64" + +%struct = type { i64, i64 } + +declare zeroext i1 @return_i1() +declare zeroext i32 @return_i32() +declare ptr @return_i32ptr() +declare float @return_float() +declare %struct @return_struct() +declare void @varargf(i32, ...) + +define i1 @test_i1_return() gc "statepoint-example" { +; CHECK-LABEL: test_i1_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: call return_i1 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; This is just checking that a i1 gets lowered normally when there's no extra +; state arguments to the statepoint +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +define i32 @test_i32_return() gc "statepoint-example" { +; CHECK-LABEL: test_i32_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: call return_i32 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) + ret i32 %call1 +} + +define ptr @test_i32ptr_return() gc "statepoint-example" { +; CHECK-LABEL: test_i32ptr_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: call return_i32ptr +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(ptr ()) @return_i32ptr, i32 0, i32 0, i32 0, i32 0) + %call1 = call ptr @llvm.experimental.gc.result.p0(token %safepoint_token) + ret ptr %call1 +} + +define float @test_float_return() gc "statepoint-example" { +; CHECK-LABEL: test_float_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: call return_float +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(float ()) @return_float, i32 0, i32 0, i32 0, i32 0) + %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) + ret float %call1 +} + +define %struct @test_struct_return() gc "statepoint-example" { +; CHECK-LABEL: test_struct_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: call return_struct +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(%struct ()) @return_struct, i32 0, i32 0, i32 0, i32 0) + %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) + ret %struct %call1 +} + +define i1 @test_relocate(ptr addrspace(1) %a) gc "statepoint-example" { +; CHECK-LABEL: test_relocate: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: sd a0, 0(sp) +; CHECK-NEXT: call return_i1 +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; Check that an ununsed relocate has no code-generation impact +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call2 +} + +define void @test_void_vararg() gc "statepoint-example" { +; CHECK-LABEL: test_void_vararg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: li a1, 43 +; CHECK-NEXT: call varargf +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; Check a statepoint wrapping a *ptr returning vararg function works +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ...)) @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) + ;; if we try to use the result from a statepoint wrapping a + ;; non-void-returning varargf, we will experience a crash. + ret void +} + +define i1 @test_i1_return_patchable() gc "statepoint-example" { +; CHECK-LABEL: test_i1_return_patchable: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; A patchable variant of test_i1_return +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 4, ptr elementtype(i1 ()) null, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +declare void @consume(ptr addrspace(1) %obj) + +define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) gc "statepoint-example" { +; CHECK-LABEL: test_cross_bb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: andi s0, a1, 1 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: call return_i1 +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: beqz s0, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %left +; CHECK-NEXT: ld a1, 8(sp) +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: call consume +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: j .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %right +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: .LBB8_3: # %right +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + br i1 %external_cond, label %left, label %right + +left: + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(ptr addrspace(1) %call1) + ret i1 %call2 + +right: + ret i1 true +} + +%struct2 = type { i64, i64, i64 } + +declare void @consume_attributes(i32, ptr nest, i32, ptr byval(%struct2)) + +define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" { +; CHECK-LABEL: test_attributes: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: ld a1, 16(a0) +; CHECK-NEXT: sd a1, 16(sp) +; CHECK-NEXT: ld a1, 8(a0) +; CHECK-NEXT: sd a1, 8(sp) +; CHECK-NEXT: ld a0, 0(a0) +; CHECK-NEXT: sd a0, 0(sp) +; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: li a1, 17 +; CHECK-NEXT: mv a2, sp +; CHECK-NEXT: li t2, 0 +; CHECK-NEXT: call consume_attributes +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: +; Check that arguments with attributes are lowered correctly. +; We call a function that has a nest argument and a byval argument. + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ptr, i32, ptr)) @consume_attributes, i32 4, i32 0, i32 42, ptr nest null, i32 17, ptr byval(%struct2) %s, i32 0, i32 0) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) 
+declare i1 @llvm.experimental.gc.result.i1(token) + +declare i32 @llvm.experimental.gc.result.i32(token) + +declare ptr @llvm.experimental.gc.result.p0(token) + +declare float @llvm.experimental.gc.result.f32(token) + +declare %struct @llvm.experimental.gc.result.struct(token) + + + +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) From b5640369337e98e573c949080ed4a4061ec6ec9a Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Thu, 11 Apr 2024 12:20:27 +0800 Subject: [PATCH 095/886] [MachineCombiner][NFC] Split target-dependent patterns We split target-dependent MachineCombiner patterns into their respective target folders. This makes MachineCombiner much more target-independent. Reviewers: davemgreen, asavonic, rotateright, RKSimon, lukel97, LuoYuanke, topperc, mshockwave, asi-sc Reviewed By: topperc, mshockwave Pull Request: https://github.com/llvm/llvm-project/pull/87991 --- .../llvm/CodeGen/MachineCombinerPattern.h | 168 +---- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 25 +- llvm/lib/CodeGen/MachineCombiner.cpp | 72 +-- llvm/lib/CodeGen/TargetInstrInfo.cpp | 19 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 572 +++++++++--------- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 152 ++++- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 77 ++- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 26 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 83 +-- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 23 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 12 +- llvm/lib/Target/X86/X86InstrInfo.h | 15 +- 12 files changed, 642 insertions(+), 602 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 41b73eaae0298..3428c4dde5c7f 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -16,8 +16,16 @@ namespace llvm { +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + MustReduceRegisterPressure, // The register pressure must be reduced. + Default // The critical path must not be lengthened. +}; + /// These are instruction patterns matched by the machine combiner pass. -enum class MachineCombinerPattern { +enum MachineCombinerPattern : unsigned { // These are commutative variants for reassociating a computation chain. See // the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp. REASSOC_AX_BY, @@ -25,163 +33,7 @@ enum class MachineCombinerPattern { REASSOC_XA_BY, REASSOC_XA_YB, - // These are patterns matched by the PowerPC to reassociate FMA chains. - REASSOC_XY_AMM_BMM, - REASSOC_XMM_AMM_BMM, - - // These are patterns matched by the PowerPC to reassociate FMA and FSUB to - // reduce register pressure. - REASSOC_XY_BCA, - REASSOC_XY_BAC, - - // These are patterns used to reduce the length of dependence chain. - SUBADD_OP1, - SUBADD_OP2, - - // These are multiply-add patterns matched by the AArch64 machine combiner.
- MULADDW_OP1, - MULADDW_OP2, - MULSUBW_OP1, - MULSUBW_OP2, - MULADDWI_OP1, - MULSUBWI_OP1, - MULADDX_OP1, - MULADDX_OP2, - MULSUBX_OP1, - MULSUBX_OP2, - MULADDXI_OP1, - MULSUBXI_OP1, - // NEON integers vectors - MULADDv8i8_OP1, - MULADDv8i8_OP2, - MULADDv16i8_OP1, - MULADDv16i8_OP2, - MULADDv4i16_OP1, - MULADDv4i16_OP2, - MULADDv8i16_OP1, - MULADDv8i16_OP2, - MULADDv2i32_OP1, - MULADDv2i32_OP2, - MULADDv4i32_OP1, - MULADDv4i32_OP2, - - MULSUBv8i8_OP1, - MULSUBv8i8_OP2, - MULSUBv16i8_OP1, - MULSUBv16i8_OP2, - MULSUBv4i16_OP1, - MULSUBv4i16_OP2, - MULSUBv8i16_OP1, - MULSUBv8i16_OP2, - MULSUBv2i32_OP1, - MULSUBv2i32_OP2, - MULSUBv4i32_OP1, - MULSUBv4i32_OP2, - - MULADDv4i16_indexed_OP1, - MULADDv4i16_indexed_OP2, - MULADDv8i16_indexed_OP1, - MULADDv8i16_indexed_OP2, - MULADDv2i32_indexed_OP1, - MULADDv2i32_indexed_OP2, - MULADDv4i32_indexed_OP1, - MULADDv4i32_indexed_OP2, - - MULSUBv4i16_indexed_OP1, - MULSUBv4i16_indexed_OP2, - MULSUBv8i16_indexed_OP1, - MULSUBv8i16_indexed_OP2, - MULSUBv2i32_indexed_OP1, - MULSUBv2i32_indexed_OP2, - MULSUBv4i32_indexed_OP1, - MULSUBv4i32_indexed_OP2, - - // Floating Point - FMULADDH_OP1, - FMULADDH_OP2, - FMULSUBH_OP1, - FMULSUBH_OP2, - FMULADDS_OP1, - FMULADDS_OP2, - FMULSUBS_OP1, - FMULSUBS_OP2, - FMULADDD_OP1, - FMULADDD_OP2, - FMULSUBD_OP1, - FMULSUBD_OP2, - FNMULSUBH_OP1, - FNMULSUBS_OP1, - FNMULSUBD_OP1, - FMLAv1i32_indexed_OP1, - FMLAv1i32_indexed_OP2, - FMLAv1i64_indexed_OP1, - FMLAv1i64_indexed_OP2, - FMLAv4f16_OP1, - FMLAv4f16_OP2, - FMLAv8f16_OP1, - FMLAv8f16_OP2, - FMLAv2f32_OP2, - FMLAv2f32_OP1, - FMLAv2f64_OP1, - FMLAv2f64_OP2, - FMLAv4i16_indexed_OP1, - FMLAv4i16_indexed_OP2, - FMLAv8i16_indexed_OP1, - FMLAv8i16_indexed_OP2, - FMLAv2i32_indexed_OP1, - FMLAv2i32_indexed_OP2, - FMLAv2i64_indexed_OP1, - FMLAv2i64_indexed_OP2, - FMLAv4f32_OP1, - FMLAv4f32_OP2, - FMLAv4i32_indexed_OP1, - FMLAv4i32_indexed_OP2, - FMLSv1i32_indexed_OP2, - FMLSv1i64_indexed_OP2, - FMLSv4f16_OP1, - FMLSv4f16_OP2, - FMLSv8f16_OP1, - FMLSv8f16_OP2, - FMLSv2f32_OP1, - FMLSv2f32_OP2, - FMLSv2f64_OP1, - FMLSv2f64_OP2, - FMLSv4i16_indexed_OP1, - FMLSv4i16_indexed_OP2, - FMLSv8i16_indexed_OP1, - FMLSv8i16_indexed_OP2, - FMLSv2i32_indexed_OP1, - FMLSv2i32_indexed_OP2, - FMLSv2i64_indexed_OP1, - FMLSv2i64_indexed_OP2, - FMLSv4f32_OP1, - FMLSv4f32_OP2, - FMLSv4i32_indexed_OP1, - FMLSv4i32_indexed_OP2, - - FMULv2i32_indexed_OP1, - FMULv2i32_indexed_OP2, - FMULv2i64_indexed_OP1, - FMULv2i64_indexed_OP2, - FMULv4i16_indexed_OP1, - FMULv4i16_indexed_OP2, - FMULv4i32_indexed_OP1, - FMULv4i32_indexed_OP2, - FMULv8i16_indexed_OP1, - FMULv8i16_indexed_OP2, - - // RISCV FMADD, FMSUB, FNMSUB patterns - FMADD_AX, - FMADD_XA, - FMSUB, - FNMSUB, - SHXADD_ADD_SLLI_OP1, - SHXADD_ADD_SLLI_OP2, - - // X86 VNNI - DPWSSD, - - FNMADD, + TARGET_PATTERN_START }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 9fd0ebe6956fb..d4a83e3753d98 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Uniformity.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -61,7 +62,6 @@ class TargetRegisterClass; class TargetRegisterInfo; class TargetSchedModel; class TargetSubtargetInfo; -enum class MachineCombinerPattern; enum class 
MachineTraceStrategy; template class SmallVectorImpl; @@ -1191,10 +1191,9 @@ class TargetInstrInfo : public MCInstrInfo { /// faster sequence. /// \param Root - Instruction that could be combined with one of its operands /// \param Patterns - Vector of possible combination patterns - virtual bool - getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, - bool DoRegPressureReduce) const; + virtual bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const; /// Return true if target supports reassociation of instructions in machine /// combiner pass to reduce register pressure for a given BB. @@ -1206,13 +1205,17 @@ class TargetInstrInfo : public MCInstrInfo { /// Fix up the placeholder we may add in genAlternativeCodeSequence(). virtual void - finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl &InsInstrs) const {} /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. /// \param Pattern - combiner pattern - virtual bool isThroughputPattern(MachineCombinerPattern Pattern) const; + virtual bool isThroughputPattern(unsigned Pattern) const; + + /// Return the objective of a combiner pattern. + /// \param Pattern - combiner pattern + virtual CombinerObjective getCombinerObjective(unsigned Pattern) const; /// Return true if the input \P Inst is part of a chain of dependent ops /// that are suitable for reassociation, otherwise return false. @@ -1256,7 +1259,7 @@ class TargetInstrInfo : public MCInstrInfo { /// \param InstIdxForVirtReg - map of virtual register to instruction in /// InsInstr that defines it virtual void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstIdxForVirtReg) const; @@ -1270,8 +1273,7 @@ class TargetInstrInfo : public MCInstrInfo { /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to /// reduce critical path length. - void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern Pattern, + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const; @@ -1281,8 +1283,7 @@ class TargetInstrInfo : public MCInstrInfo { /// (new root opcode, new prev opcode) that must be used to reassociate \P /// Root and \P Prev accoring to \P Pattern. std::pair - getReassociationOpcodes(MachineCombinerPattern Pattern, - const MachineInstr &Root, + getReassociationOpcodes(unsigned Pattern, const MachineInstr &Root, const MachineInstr &Prev) const; /// The limit on resource length extension we accept in MachineCombiner Pass. 
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index a4c87a7678bd8..ac58162bbfb42 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -99,17 +99,16 @@ class MachineCombiner : public MachineFunctionPass { const MachineBasicBlock &MBB); unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace); - bool - improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg, - MachineCombinerPattern Pattern, bool SlackIsAccurate); + bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg, + unsigned Pattern, bool SlackIsAccurate); bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, - MachineCombinerPattern Pattern); + unsigned Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, @@ -123,7 +122,8 @@ class MachineCombiner : public MachineFunctionPass { MachineTraceMetrics::Trace BlockTrace); void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root, - SmallVector &Patterns); + SmallVector &Patterns); + CombinerObjective getCombinerObjective(unsigned Pattern); }; } @@ -290,36 +290,17 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, return NewRootLatency; } -/// The combiner's goal may differ based on which pattern it is attempting -/// to optimize. -enum class CombinerObjective { - MustReduceDepth, // The data dependency chain must be improved. - MustReduceRegisterPressure, // The register pressure must be reduced. - Default // The critical path must not be lengthened. -}; - -static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { +CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) { // TODO: If C++ ever gets a real enum class, make this part of the // MachineCombinerPattern class. - switch (P) { + switch (Pattern) { case MachineCombinerPattern::REASSOC_AX_BY: case MachineCombinerPattern::REASSOC_AX_YB: case MachineCombinerPattern::REASSOC_XA_BY: case MachineCombinerPattern::REASSOC_XA_YB: - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - case MachineCombinerPattern::SUBADD_OP1: - case MachineCombinerPattern::SUBADD_OP2: - case MachineCombinerPattern::FMADD_AX: - case MachineCombinerPattern::FMADD_XA: - case MachineCombinerPattern::FMSUB: - case MachineCombinerPattern::FNMSUB: return CombinerObjective::MustReduceDepth; - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: - return CombinerObjective::MustReduceRegisterPressure; default: - return CombinerObjective::Default; + return TII->getCombinerObjective(Pattern); } } @@ -349,8 +330,7 @@ std::pair MachineCombiner::getLatenciesForInstrSequences( bool MachineCombiner::reduceRegisterPressure( MachineInstr &Root, MachineBasicBlock *MBB, SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - MachineCombinerPattern Pattern) { + SmallVectorImpl &DelInstrs, unsigned Pattern) { // FIXME: for now, we don't do any check for the register pressure patterns. // We treat them as always profitable. 
But we can do better if we make // RegPressureTracker class be aware of TIE attribute. Then we can get an @@ -368,8 +348,7 @@ bool MachineCombiner::improvesCriticalPathLen( MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - MachineCombinerPattern Pattern, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern, bool SlackIsAccurate) { // Get depth and latency of NewRoot and Root. unsigned NewRootDepth = @@ -493,13 +472,14 @@ bool MachineCombiner::preservesResourceLen( /// \param Pattern is used to call target hook finalizeInsInstrs /// \param IncrementalUpdate if true, compute instruction depths incrementally, /// otherwise invalidate the trace -static void insertDeleteInstructions( - MachineBasicBlock *MBB, MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &InsInstrs, - SmallVectorImpl<MachineInstr *> &DelInstrs, - MachineTraceMetrics::Ensemble *TraceEnsemble, - SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII, - MachineCombinerPattern Pattern, bool IncrementalUpdate) { +static void +insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineTraceMetrics::Ensemble *TraceEnsemble, + SparseSet<LiveRegUnit> &RegUnits, + const TargetInstrInfo *TII, unsigned Pattern, + bool IncrementalUpdate) { // If we want to fix up some placeholder for some target, do it now. // We need this because in genAlternativeCodeSequence, we have not decided the // better pattern InsInstrs or DelInstrs, so we don't want to generate some @@ -534,9 +514,9 @@ static void insertDeleteInstructions( // Check that the difference between original and new latency is decreasing for // later patterns. This helps to discover sub-optimal pattern orderings. -void MachineCombiner::verifyPatternOrder( - MachineBasicBlock *MBB, MachineInstr &Root, - SmallVector<MachineCombinerPattern, 16> &Patterns) { +void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB, + MachineInstr &Root, + SmallVector<unsigned, 16> &Patterns) { long PrevLatencyDiff = std::numeric_limits<long>::max(); (void)PrevLatencyDiff; // Variable is used in assert only. for (auto P : Patterns) { @@ -590,7 +570,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; - SmallVector<MachineCombinerPattern, 16> Patterns; + SmallVector<unsigned, 16> Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 9fbd516acea8e..7d77e5d1a1ff0 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -919,7 +919,7 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, // instruction is known to not increase the critical path, then don't match // that pattern. bool TargetInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns, bool DoRegPressureReduce) const { bool Commute; if (isReassociationCandidate(Root, Commute)) { @@ -941,13 +941,17 @@ bool TargetInstrInfo::getMachineCombinerPatterns( } /// Return true when a code sequence can improve loop throughput.
-bool -TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { +bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const { return false; } +CombinerObjective +TargetInstrInfo::getCombinerObjective(unsigned Pattern) const { + return CombinerObjective::Default; +} + std::pair<unsigned, unsigned> -TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern, +TargetInstrInfo::getReassociationOpcodes(unsigned Pattern, const MachineInstr &Root, const MachineInstr &Prev) const { bool AssocCommutRoot = isAssociativeAndCommutative(Root); @@ -1036,7 +1040,7 @@ TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern, // Return a pair of boolean flags showing if the new root and new prev operands // must be swapped. See visual example of the rule in // TargetInstrInfo::getReassociationOpcodes. -static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) { +static std::pair<bool, bool> mustSwapOperands(unsigned Pattern) { switch (Pattern) { default: llvm_unreachable("Unexpected pattern"); @@ -1054,8 +1058,7 @@ static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) { /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( - MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern Pattern, + MachineInstr &Root, MachineInstr &Prev, unsigned Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { @@ -1177,7 +1180,7 @@ void TargetInstrInfo::reassociateOps( } void TargetInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 9783b33219460..92647cb405252 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6043,7 +6043,7 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, /// Find instructions that can be turned into madd. static bool getMaddPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { + SmallVectorImpl<unsigned> &Patterns) { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -6064,21 +6064,21 @@ static bool getMaddPatterns(MachineInstr &Root, } auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, - MachineCombinerPattern Pattern) { + unsigned Pattern) { if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { Patterns.push_back(Pattern); Found = true; } }; - auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { + auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) { if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { Patterns.push_back(Pattern); Found = true; } }; - typedef MachineCombinerPattern MCP; + typedef AArch64MachineCombinerPattern MCP; switch (Opc) { default: @@ -6184,7 +6184,7 @@ static bool getMaddPatterns(MachineInstr &Root, /// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { + SmallVectorImpl<unsigned> &Patterns) { if (!isCombineInstrCandidateFP(Root)) return false; @@ -6192,8 +6192,7 @@ static bool getFMAPatterns(MachineInstr &Root, MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; - auto Match = [&](int Opcode, int Operand, - MachineCombinerPattern Pattern) -> bool { + auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool { if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { Patterns.push_back(Pattern); return true; @@ -6201,7 +6200,7 @@ static bool getFMAPatterns(MachineInstr &Root, return false; }; - typedef MachineCombinerPattern MCP; + typedef AArch64MachineCombinerPattern MCP; switch (Root.getOpcode()) { default: @@ -6327,12 +6326,11 @@ } static bool getFMULPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { + SmallVectorImpl<unsigned> &Patterns) { MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; - auto Match = [&](unsigned Opcode, int Operand, - MachineCombinerPattern Pattern) -> bool { + auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineOperand &MO = Root.getOperand(Operand); MachineInstr *MI = nullptr; @@ -6349,7 +6347,7 @@ static bool getFMULPatterns(MachineInstr &Root, return false; }; - typedef MachineCombinerPattern MCP; + typedef AArch64MachineCombinerPattern MCP; switch (Root.getOpcode()) { default: @@ -6380,12 +6378,12 @@ } static bool getFNEGPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { + SmallVectorImpl<unsigned> &Patterns) { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool { + auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool { MachineOperand &MO = Root.getOperand(1); MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg()); if (MI != nullptr && (MI->getOpcode() == Opcode) && @@ -6404,9 +6402,9 @@ static bool getFNEGPatterns(MachineInstr &Root, default: break; case AArch64::FNEGDr: - return Match(AArch64::FMADDDrrr, MachineCombinerPattern::FNMADD); + return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD); case AArch64::FNEGSr: - return Match(AArch64::FMADDSrrr, MachineCombinerPattern::FNMADD); + return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD); } return false; @@ -6415,116 +6413,115 @@ static bool getFNEGPatterns(MachineInstr &Root, /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops.
/// \param Pattern - combiner pattern -bool AArch64InstrInfo::isThroughputPattern( - MachineCombinerPattern Pattern) const { +bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const { switch (Pattern) { default: break; - case MachineCombinerPattern::FMULADDH_OP1: - case MachineCombinerPattern::FMULADDH_OP2: - case MachineCombinerPattern::FMULSUBH_OP1: - case MachineCombinerPattern::FMULSUBH_OP2: - case MachineCombinerPattern::FMULADDS_OP1: - case MachineCombinerPattern::FMULADDS_OP2: - case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULADDD_OP1: - case MachineCombinerPattern::FMULADDD_OP2: - case MachineCombinerPattern::FMULSUBD_OP1: - case MachineCombinerPattern::FMULSUBD_OP2: - case MachineCombinerPattern::FNMULSUBH_OP1: - case MachineCombinerPattern::FNMULSUBS_OP1: - case MachineCombinerPattern::FNMULSUBD_OP1: - case MachineCombinerPattern::FMLAv4i16_indexed_OP1: - case MachineCombinerPattern::FMLAv4i16_indexed_OP2: - case MachineCombinerPattern::FMLAv8i16_indexed_OP1: - case MachineCombinerPattern::FMLAv8i16_indexed_OP2: - case MachineCombinerPattern::FMLAv1i32_indexed_OP1: - case MachineCombinerPattern::FMLAv1i32_indexed_OP2: - case MachineCombinerPattern::FMLAv1i64_indexed_OP1: - case MachineCombinerPattern::FMLAv1i64_indexed_OP2: - case MachineCombinerPattern::FMLAv4f16_OP2: - case MachineCombinerPattern::FMLAv4f16_OP1: - case MachineCombinerPattern::FMLAv8f16_OP1: - case MachineCombinerPattern::FMLAv8f16_OP2: - case MachineCombinerPattern::FMLAv2f32_OP2: - case MachineCombinerPattern::FMLAv2f32_OP1: - case MachineCombinerPattern::FMLAv2f64_OP1: - case MachineCombinerPattern::FMLAv2f64_OP2: - case MachineCombinerPattern::FMLAv2i32_indexed_OP1: - case MachineCombinerPattern::FMLAv2i32_indexed_OP2: - case MachineCombinerPattern::FMLAv2i64_indexed_OP1: - case MachineCombinerPattern::FMLAv2i64_indexed_OP2: - case MachineCombinerPattern::FMLAv4f32_OP1: - case MachineCombinerPattern::FMLAv4f32_OP2: - case MachineCombinerPattern::FMLAv4i32_indexed_OP1: - case MachineCombinerPattern::FMLAv4i32_indexed_OP2: - case MachineCombinerPattern::FMLSv4i16_indexed_OP1: - case MachineCombinerPattern::FMLSv4i16_indexed_OP2: - case MachineCombinerPattern::FMLSv8i16_indexed_OP1: - case MachineCombinerPattern::FMLSv8i16_indexed_OP2: - case MachineCombinerPattern::FMLSv1i32_indexed_OP2: - case MachineCombinerPattern::FMLSv1i64_indexed_OP2: - case MachineCombinerPattern::FMLSv2i32_indexed_OP2: - case MachineCombinerPattern::FMLSv2i64_indexed_OP2: - case MachineCombinerPattern::FMLSv4f16_OP1: - case MachineCombinerPattern::FMLSv4f16_OP2: - case MachineCombinerPattern::FMLSv8f16_OP1: - case MachineCombinerPattern::FMLSv8f16_OP2: - case MachineCombinerPattern::FMLSv2f32_OP2: - case MachineCombinerPattern::FMLSv2f64_OP2: - case MachineCombinerPattern::FMLSv4i32_indexed_OP2: - case MachineCombinerPattern::FMLSv4f32_OP2: - case MachineCombinerPattern::FMULv2i32_indexed_OP1: - case MachineCombinerPattern::FMULv2i32_indexed_OP2: - case MachineCombinerPattern::FMULv2i64_indexed_OP1: - case MachineCombinerPattern::FMULv2i64_indexed_OP2: - case MachineCombinerPattern::FMULv4i16_indexed_OP1: - case MachineCombinerPattern::FMULv4i16_indexed_OP2: - case MachineCombinerPattern::FMULv4i32_indexed_OP1: - case MachineCombinerPattern::FMULv4i32_indexed_OP2: - case MachineCombinerPattern::FMULv8i16_indexed_OP1: - case MachineCombinerPattern::FMULv8i16_indexed_OP2: - case MachineCombinerPattern::MULADDv8i8_OP1: - case 
MachineCombinerPattern::MULADDv8i8_OP2: - case MachineCombinerPattern::MULADDv16i8_OP1: - case MachineCombinerPattern::MULADDv16i8_OP2: - case MachineCombinerPattern::MULADDv4i16_OP1: - case MachineCombinerPattern::MULADDv4i16_OP2: - case MachineCombinerPattern::MULADDv8i16_OP1: - case MachineCombinerPattern::MULADDv8i16_OP2: - case MachineCombinerPattern::MULADDv2i32_OP1: - case MachineCombinerPattern::MULADDv2i32_OP2: - case MachineCombinerPattern::MULADDv4i32_OP1: - case MachineCombinerPattern::MULADDv4i32_OP2: - case MachineCombinerPattern::MULSUBv8i8_OP1: - case MachineCombinerPattern::MULSUBv8i8_OP2: - case MachineCombinerPattern::MULSUBv16i8_OP1: - case MachineCombinerPattern::MULSUBv16i8_OP2: - case MachineCombinerPattern::MULSUBv4i16_OP1: - case MachineCombinerPattern::MULSUBv4i16_OP2: - case MachineCombinerPattern::MULSUBv8i16_OP1: - case MachineCombinerPattern::MULSUBv8i16_OP2: - case MachineCombinerPattern::MULSUBv2i32_OP1: - case MachineCombinerPattern::MULSUBv2i32_OP2: - case MachineCombinerPattern::MULSUBv4i32_OP1: - case MachineCombinerPattern::MULSUBv4i32_OP2: - case MachineCombinerPattern::MULADDv4i16_indexed_OP1: - case MachineCombinerPattern::MULADDv4i16_indexed_OP2: - case MachineCombinerPattern::MULADDv8i16_indexed_OP1: - case MachineCombinerPattern::MULADDv8i16_indexed_OP2: - case MachineCombinerPattern::MULADDv2i32_indexed_OP1: - case MachineCombinerPattern::MULADDv2i32_indexed_OP2: - case MachineCombinerPattern::MULADDv4i32_indexed_OP1: - case MachineCombinerPattern::MULADDv4i32_indexed_OP2: - case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: - case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: - case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: - case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: - case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: - case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: - case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: - case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMULADDH_OP1: + case AArch64MachineCombinerPattern::FMULADDH_OP2: + case AArch64MachineCombinerPattern::FMULSUBH_OP1: + case AArch64MachineCombinerPattern::FMULSUBH_OP2: + case AArch64MachineCombinerPattern::FMULADDS_OP1: + case AArch64MachineCombinerPattern::FMULADDS_OP2: + case AArch64MachineCombinerPattern::FMULSUBS_OP1: + case AArch64MachineCombinerPattern::FMULSUBS_OP2: + case AArch64MachineCombinerPattern::FMULADDD_OP1: + case AArch64MachineCombinerPattern::FMULADDD_OP2: + case AArch64MachineCombinerPattern::FMULSUBD_OP1: + case AArch64MachineCombinerPattern::FMULSUBD_OP2: + case AArch64MachineCombinerPattern::FNMULSUBH_OP1: + case AArch64MachineCombinerPattern::FNMULSUBS_OP1: + case AArch64MachineCombinerPattern::FNMULSUBD_OP1: + case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv4f16_OP2: + case AArch64MachineCombinerPattern::FMLAv4f16_OP1: + case AArch64MachineCombinerPattern::FMLAv8f16_OP1: + case AArch64MachineCombinerPattern::FMLAv8f16_OP2: + case AArch64MachineCombinerPattern::FMLAv2f32_OP2: + case 
AArch64MachineCombinerPattern::FMLAv2f32_OP1: + case AArch64MachineCombinerPattern::FMLAv2f64_OP1: + case AArch64MachineCombinerPattern::FMLAv2f64_OP2: + case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv4f32_OP1: + case AArch64MachineCombinerPattern::FMLAv4f32_OP2: + case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv4f16_OP1: + case AArch64MachineCombinerPattern::FMLSv4f16_OP2: + case AArch64MachineCombinerPattern::FMLSv8f16_OP1: + case AArch64MachineCombinerPattern::FMLSv8f16_OP2: + case AArch64MachineCombinerPattern::FMLSv2f32_OP2: + case AArch64MachineCombinerPattern::FMLSv2f64_OP2: + case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv4f32_OP2: + case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv8i8_OP1: + case AArch64MachineCombinerPattern::MULADDv8i8_OP2: + case AArch64MachineCombinerPattern::MULADDv16i8_OP1: + case AArch64MachineCombinerPattern::MULADDv16i8_OP2: + case AArch64MachineCombinerPattern::MULADDv4i16_OP1: + case AArch64MachineCombinerPattern::MULADDv4i16_OP2: + case AArch64MachineCombinerPattern::MULADDv8i16_OP1: + case AArch64MachineCombinerPattern::MULADDv8i16_OP2: + case AArch64MachineCombinerPattern::MULADDv2i32_OP1: + case AArch64MachineCombinerPattern::MULADDv2i32_OP2: + case AArch64MachineCombinerPattern::MULADDv4i32_OP1: + case AArch64MachineCombinerPattern::MULADDv4i32_OP2: + case AArch64MachineCombinerPattern::MULSUBv8i8_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i8_OP2: + case AArch64MachineCombinerPattern::MULSUBv16i8_OP1: + case AArch64MachineCombinerPattern::MULSUBv16i8_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i16_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i16_OP2: + case AArch64MachineCombinerPattern::MULSUBv8i16_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i16_OP2: + case AArch64MachineCombinerPattern::MULSUBv2i32_OP1: + case AArch64MachineCombinerPattern::MULSUBv2i32_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i32_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i32_OP2: + case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1: + case 
AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2: return true; } // end switch (Pattern) return false; @@ -6532,8 +6529,7 @@ bool AArch64InstrInfo::isThroughputPattern( /// Find other MI combine patterns. static bool getMiscPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) -{ + SmallVectorImpl<unsigned> &Patterns) { // A - (B + C) ==> (A - B) - C or (A - C) - B unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); @@ -6557,21 +6553,32 @@ static bool getMiscPatterns(MachineInstr &Root, canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) || canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) || canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) { - Patterns.push_back(MachineCombinerPattern::SUBADD_OP1); - Patterns.push_back(MachineCombinerPattern::SUBADD_OP2); + Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1); + Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2); return true; } return false; } +CombinerObjective +AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const { + switch (Pattern) { + case AArch64MachineCombinerPattern::SUBADD_OP1: + case AArch64MachineCombinerPattern::SUBADD_OP2: + return CombinerObjective::MustReduceDepth; + default: + return TargetInstrInfo::getCombinerObjective(Pattern); + } +} + /// Return true when there is potentially a faster code sequence for an /// instruction chain ending in \p Root. All potential patterns are listed in /// the \p Pattern vector. Pattern should be sorted in priority order since the /// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns, bool DoRegPressureReduce) const { // Integer patterns if (getMaddPatterns(Root, Patterns)) @@ -6930,7 +6937,7 @@ genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, /// this function generates the instructions that could replace the /// original code sequence void AArch64InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { @@ -6948,25 +6955,25 @@ void AArch64InstrInfo::genAlternativeCodeSequence( TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; - case MachineCombinerPattern::SUBADD_OP1: + case AArch64MachineCombinerPattern::SUBADD_OP1: // A - (B + C) // ==> (A - B) - C genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1, InstrIdxForVirtReg); break; - case MachineCombinerPattern::SUBADD_OP2: + case AArch64MachineCombinerPattern::SUBADD_OP2: // A - (B + C) // ==> (A - C) - B genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2, InstrIdxForVirtReg); break; - case MachineCombinerPattern::MULADDW_OP1: - case MachineCombinerPattern::MULADDX_OP1: + case AArch64MachineCombinerPattern::MULADDW_OP1: + case AArch64MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 // ADD R,I,C // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MULADDW_OP1) { + if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -6975,13 +6982,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDW_OP2: - case MachineCombinerPattern::MULADDX_OP2: + case AArch64MachineCombinerPattern::MULADDW_OP2: + case AArch64MachineCombinerPattern::MULADDX_OP2: // MUL I=A,B,0 // ADD R,C,I // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MULADDW_OP2) { + if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -6990,8 +6997,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDWI_OP1: - case MachineCombinerPattern::MULADDXI_OP1: { + case AArch64MachineCombinerPattern::MULADDWI_OP1: + case AArch64MachineCombinerPattern::MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm // ==> MOV V, Imm @@ -6999,7 +7006,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { + if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -7052,8 +7059,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); break; } - case MachineCombinerPattern::MULSUBW_OP1: - case MachineCombinerPattern::MULSUBX_OP1: { + case AArch64MachineCombinerPattern::MULSUBW_OP1: + case AArch64MachineCombinerPattern::MULSUBX_OP1: { // MUL I=A,B,0 // SUB R,I, C // ==> SUB V, 0, C // --- Create(MADD);
const TargetRegisterClass *SubRC; unsigned SubOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { + if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) { SubOpc = AArch64::SUBWrr; SubRC = &AArch64::GPR32spRegClass; ZeroReg = AArch64::WZR; @@ -7085,13 +7092,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); break; } - case MachineCombinerPattern::MULSUBW_OP2: - case MachineCombinerPattern::MULSUBX_OP2: + case AArch64MachineCombinerPattern::MULSUBW_OP2: + case AArch64MachineCombinerPattern::MULSUBX_OP2: // MUL I=A,B,0 // SUB R,C,I // ==> MSUB R,A,B,C (computes C - A*B) // --- Create(MSUB); - if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { + if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) { Opc = AArch64::MSUBWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -7100,8 +7107,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBWI_OP1: - case MachineCombinerPattern::MULSUBXI_OP1: { + case AArch64MachineCombinerPattern::MULSUBWI_OP1: + case AArch64MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 // SUB R,I, Imm // ==> MOV V, -Imm @@ -7109,7 +7116,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { + if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -7162,318 +7169,318 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; } - case MachineCombinerPattern::MULADDv8i8_OP1: + case AArch64MachineCombinerPattern::MULADDv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv8i8_OP2: + case AArch64MachineCombinerPattern::MULADDv8i8_OP2: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv16i8_OP1: + case AArch64MachineCombinerPattern::MULADDv16i8_OP1: Opc = AArch64::MLAv16i8; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv16i8_OP2: + case AArch64MachineCombinerPattern::MULADDv16i8_OP2: Opc = AArch64::MLAv16i8; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i16_OP1: + case AArch64MachineCombinerPattern::MULADDv4i16_OP1: Opc = AArch64::MLAv4i16; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i16_OP2: + case AArch64MachineCombinerPattern::MULADDv4i16_OP2: Opc = AArch64::MLAv4i16; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv8i16_OP1: + case AArch64MachineCombinerPattern::MULADDv8i16_OP1: Opc = AArch64::MLAv8i16; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv8i16_OP2: + case AArch64MachineCombinerPattern::MULADDv8i16_OP2: Opc = AArch64::MLAv8i16; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, 
TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv2i32_OP1: + case AArch64MachineCombinerPattern::MULADDv2i32_OP1: Opc = AArch64::MLAv2i32; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv2i32_OP2: + case AArch64MachineCombinerPattern::MULADDv2i32_OP2: Opc = AArch64::MLAv2i32; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i32_OP1: + case AArch64MachineCombinerPattern::MULADDv4i32_OP1: Opc = AArch64::MLAv4i32; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i32_OP2: + case AArch64MachineCombinerPattern::MULADDv4i32_OP2: Opc = AArch64::MLAv4i32; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv8i8_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, RC); break; - case MachineCombinerPattern::MULSUBv8i8_OP2: + case AArch64MachineCombinerPattern::MULSUBv8i8_OP2: Opc = AArch64::MLSv8i8; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv16i8_OP1: + case AArch64MachineCombinerPattern::MULSUBv16i8_OP1: Opc = AArch64::MLAv16i8; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, RC); break; - case MachineCombinerPattern::MULSUBv16i8_OP2: + case AArch64MachineCombinerPattern::MULSUBv16i8_OP2: Opc = AArch64::MLSv16i8; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv4i16_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i16_OP1: Opc = AArch64::MLAv4i16; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, RC); break; - case MachineCombinerPattern::MULSUBv4i16_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i16_OP2: Opc = AArch64::MLSv4i16; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv8i16_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i16_OP1: Opc = AArch64::MLAv8i16; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, RC); break; - case MachineCombinerPattern::MULSUBv8i16_OP2: + case AArch64MachineCombinerPattern::MULSUBv8i16_OP2: Opc = AArch64::MLSv8i16; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv2i32_OP1: + case AArch64MachineCombinerPattern::MULSUBv2i32_OP1: Opc = AArch64::MLAv2i32; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, RC); break; - case MachineCombinerPattern::MULSUBv2i32_OP2: + case AArch64MachineCombinerPattern::MULSUBv2i32_OP2: Opc = AArch64::MLSv2i32; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case 
MachineCombinerPattern::MULSUBv4i32_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i32_OP1: Opc = AArch64::MLAv4i32; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, RC); break; - case MachineCombinerPattern::MULSUBv4i32_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i32_OP2: Opc = AArch64::MLSv4i32; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1: Opc = AArch64::MLAv4i16_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2: Opc = AArch64::MLAv4i16_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1: Opc = AArch64::MLAv8i16_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2: Opc = AArch64::MLAv8i16_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1: Opc = AArch64::MLAv2i32_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2: Opc = AArch64::MLAv2i32_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1: Opc = AArch64::MLAv4i32_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MULADDv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2: Opc = AArch64::MLAv4i32_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1: Opc = AArch64::MLAv4i16_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, RC); break; - case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2: Opc = AArch64::MLSv4i16_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1: Opc = AArch64::MLAv8i16_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, RC); break; - case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: + case 
AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2: Opc = AArch64::MLSv8i16_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1: Opc = AArch64::MLAv2i32_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, RC); break; - case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2: Opc = AArch64::MLSv2i32_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1: Opc = AArch64::MLAv4i32_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, RC); break; - case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2: Opc = AArch64::MLSv4i32_indexed; RC = &AArch64::FPR128RegClass; MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; // Floating Point Support - case MachineCombinerPattern::FMULADDH_OP1: + case AArch64MachineCombinerPattern::FMULADDH_OP1: Opc = AArch64::FMADDHrrr; RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULADDS_OP1: + case AArch64MachineCombinerPattern::FMULADDS_OP1: Opc = AArch64::FMADDSrrr; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULADDD_OP1: + case AArch64MachineCombinerPattern::FMULADDD_OP1: Opc = AArch64::FMADDDrrr; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULADDH_OP2: + case AArch64MachineCombinerPattern::FMULADDH_OP2: Opc = AArch64::FMADDHrrr; RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMULADDS_OP2: + case AArch64MachineCombinerPattern::FMULADDS_OP2: Opc = AArch64::FMADDSrrr; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMULADDD_OP2: + case AArch64MachineCombinerPattern::FMULADDD_OP2: Opc = AArch64::FMADDDrrr; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMLAv1i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1: Opc = AArch64::FMLAv1i32_indexed; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv1i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2: Opc = AArch64::FMLAv1i32_indexed; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv1i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1: Opc = AArch64::FMLAv1i64_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); break; - case 
MachineCombinerPattern::FMLAv1i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2: Opc = AArch64::FMLAv1i64_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLAv4i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv4f16_OP1: + case AArch64MachineCombinerPattern::FMLAv4f16_OP1: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLAv4f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLAv4i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv4f16_OP2: + case AArch64MachineCombinerPattern::FMLAv4f16_OP2: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLAv4f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLAv2i32_indexed_OP1: - case MachineCombinerPattern::FMLAv2f32_OP1: + case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv2f32_OP1: RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) { Opc = AArch64::FMLAv2i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); @@ -7483,10 +7490,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - case MachineCombinerPattern::FMLAv2i32_indexed_OP2: - case MachineCombinerPattern::FMLAv2f32_OP2: + case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv2f32_OP2: RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) { Opc = AArch64::FMLAv2i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7497,35 +7504,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLAv8i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv8f16_OP1: + case AArch64MachineCombinerPattern::FMLAv8f16_OP1: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLAv8f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLAv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLAv8i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLAv8f16_OP2: + case AArch64MachineCombinerPattern::FMLAv8f16_OP2: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLAv8f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 
2, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLAv2i64_indexed_OP1: - case MachineCombinerPattern::FMLAv2f64_OP1: + case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv2f64_OP1: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) { Opc = AArch64::FMLAv2i64_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); @@ -7535,10 +7542,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - case MachineCombinerPattern::FMLAv2i64_indexed_OP2: - case MachineCombinerPattern::FMLAv2f64_OP2: + case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv2f64_OP2: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) { Opc = AArch64::FMLAv2i64_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7549,10 +7556,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMLAv4i32_indexed_OP1: - case MachineCombinerPattern::FMLAv4f32_OP1: + case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMLAv4f32_OP1: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) { Opc = AArch64::FMLAv4i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed); @@ -7563,10 +7570,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMLAv4i32_indexed_OP2: - case MachineCombinerPattern::FMLAv4f32_OP2: + case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLAv4f32_OP2: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) { Opc = AArch64::FMLAv4i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7577,70 +7584,70 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMULSUBH_OP1: + case AArch64MachineCombinerPattern::FMULSUBH_OP1: Opc = AArch64::FNMSUBHrrr; RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULSUBS_OP1: + case AArch64MachineCombinerPattern::FMULSUBS_OP1: Opc = AArch64::FNMSUBSrrr; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULSUBD_OP1: + case AArch64MachineCombinerPattern::FMULSUBD_OP1: Opc = AArch64::FNMSUBDrrr; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FNMULSUBH_OP1: + case AArch64MachineCombinerPattern::FNMULSUBH_OP1: Opc = AArch64::FNMADDHrrr; RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FNMULSUBS_OP1: + case AArch64MachineCombinerPattern::FNMULSUBS_OP1: Opc = AArch64::FNMADDSrrr; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, 
InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FNMULSUBD_OP1: + case AArch64MachineCombinerPattern::FNMULSUBD_OP1: Opc = AArch64::FNMADDDrrr; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::FMULSUBH_OP2: + case AArch64MachineCombinerPattern::FMULSUBH_OP2: Opc = AArch64::FMSUBHrrr; RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMULSUBS_OP2: + case AArch64MachineCombinerPattern::FMULSUBS_OP2: Opc = AArch64::FMSUBSrrr; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMULSUBD_OP2: + case AArch64MachineCombinerPattern::FMULSUBD_OP2: Opc = AArch64::FMSUBDrrr; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::FMLSv1i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; RC = &AArch64::FPR32RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLSv1i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2: Opc = AArch64::FMLSv1i64_indexed; RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLSv4f16_OP1: - case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { + case AArch64MachineCombinerPattern::FMLSv4f16_OP1: + case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: { RC = &AArch64::FPR64RegClass; Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = @@ -7648,7 +7655,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) { Opc = AArch64::FMLAv4f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Accumulator, &NewVR); @@ -7659,23 +7666,23 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMLSv4f16_OP2: + case AArch64MachineCombinerPattern::FMLSv4f16_OP2: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLSv4f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLSv4i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLSv2f32_OP2: - case MachineCombinerPattern::FMLSv2i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv2f32_OP2: + case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2: RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) { Opc = AArch64::FMLSv2i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7686,8 +7693,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMLSv8f16_OP1: - case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { + 
case AArch64MachineCombinerPattern::FMLSv8f16_OP1: + case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: { RC = &AArch64::FPR128RegClass; Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = @@ -7695,7 +7702,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) { Opc = AArch64::FMLAv8f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Accumulator, &NewVR); @@ -7706,23 +7713,23 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMLSv8f16_OP2: + case AArch64MachineCombinerPattern::FMLSv8f16_OP2: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLSv8f16; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Accumulator); break; - case MachineCombinerPattern::FMLSv8i16_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLSv8i16_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); break; - case MachineCombinerPattern::FMLSv2f64_OP2: - case MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv2f64_OP2: + case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) { Opc = AArch64::FMLSv2i64_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7733,10 +7740,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; - case MachineCombinerPattern::FMLSv4f32_OP2: - case MachineCombinerPattern::FMLSv4i32_indexed_OP2: + case AArch64MachineCombinerPattern::FMLSv4f32_OP2: + case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2: RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) { Opc = AArch64::FMLSv4i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, FMAInstKind::Indexed); @@ -7746,8 +7753,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - case MachineCombinerPattern::FMLSv2f32_OP1: - case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { + case AArch64MachineCombinerPattern::FMLSv2f32_OP1: + case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: { RC = &AArch64::FPR64RegClass; Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = @@ -7755,7 +7762,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) { Opc = AArch64::FMLAv2i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed, &NewVR); @@ -7766,8 +7773,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMLSv4f32_OP1: - case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { + case AArch64MachineCombinerPattern::FMLSv4f32_OP1: + case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: { RC = 
&AArch64::FPR128RegClass; Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = @@ -7775,7 +7782,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) { Opc = AArch64::FMLAv4i32_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed, &NewVR); @@ -7786,8 +7793,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMLSv2f64_OP1: - case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { + case AArch64MachineCombinerPattern::FMLSv2f64_OP1: + case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: { RC = &AArch64::FPR128RegClass; Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = @@ -7795,7 +7802,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { + if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) { Opc = AArch64::FMLAv2i64_indexed; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, FMAInstKind::Indexed, &NewVR); @@ -7806,47 +7813,52 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMULv2i32_indexed_OP1: - case MachineCombinerPattern::FMULv2i32_indexed_OP2: { + case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: { unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2; + (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 + : 2; genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed, &AArch64::FPR128RegClass, MRI); break; } - case MachineCombinerPattern::FMULv2i64_indexed_OP1: - case MachineCombinerPattern::FMULv2i64_indexed_OP2: { + case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: { unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2; + (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 + : 2; genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed, &AArch64::FPR128RegClass, MRI); break; } - case MachineCombinerPattern::FMULv4i16_indexed_OP1: - case MachineCombinerPattern::FMULv4i16_indexed_OP2: { + case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: { unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2; + (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 + : 2; genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed, &AArch64::FPR128_loRegClass, MRI); break; } - case MachineCombinerPattern::FMULv4i32_indexed_OP1: - case MachineCombinerPattern::FMULv4i32_indexed_OP2: { + case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: { unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2; + (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 
1 + : 2; genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed, &AArch64::FPR128RegClass, MRI); break; } - case MachineCombinerPattern::FMULv8i16_indexed_OP1: - case MachineCombinerPattern::FMULv8i16_indexed_OP2: { + case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1: + case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: { unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2; + (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 + : 2; genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed, &AArch64::FPR128_loRegClass, MRI); break; } - case MachineCombinerPattern::FNMADD: { + case AArch64MachineCombinerPattern::FNMADD: { MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs); break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 2f10f80f4bdf7..9a2914891675c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -33,6 +33,146 @@ static const MachineMemOperand::Flags MOStridedAccess = #define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access" +// AArch64 MachineCombiner patterns +enum AArch64MachineCombinerPattern : unsigned { + // These are patterns used to reduce the length of dependence chain. + SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START, + SUBADD_OP2, + + // These are multiply-add patterns matched by the AArch64 machine combiner. + MULADDW_OP1, + MULADDW_OP2, + MULSUBW_OP1, + MULSUBW_OP2, + MULADDWI_OP1, + MULSUBWI_OP1, + MULADDX_OP1, + MULADDX_OP2, + MULSUBX_OP1, + MULSUBX_OP2, + MULADDXI_OP1, + MULSUBXI_OP1, + // NEON integers vectors + MULADDv8i8_OP1, + MULADDv8i8_OP2, + MULADDv16i8_OP1, + MULADDv16i8_OP2, + MULADDv4i16_OP1, + MULADDv4i16_OP2, + MULADDv8i16_OP1, + MULADDv8i16_OP2, + MULADDv2i32_OP1, + MULADDv2i32_OP2, + MULADDv4i32_OP1, + MULADDv4i32_OP2, + + MULSUBv8i8_OP1, + MULSUBv8i8_OP2, + MULSUBv16i8_OP1, + MULSUBv16i8_OP2, + MULSUBv4i16_OP1, + MULSUBv4i16_OP2, + MULSUBv8i16_OP1, + MULSUBv8i16_OP2, + MULSUBv2i32_OP1, + MULSUBv2i32_OP2, + MULSUBv4i32_OP1, + MULSUBv4i32_OP2, + + MULADDv4i16_indexed_OP1, + MULADDv4i16_indexed_OP2, + MULADDv8i16_indexed_OP1, + MULADDv8i16_indexed_OP2, + MULADDv2i32_indexed_OP1, + MULADDv2i32_indexed_OP2, + MULADDv4i32_indexed_OP1, + MULADDv4i32_indexed_OP2, + + MULSUBv4i16_indexed_OP1, + MULSUBv4i16_indexed_OP2, + MULSUBv8i16_indexed_OP1, + MULSUBv8i16_indexed_OP2, + MULSUBv2i32_indexed_OP1, + MULSUBv2i32_indexed_OP2, + MULSUBv4i32_indexed_OP1, + MULSUBv4i32_indexed_OP2, + + // Floating Point + FMULADDH_OP1, + FMULADDH_OP2, + FMULSUBH_OP1, + FMULSUBH_OP2, + FMULADDS_OP1, + FMULADDS_OP2, + FMULSUBS_OP1, + FMULSUBS_OP2, + FMULADDD_OP1, + FMULADDD_OP2, + FMULSUBD_OP1, + FMULSUBD_OP2, + FNMULSUBH_OP1, + FNMULSUBS_OP1, + FNMULSUBD_OP1, + FMLAv1i32_indexed_OP1, + FMLAv1i32_indexed_OP2, + FMLAv1i64_indexed_OP1, + FMLAv1i64_indexed_OP2, + FMLAv4f16_OP1, + FMLAv4f16_OP2, + FMLAv8f16_OP1, + FMLAv8f16_OP2, + FMLAv2f32_OP2, + FMLAv2f32_OP1, + FMLAv2f64_OP1, + FMLAv2f64_OP2, + FMLAv4i16_indexed_OP1, + FMLAv4i16_indexed_OP2, + FMLAv8i16_indexed_OP1, + FMLAv8i16_indexed_OP2, + FMLAv2i32_indexed_OP1, + FMLAv2i32_indexed_OP2, + FMLAv2i64_indexed_OP1, + FMLAv2i64_indexed_OP2, + FMLAv4f32_OP1, + FMLAv4f32_OP2, + FMLAv4i32_indexed_OP1, + FMLAv4i32_indexed_OP2, + FMLSv1i32_indexed_OP2, + FMLSv1i64_indexed_OP2, + FMLSv4f16_OP1, + FMLSv4f16_OP2, + FMLSv8f16_OP1, + FMLSv8f16_OP2, + FMLSv2f32_OP1, + FMLSv2f32_OP2, + FMLSv2f64_OP1, + FMLSv2f64_OP2, 
+ FMLSv4i16_indexed_OP1, + FMLSv4i16_indexed_OP2, + FMLSv8i16_indexed_OP1, + FMLSv8i16_indexed_OP2, + FMLSv2i32_indexed_OP1, + FMLSv2i32_indexed_OP2, + FMLSv2i64_indexed_OP1, + FMLSv2i64_indexed_OP2, + FMLSv4f32_OP1, + FMLSv4f32_OP2, + FMLSv4i32_indexed_OP1, + FMLSv4i32_indexed_OP2, + + FMULv2i32_indexed_OP1, + FMULv2i32_indexed_OP2, + FMULv2i64_indexed_OP1, + FMULv2i64_indexed_OP2, + FMULv4i16_indexed_OP1, + FMULv4i16_indexed_OP2, + FMULv4i32_indexed_OP1, + FMULv4i32_indexed_OP2, + FMULv8i16_indexed_OP1, + FMULv8i16_indexed_OP2, + + FNMADD, +}; class AArch64InstrInfo final : public AArch64GenInstrInfo { const AArch64RegisterInfo RI; const AArch64Subtarget &Subtarget; @@ -283,17 +423,17 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { const MachineRegisterInfo *MRI) const override; bool optimizeCondBranch(MachineInstr &MI) const override; + CombinerObjective getCombinerObjective(unsigned Pattern) const override; /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. /// \param Pattern - combiner pattern - bool isThroughputPattern(MachineCombinerPattern Pattern) const override; + bool isThroughputPattern(unsigned Pattern) const override; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in ``Root``. All potential patterns are /// listed in the ``Patterns`` array. - bool - getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, - bool DoRegPressureReduce) const override; + bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const override; /// Return true when Inst is associative and commutative so that it can be /// reassociated. If Invert is true, then the inverse of Inst operation must /// be checked. @@ -302,7 +442,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 5f5eb31a5a85f..93874d65531ae 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -348,9 +348,9 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { // register with D. After the transformation, A and D must be assigned with // same hardware register due to TIE attribute of FMA instructions. 
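//
// As an illustrative sketch of the ILP-oriented patterns matched below
// (operand order, kill flags and legality checks are handled in
// reassociateFMA), REASSOC_XY_AMM_BMM rewrites a serial chain of three
// dependent operations:
//
//   A = FADD X, Y          (Leaf)        A1 = FMA X, M21, M22
//   B = FMA  A, M21, M22   (Prev)   =>   A2 = FMA Y, M31, M32
//   C = FMA  B, M31, M32   (Root)        C  = FADD A1, A2
//
// into two independent FMAs feeding a single FADD, shortening the critical
// path.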
// -bool PPCInstrInfo::getFMAPatterns( - MachineInstr &Root, SmallVectorImpl &Patterns, - bool DoRegPressureReduce) const { +bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { MachineBasicBlock *MBB = Root.getParent(); const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -476,7 +476,7 @@ bool PPCInstrInfo::getFMAPatterns( if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR && IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) { LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA); + Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BCA); return true; } @@ -484,7 +484,7 @@ bool PPCInstrInfo::getFMAPatterns( if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL && IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) { LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC); + Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BAC); return true; } } @@ -511,12 +511,12 @@ bool PPCInstrInfo::getFMAPatterns( MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA); AddOpIdx = -1; if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) { - Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); + Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM); LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n"); return true; } if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) { - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); + Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM); LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n"); return true; } @@ -524,7 +524,7 @@ bool PPCInstrInfo::getFMAPatterns( } void PPCInstrInfo::finalizeInsInstrs( - MachineInstr &Root, MachineCombinerPattern &P, + MachineInstr &Root, unsigned &Pattern, SmallVectorImpl &InsInstrs) const { assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!"); @@ -542,12 +542,12 @@ void PPCInstrInfo::finalizeInsInstrs( // For now we only need to fix up placeholder for register pressure reduce // patterns. 
Register ConstReg = 0; - switch (P) { - case MachineCombinerPattern::REASSOC_XY_BCA: + switch (Pattern) { + case PPCMachineCombinerPattern::REASSOC_XY_BCA: ConstReg = TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI); break; - case MachineCombinerPattern::REASSOC_XY_BAC: + case PPCMachineCombinerPattern::REASSOC_XY_BAC: ConstReg = TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI); break; @@ -737,8 +737,21 @@ PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const { return nullptr; } +CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern) const { + switch (Pattern) { + case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: + case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: + return CombinerObjective::MustReduceDepth; + case PPCMachineCombinerPattern::REASSOC_XY_BCA: + case PPCMachineCombinerPattern::REASSOC_XY_BAC: + return CombinerObjective::MustReduceRegisterPressure; + default: + return TargetInstrInfo::getCombinerObjective(Pattern); + } +} + bool PPCInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl &Patterns, + MachineInstr &Root, SmallVectorImpl &Patterns, bool DoRegPressureReduce) const { // Using the machine combiner in this way is potentially expensive, so // restrict to when aggressive optimizations are desired. @@ -753,15 +766,15 @@ bool PPCInstrInfo::getMachineCombinerPatterns( } void PPCInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { switch (Pattern) { - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: + case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: + case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: + case PPCMachineCombinerPattern::REASSOC_XY_BCA: + case PPCMachineCombinerPattern::REASSOC_XY_BAC: reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); break; default: @@ -773,7 +786,7 @@ void PPCInstrInfo::genAlternativeCodeSequence( } void PPCInstrInfo::reassociateFMA( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { @@ -790,8 +803,8 @@ void PPCInstrInfo::reassociateFMA( assert(Idx >= 0 && "Root must be a FMA instruction"); bool IsILPReassociate = - (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) || - (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM); + (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) || + (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM); uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; @@ -801,18 +814,18 @@ void PPCInstrInfo::reassociateFMA( switch (Pattern) { default: llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: + case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: + case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); break; - case MachineCombinerPattern::REASSOC_XY_BAC: { + case PPCMachineCombinerPattern::REASSOC_XY_BAC: { Register MULReg = 
TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI); Leaf = MRI.getVRegDef(MULReg); break; } - case MachineCombinerPattern::REASSOC_XY_BCA: { + case PPCMachineCombinerPattern::REASSOC_XY_BCA: { Register MULReg = TRI->lookThruCopyLike( Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI); Leaf = MRI.getVRegDef(MULReg); @@ -853,10 +866,10 @@ void PPCInstrInfo::reassociateFMA( if (IsILPReassociate) GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21); - if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { + if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) { GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11); GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); - } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { + } else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) { GetOperandInfo(Leaf->getOperand(1), RegX, KillX); GetOperandInfo(Leaf->getOperand(2), RegY, KillY); } else { @@ -881,7 +894,7 @@ void PPCInstrInfo::reassociateFMA( } Register NewVRD = 0; - if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { + if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) { NewVRD = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2)); } @@ -901,7 +914,7 @@ void PPCInstrInfo::reassociateFMA( switch (Pattern) { default: llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: { + case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: { // Create new instructions for insertion. MachineInstrBuilder MINewB = BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) @@ -936,7 +949,7 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewC); break; } - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: { + case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: { assert(NewVRD && "new FMA register not created!"); // Create new instructions for insertion. MachineInstrBuilder MINewA = @@ -980,11 +993,11 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewC); break; } - case MachineCombinerPattern::REASSOC_XY_BAC: - case MachineCombinerPattern::REASSOC_XY_BCA: { + case PPCMachineCombinerPattern::REASSOC_XY_BAC: + case PPCMachineCombinerPattern::REASSOC_XY_BCA: { Register VarReg; bool KillVarReg = false; - if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) { + if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_BCA) { VarReg = RegM31; KillVarReg = KillM31; } else { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 045932dc0d3ba..1e2687f92c61e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -85,6 +85,19 @@ enum SpillOpcodeKey { SOK_LastOpcodeSpill // This must be last on the enum. }; +// PPC MachineCombiner patterns +enum PPCMachineCombinerPattern : unsigned { + // These are patterns matched by the PowerPC to reassociate FMA chains. + REASSOC_XY_AMM_BMM = MachineCombinerPattern::TARGET_PATTERN_START, + REASSOC_XMM_AMM_BMM, + + // These are patterns matched by the PowerPC to reassociate FMA and FSUB to + // reduce register pressure. + REASSOC_XY_BCA, + REASSOC_XY_BAC, + +}; + // Define list of load and store spill opcodes. 
#define NoInstr PPC::INSTRUCTION_LIST_END #define Pwr8LoadOpcodes \ @@ -224,7 +237,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { ArrayRef getLoadOpcodesForSpillArray() const; unsigned getSpillIndex(const TargetRegisterClass *RC) const; int16_t getFMAOpIdxInfo(unsigned Opcode) const; - void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern, + void reassociateFMA(MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const; @@ -350,7 +363,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; @@ -358,15 +371,16 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// Return true when there is potentially a faster code sequence for a fma /// chain ending in \p Root. All potential patterns are output in the \p /// P array. - bool getFMAPatterns(MachineInstr &Root, - SmallVectorImpl &P, + bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl &Patterns, bool DoRegPressureReduce) const; + CombinerObjective getCombinerObjective(unsigned Pattern) const override; + /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in . All potential patterns are /// output in the array. bool getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &P, + SmallVectorImpl &Patterns, bool DoRegPressureReduce) const override; /// On PowerPC, we leverage machine combiner pass to reduce register pressure @@ -380,7 +394,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// Fixup the placeholders we put in genAlternativeCodeSequence() for /// MachineCombiner. void - finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl &InsInstrs) const override; bool isAssociativeAndCommutative(const MachineInstr &Inst, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 84d754e3cbcf3..d78f5bd9dedf3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1560,7 +1560,7 @@ MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const { } void RISCVInstrInfo::finalizeInsInstrs( - MachineInstr &Root, MachineCombinerPattern &P, + MachineInstr &Root, unsigned &Pattern, SmallVectorImpl &InsInstrs) const { int16_t FrmOpIdx = RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm); @@ -1748,10 +1748,9 @@ static bool canCombineFPFusedMultiply(const MachineInstr &Root, return RISCV::hasEqualFRM(Root, *MI); } -static bool -getFPFusedMultiplyPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, - bool DoRegPressureReduce) { +static bool getFPFusedMultiplyPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) { unsigned Opc = Root.getOpcode(); bool IsFAdd = isFADD(Opc); if (!IsFAdd && !isFSUB(Opc)) @@ -1759,21 +1758,21 @@ getFPFusedMultiplyPatterns(MachineInstr &Root, bool Added = false; if (canCombineFPFusedMultiply(Root, Root.getOperand(1), DoRegPressureReduce)) { - Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX - : MachineCombinerPattern::FMSUB); + Patterns.push_back(IsFAdd ? 
RISCVMachineCombinerPattern::FMADD_AX + : RISCVMachineCombinerPattern::FMSUB); Added = true; } if (canCombineFPFusedMultiply(Root, Root.getOperand(2), DoRegPressureReduce)) { - Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA - : MachineCombinerPattern::FNMSUB); + Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA + : RISCVMachineCombinerPattern::FNMSUB); Added = true; } return Added; } static bool getFPPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, + SmallVectorImpl &Patterns, bool DoRegPressureReduce) { return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); } @@ -1832,9 +1831,8 @@ static unsigned getSHXADDShiftAmount(unsigned Opc) { // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into // (sh3add (sh2add Y, Z), X). -static bool -getSHXADDPatterns(const MachineInstr &Root, - SmallVectorImpl &Patterns) { +static bool getSHXADDPatterns(const MachineInstr &Root, + SmallVectorImpl &Patterns) { unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode()); if (!ShiftAmt) return false; @@ -1847,19 +1845,31 @@ getSHXADDPatterns(const MachineInstr &Root, bool Found = false; if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) { - Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP1); + Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1); Found = true; } if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) { - Patterns.push_back(MachineCombinerPattern::SHXADD_ADD_SLLI_OP2); + Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2); Found = true; } return Found; } +CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const { + switch (Pattern) { + case RISCVMachineCombinerPattern::FMADD_AX: + case RISCVMachineCombinerPattern::FMADD_XA: + case RISCVMachineCombinerPattern::FMSUB: + case RISCVMachineCombinerPattern::FNMSUB: + return CombinerObjective::MustReduceDepth; + default: + return TargetInstrInfo::getCombinerObjective(Pattern); + } +} + bool RISCVInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl &Patterns, + MachineInstr &Root, SmallVectorImpl &Patterns, bool DoRegPressureReduce) const { if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) @@ -1872,8 +1882,7 @@ bool RISCVInstrInfo::getMachineCombinerPatterns( DoRegPressureReduce); } -static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, - MachineCombinerPattern Pattern) { +static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) { switch (RootOpc) { default: llvm_unreachable("Unexpected opcode"); @@ -1884,32 +1893,32 @@ static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, case RISCV::FADD_D: return RISCV::FMADD_D; case RISCV::FSUB_H: - return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H - : RISCV::FNMSUB_H; + return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H + : RISCV::FNMSUB_H; case RISCV::FSUB_S: - return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S - : RISCV::FNMSUB_S; + return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S + : RISCV::FNMSUB_S; case RISCV::FSUB_D: - return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D - : RISCV::FNMSUB_D; + return Pattern == RISCVMachineCombinerPattern::FMSUB ? 
RISCV::FMSUB_D + : RISCV::FNMSUB_D; } } -static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) { +static unsigned getAddendOperandIdx(unsigned Pattern) { switch (Pattern) { default: llvm_unreachable("Unexpected pattern"); - case MachineCombinerPattern::FMADD_AX: - case MachineCombinerPattern::FMSUB: + case RISCVMachineCombinerPattern::FMADD_AX: + case RISCVMachineCombinerPattern::FMSUB: return 2; - case MachineCombinerPattern::FMADD_XA: - case MachineCombinerPattern::FNMSUB: + case RISCVMachineCombinerPattern::FMADD_XA: + case RISCVMachineCombinerPattern::FNMSUB: return 1; } } static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern Pattern, + unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs) { MachineFunction *MF = Root.getMF(); @@ -2013,7 +2022,7 @@ genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx, } void RISCVInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { @@ -2023,22 +2032,22 @@ void RISCVInstrInfo::genAlternativeCodeSequence( TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; - case MachineCombinerPattern::FMADD_AX: - case MachineCombinerPattern::FMSUB: { + case RISCVMachineCombinerPattern::FMADD_AX: + case RISCVMachineCombinerPattern::FMSUB: { MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); return; } - case MachineCombinerPattern::FMADD_XA: - case MachineCombinerPattern::FNMSUB: { + case RISCVMachineCombinerPattern::FMADD_XA: + case RISCVMachineCombinerPattern::FNMSUB: { MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); return; } - case MachineCombinerPattern::SHXADD_ADD_SLLI_OP1: + case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1: genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; - case MachineCombinerPattern::SHXADD_ADD_SLLI_OP2: + case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2: genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 81d9c9db783c0..70fe7da85be0e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -49,6 +49,16 @@ unsigned getBrCond(CondCode CC); } // end of namespace RISCVCC +// RISCV MachineCombiner patterns +enum RISCVMachineCombinerPattern : unsigned { + FMADD_AX = MachineCombinerPattern::TARGET_PATTERN_START, + FMADD_XA, + FMSUB, + FNMSUB, + SHXADD_ADD_SLLI_OP1, + SHXADD_ADD_SLLI_OP2, +}; + class RISCVInstrInfo : public RISCVGenInstrInfo { public: @@ -240,17 +250,18 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { MachineTraceStrategy getMachineCombinerTraceStrategy() const override; - bool - getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, - bool DoRegPressureReduce) const override; + CombinerObjective getCombinerObjective(unsigned Pattern) const override; + + bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const override; void - finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl 
&InsInstrs) const override; void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index a5b2e4895eded..510b08f9901a2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10578,7 +10578,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, } bool X86InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl &Patterns, + MachineInstr &Root, SmallVectorImpl &Patterns, bool DoRegPressureReduce) const { unsigned Opc = Root.getOpcode(); switch (Opc) { @@ -10587,7 +10587,7 @@ bool X86InstrInfo::getMachineCombinerPatterns( case X86::VPDPWSSDYrr: case X86::VPDPWSSDYrm: { if (!Subtarget.hasFastDPWSSD()) { - Patterns.push_back(MachineCombinerPattern::DPWSSD); + Patterns.push_back(X86MachineCombinerPattern::DPWSSD); return true; } break; @@ -10599,8 +10599,8 @@ bool X86InstrInfo::getMachineCombinerPatterns( case X86::VPDPWSSDZr: case X86::VPDPWSSDZm: { if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) { - Patterns.push_back(MachineCombinerPattern::DPWSSD); - return true; + Patterns.push_back(X86MachineCombinerPattern::DPWSSD); + return true; } break; } @@ -10700,7 +10700,7 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII, } void X86InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { @@ -10710,7 +10710,7 @@ void X86InstrInfo::genAlternativeCodeSequence( TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; - case MachineCombinerPattern::DPWSSD: + case X86MachineCombinerPattern::DPWSSD: genAlternativeDpCodeSequence(Root, *this, InsInstrs, DelInstrs, InstrIdxForVirtReg); return; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index e719be0caf3ee..5407ede69a91c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -26,6 +26,12 @@ namespace llvm { class X86Subtarget; +// X86 MachineCombiner patterns +enum X86MachineCombinerPattern : unsigned { + // X86 VNNI + DPWSSD = MachineCombinerPattern::TARGET_PATTERN_START, +}; + namespace X86 { enum AsmComments { @@ -607,16 +613,15 @@ class X86InstrInfo final : public X86GenInstrInfo { std::optional isCopyInstrImpl(const MachineInstr &MI) const override; - bool - getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns, - bool DoRegPressureReduce) const override; + bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const override; /// When getMachineCombinerPatterns() finds potential patterns, /// this function generates the instructions that could replace the /// original code sequence. 
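  /// As a sketch of the single X86 pattern defined above
  /// (X86MachineCombinerPattern::DPWSSD): the generated alternative splits a
  /// VPDPWSSD accumulate into a VPMADDWD multiply followed by a vector add,
  /// which genAlternativeDpCodeSequence() emits for targets without fast
  /// VNNI.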
void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; From efb8cc5ddb03897795dc153a03d0c1548c8ee4a7 Mon Sep 17 00:00:00 2001 From: paperchalice Date: Thu, 11 Apr 2024 12:27:18 +0800 Subject: [PATCH 096/886] [NewPM] Fix print-changed-dot-cfg failure (#88351) Fix failure in #80946. --- llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir index 340ece93aa02b..40603630c6134 100644 --- a/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir +++ b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.mir @@ -1,7 +1,7 @@ # REQUIRES: x86-registered-target # Simple functionality check. # RUN: rm -rf %t && mkdir -p %t -# RUN: llc -filetype=null -print-changed=dot-cfg -passes=no-op-machine-function -dot-cfg-dir=%t %s +# RUN: llc -mtriple=x86_64-pc-linux-gnu -filetype=null -print-changed=dot-cfg -passes=no-op-machine-function -dot-cfg-dir=%t %s # RUN: ls %t/*.pdf %t/passes.html | count 3 --- From 3197f9d8b0efc3efdc531421bd11c16305d9b1ff Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 11 Apr 2024 12:48:52 +0800 Subject: [PATCH 097/886] [InstSimplify] Make sure the simplified value doesn't generate poison in threadBinOpOverSelect (#87075) Alive2: https://alive2.llvm.org/ce/z/y_Jmdn Fix https://github.com/llvm/llvm-project/issues/87042. --- llvm/lib/Analysis/InstructionSimplify.cpp | 3 +- llvm/test/Transforms/InstSimplify/pr87042.ll | 42 ++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstSimplify/pr87042.ll diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 9ff3faff79902..3c943a09a9c23 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -440,7 +440,8 @@ static Value *threadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, // Check that the simplified value has the form "X op Y" where "op" is the // same as the original operation. Instruction *Simplified = dyn_cast(FV ? FV : TV); - if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) { + if (Simplified && Simplified->getOpcode() == unsigned(Opcode) && + !Simplified->hasPoisonGeneratingFlags()) { // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". // We already know that "op" is the same as for the simplified value. See // if the operands match too. If so, return the simplified value. diff --git a/llvm/test/Transforms/InstSimplify/pr87042.ll b/llvm/test/Transforms/InstSimplify/pr87042.ll new file mode 100644 index 0000000000000..800d27c9e6504 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/pr87042.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s + +; %or2 cannot be folded into %or1 because %or1 has disjoint. +; TODO: Can we move the logic into InstCombine and drop the disjoint flag? 
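;
; A sketch of why the fold would be unsound: on the true arm %sel1 is %or1,
; so "or i64 %sel1, 7" simplifies to %or1 itself, while on the false arm
; %sel1 is %x and "or i64 %x, 7" is the same operation *without* the
; disjoint flag. Returning %or1 for %or2 would let the poison-generating
; disjoint flag apply on a path where it is not guaranteed to hold.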
+define i64 @test(i1 %cond, i64 %x) { +; CHECK-LABEL: define i64 @test( +; CHECK-SAME: i1 [[COND:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[X]], 7 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[COND]], i64 [[OR1]], i64 [[X]] +; CHECK-NEXT: [[OR2:%.*]] = or i64 [[SEL1]], 7 +; CHECK-NEXT: ret i64 [[OR2]] +; + %or1 = or disjoint i64 %x, 7 + %sel1 = select i1 %cond, i64 %or1, i64 %x + %or2 = or i64 %sel1, 7 + ret i64 %or2 +} + +define i64 @pr87042(i64 %x) { +; CHECK-LABEL: define i64 @pr87042( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X]], 65535 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[AND1]], 0 +; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[X]], 7 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i64 [[OR1]], i64 [[X]] +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[SEL1]], 16776960 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[AND2]], 0 +; CHECK-NEXT: [[OR2:%.*]] = or i64 [[SEL1]], 7 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i64 [[OR2]], i64 [[SEL1]] +; CHECK-NEXT: ret i64 [[SEL2]] +; + %and1 = and i64 %x, 65535 + %cmp1 = icmp eq i64 %and1, 0 + %or1 = or disjoint i64 %x, 7 + %sel1 = select i1 %cmp1, i64 %or1, i64 %x + %and2 = and i64 %sel1, 16776960 + %cmp2 = icmp eq i64 %and2, 0 + %or2 = or i64 %sel1, 7 + %sel2 = select i1 %cmp2, i64 %or2, i64 %sel1 + ret i64 %sel2 +} From 2bede6873dbe7021b306d3e5bec59d0fba2dd26c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 10 Apr 2024 22:03:20 -0700 Subject: [PATCH 098/886] [memprof] Rename RawMemProfReader.{cpp,h} to MemProfReader.{cpp,h} (NFC) (#88200) This patch renames RawMemProfReader.{cpp,h} to MemProfReader.{cpp,h}, respectively. Also, it re-creates RawMemProfReader.h just to include MemProfReader.h for compatibility with out-of-tree users. --- llvm/include/llvm/ProfileData/MemProfReader.h | 214 ++++++++++++++++++ .../llvm/ProfileData/RawMemProfReader.h | 207 +---------------- llvm/lib/ProfileData/CMakeLists.txt | 2 +- ...RawMemProfReader.cpp => MemProfReader.cpp} | 6 +- llvm/tools/llvm-profdata/llvm-profdata.cpp | 2 +- llvm/unittests/ProfileData/MemProfTest.cpp | 2 +- .../secondary/llvm/lib/ProfileData/BUILD.gn | 2 +- 7 files changed, 227 insertions(+), 208 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/MemProfReader.h rename llvm/lib/ProfileData/{RawMemProfReader.cpp => MemProfReader.cpp} (99%) diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h new file mode 100644 index 0000000000000..89f49a20a6089 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -0,0 +1,214 @@ +//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_
+#define LLVM_PROFILEDATA_MEMPROFREADER_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <functional>
+
+namespace llvm {
+namespace memprof {
+// A class for memprof profile data populated directly from external
+// sources.
+class MemProfReader {
+public:
+  // The MemProfReader only holds memory profile information.
+  InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
+
+  using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
+  using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
+  Iterator end() { return Iterator(); }
+  Iterator begin() {
+    Iter = FunctionProfileData.begin();
+    return Iterator(this);
+  }
+
+  // Return a const reference to the internal Id to Frame mappings.
+  const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
+    return IdToFrame;
+  }
+
+  // Return a const reference to the internal function profile data.
+  const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
+  getProfileData() const {
+    return FunctionProfileData;
+  }
+
+  virtual Error
+  readNextRecord(GuidMemProfRecordPair &GuidRecord,
+                 std::function<const Frame(const FrameId)> Callback = nullptr) {
+    if (FunctionProfileData.empty())
+      return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+    if (Iter == FunctionProfileData.end())
+      return make_error<InstrProfError>(instrprof_error::eof);
+
+    if (Callback == nullptr)
+      Callback =
+          std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
+
+    const IndexedMemProfRecord &IndexedRecord = Iter->second;
+    GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
+    Iter++;
+    return Error::success();
+  }
+
+  // Allow default construction for derived classes which can populate the
+  // contents after construction.
+  MemProfReader() = default;
+  virtual ~MemProfReader() = default;
+
+  // Initialize the MemProfReader with the frame mappings and profile contents.
+  MemProfReader(
+      llvm::DenseMap<FrameId, Frame> FrameIdMap,
+      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+      : IdToFrame(std::move(FrameIdMap)),
+        FunctionProfileData(std::move(ProfData)) {}
+
+protected:
+  // A helper method to extract the frame from the IdToFrame map.
+  const Frame &idToFrame(const FrameId Id) const {
+    auto It = IdToFrame.find(Id);
+    assert(It != IdToFrame.end() && "Id not found in map.");
+    return It->getSecond();
+  }
+  // A mapping from FrameId (a hash of the contents) to the frame.
+  llvm::DenseMap<FrameId, Frame> IdToFrame;
+  // A mapping from function GUID, hash of the canonical function symbol to the
+  // memprof profile data for that function, i.e allocation and callsite info.
+  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
+  // An iterator to the internal function profile data structure.
+  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
+};
+
+// Map from id (recorded from sanitizer stack depot) to virtual addresses for
+// each program counter address in the callstack.
+using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
+
+// Specializes the MemProfReader class to populate the contents from raw binary
+// memprof profiles from instrumentation based profiling.
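+//
+// An illustrative consumption loop (a sketch only; real clients must check
+// the Expected/Error results instead of dereferencing them blindly, and the
+// path variables here are placeholders):
+//
+//   auto ReaderOr = RawMemProfReader::create(ProfilePath, ProfiledBinary);
+//   for (const auto &[GUID, Record] : **ReaderOr) {
+//     // Record.AllocSites and Record.CallSites hold the allocation contexts
+//     // and call sites attributed to the function with this GUID.
+//   }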
+class RawMemProfReader final : public MemProfReader {
+public:
+  RawMemProfReader(const RawMemProfReader &) = delete;
+  RawMemProfReader &operator=(const RawMemProfReader &) = delete;
+  virtual ~RawMemProfReader() override = default;
+
+  // Prints the contents of the profile in YAML format.
+  void printYAML(raw_ostream &OS);
+
+  // Return true if the \p DataBuffer starts with magic bytes indicating it is
+  // a raw binary memprof profile.
+  static bool hasFormat(const MemoryBuffer &DataBuffer);
+  // Return true if the file at \p Path starts with magic bytes indicating it is
+  // a raw binary memprof profile.
+  static bool hasFormat(const StringRef Path);
+
+  // Create a RawMemProfReader after sanity checking the contents of the file at
+  // \p Path or the \p Buffer. The binary from which the profile has been
+  // collected is specified via a path in \p ProfiledBinary.
+  static Expected<std::unique_ptr<RawMemProfReader>>
+  create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
+  static Expected<std::unique_ptr<RawMemProfReader>>
+  create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
+         bool KeepName = false);
+
+  // Returns a list of build ids recorded in the segment information.
+  static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
+
+  virtual Error
+  readNextRecord(GuidMemProfRecordPair &GuidRecord,
+                 std::function<const Frame(const FrameId)> Callback) override;
+
+  // Constructor for unittests only.
+  RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
+                   llvm::SmallVectorImpl<SegmentEntry> &Seg,
+                   llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
+                   CallStackMap &SM, bool KeepName = false)
+      : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
+        StackMap(SM), KeepSymbolName(KeepName) {
+    // We don't call initialize here since there is no raw profile to read. The
+    // test should pass in the raw profile as structured data.
+
+    // If there is an error here then the mock symbolizer has not been
+    // initialized properly.
+    if (Error E = symbolizeAndFilterStackFrames(std::move(Sym)))
+      report_fatal_error(std::move(E));
+    if (Error E = mapRawProfileToRecords())
+      report_fatal_error(std::move(E));
+  }
+
+private:
+  RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
+      : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
+  // Initializes the RawMemProfReader with the contents in `DataBuffer`.
+  Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
+  // Read and parse the contents of the `DataBuffer` as a binary format profile.
+  Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
+  // Initialize the segment mapping information for symbolization.
+  Error setupForSymbolization();
+  // Symbolize and cache all the virtual addresses we encounter in the
+  // callstacks from the raw profile. Also prune callstack frames which we can't
+  // symbolize or those that belong to the runtime. For profile entries where
+  // the entire callstack is pruned, we drop the entry from the profile.
+  Error symbolizeAndFilterStackFrames(
+      std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
+  // Construct memprof records for each function and store it in the
+  // `FunctionProfileData` map. A function may have allocation profile data or
+  // callsite data or both.
+  Error mapRawProfileToRecords();
+
+  object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
+
+  // The profiled binary.
+  object::OwningBinary<object::Binary> Binary;
+  // The preferred load address of the executable segment.
+  uint64_t PreferredTextSegmentAddress = 0;
+  // The base address of the text segment in the process during profiling.
+  uint64_t ProfiledTextSegmentStart = 0;
+  // The limit address of the text segment in the process during profiling.
+ uint64_t ProfiledTextSegmentEnd = 0; + + // The memory mapped segment information for all executable segments in the + // profiled binary (filtered from the raw profile using the build id). + llvm::SmallVector SegmentInfo; + + // A map from callstack id (same as key in CallStackMap below) to the heap + // information recorded for that allocation context. + llvm::MapVector CallstackProfileData; + CallStackMap StackMap; + + // Cached symbolization from PC to Frame. + llvm::DenseMap> SymbolizedFrame; + + // Whether to keep the symbol name for each frame after hashing. + bool KeepSymbolName = false; + // A mapping of the hash to symbol name, only used if KeepSymbolName is true. + llvm::DenseMap GuidToSymbolName; +}; +} // namespace memprof +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFREADER_H_ diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h index 6aa5caec65f79..5e06f26fffdc2 100644 --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -1,6 +1,4 @@ -#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ -#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ -//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +//===- RawMemProfReader.h - Instrumented memory profiling reader *- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,207 +6,14 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for reading MemProf profiling data. +// This file just includes MemProfReader.h for compatibility with +// out-of-tree users. // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" -#include "llvm/DebugInfo/Symbolize/Symbolize.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/MemProf.h" -#include "llvm/ProfileData/MemProfData.inc" -#include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" - -#include - -namespace llvm { -namespace memprof { -// A class for memprof profile data populated directly from external -// sources. -// TODO: Rename this file to MemProfReader.h to better reflect the contents. -class MemProfReader { -public: - // The MemProfReader only holds memory profile information. - InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } - - using GuidMemProfRecordPair = std::pair; - using Iterator = InstrProfIterator; - Iterator end() { return Iterator(); } - Iterator begin() { - Iter = FunctionProfileData.begin(); - return Iterator(this); - } - - // Return a const reference to the internal Id to Frame mappings. - const llvm::DenseMap &getFrameMapping() const { - return IdToFrame; - } - - // Return a const reference to the internal function profile data. 
- const llvm::MapVector & - getProfileData() const { - return FunctionProfileData; - } - - virtual Error - readNextRecord(GuidMemProfRecordPair &GuidRecord, - std::function Callback = nullptr) { - if (FunctionProfileData.empty()) - return make_error(instrprof_error::empty_raw_profile); - - if (Iter == FunctionProfileData.end()) - return make_error(instrprof_error::eof); - - if (Callback == nullptr) - Callback = - std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1); - - const IndexedMemProfRecord &IndexedRecord = Iter->second; - GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)}; - Iter++; - return Error::success(); - } - - // Allow default construction for derived classes which can populate the - // contents after construction. - MemProfReader() = default; - virtual ~MemProfReader() = default; - - // Initialize the MemProfReader with the frame mappings and profile contents. - MemProfReader( - llvm::DenseMap FrameIdMap, - llvm::MapVector ProfData) - : IdToFrame(std::move(FrameIdMap)), - FunctionProfileData(std::move(ProfData)) {} - -protected: - // A helper method to extract the frame from the IdToFrame map. - const Frame &idToFrame(const FrameId Id) const { - auto It = IdToFrame.find(Id); - assert(It != IdToFrame.end() && "Id not found in map."); - return It->getSecond(); - } - // A mapping from FrameId (a hash of the contents) to the frame. - llvm::DenseMap IdToFrame; - // A mapping from function GUID, hash of the canonical function symbol to the - // memprof profile data for that function, i.e allocation and callsite info. - llvm::MapVector FunctionProfileData; - // An iterator to the internal function profile data structure. - llvm::MapVector::iterator Iter; -}; - -// Map from id (recorded from sanitizer stack depot) to virtual addresses for -// each program counter address in the callstack. -using CallStackMap = llvm::DenseMap>; - -// Specializes the MemProfReader class to populate the contents from raw binary -// memprof profiles from instrumentation based profiling. -class RawMemProfReader final : public MemProfReader { -public: - RawMemProfReader(const RawMemProfReader &) = delete; - RawMemProfReader &operator=(const RawMemProfReader &) = delete; - virtual ~RawMemProfReader() override = default; - - // Prints the contents of the profile in YAML format. - void printYAML(raw_ostream &OS); - - // Return true if the \p DataBuffer starts with magic bytes indicating it is - // a raw binary memprof profile. - static bool hasFormat(const MemoryBuffer &DataBuffer); - // Return true if the file at \p Path starts with magic bytes indicating it is - // a raw binary memprof profile. - static bool hasFormat(const StringRef Path); - - // Create a RawMemProfReader after sanity checking the contents of the file at - // \p Path or the \p Buffer. The binary from which the profile has been - // collected is specified via a path in \p ProfiledBinary. - static Expected> - create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); - static Expected> - create(std::unique_ptr Buffer, StringRef ProfiledBinary, - bool KeepName = false); - - // Returns a list of build ids recorded in the segment information. - static std::vector peekBuildIds(MemoryBuffer *DataBuffer); - - virtual Error - readNextRecord(GuidMemProfRecordPair &GuidRecord, - std::function Callback) override; - - // Constructor for unittests only. 
- RawMemProfReader(std::unique_ptr Sym, - llvm::SmallVectorImpl &Seg, - llvm::MapVector &Prof, - CallStackMap &SM, bool KeepName = false) - : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof), - StackMap(SM), KeepSymbolName(KeepName) { - // We don't call initialize here since there is no raw profile to read. The - // test should pass in the raw profile as structured data. - - // If there is an error here then the mock symbolizer has not been - // initialized properly. - if (Error E = symbolizeAndFilterStackFrames(std::move(Sym))) - report_fatal_error(std::move(E)); - if (Error E = mapRawProfileToRecords()) - report_fatal_error(std::move(E)); - } - -private: - RawMemProfReader(object::OwningBinary &&Bin, bool KeepName) - : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} - // Initializes the RawMemProfReader with the contents in `DataBuffer`. - Error initialize(std::unique_ptr DataBuffer); - // Read and parse the contents of the `DataBuffer` as a binary format profile. - Error readRawProfile(std::unique_ptr DataBuffer); - // Initialize the segment mapping information for symbolization. - Error setupForSymbolization(); - // Symbolize and cache all the virtual addresses we encounter in the - // callstacks from the raw profile. Also prune callstack frames which we can't - // symbolize or those that belong to the runtime. For profile entries where - // the entire callstack is pruned, we drop the entry from the profile. - Error symbolizeAndFilterStackFrames( - std::unique_ptr Symbolizer); - // Construct memprof records for each function and store it in the - // `FunctionProfileData` map. A function may have allocation profile data or - // callsite data or both. - Error mapRawProfileToRecords(); - - object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); - - // The profiled binary. - object::OwningBinary Binary; - // The preferred load address of the executable segment. - uint64_t PreferredTextSegmentAddress = 0; - // The base address of the text segment in the process during profiling. - uint64_t ProfiledTextSegmentStart = 0; - // The limit address of the text segment in the process during profiling. - uint64_t ProfiledTextSegmentEnd = 0; - - // The memory mapped segment information for all executable segments in the - // profiled binary (filtered from the raw profile using the build id). - llvm::SmallVector SegmentInfo; - - // A map from callstack id (same as key in CallStackMap below) to the heap - // information recorded for that allocation context. - llvm::MapVector CallstackProfileData; - CallStackMap StackMap; - - // Cached symbolization from PC to Frame. - llvm::DenseMap> SymbolizedFrame; +#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ - // Whether to keep the symbol name for each frame after hashing. - bool KeepSymbolName = false; - // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 
- llvm::DenseMap GuidToSymbolName; -}; -} // namespace memprof -} // namespace llvm +#include "llvm/ProfileData/MemProfReader.h" #endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index 99617a43fee75..408f9ff01ec87 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -6,8 +6,8 @@ add_llvm_component_library(LLVMProfileData InstrProfWriter.cpp ItaniumManglingCanonicalizer.cpp MemProf.cpp + MemProfReader.cpp ProfileSummaryBuilder.cpp - RawMemProfReader.cpp SampleProf.cpp SampleProfReader.cpp SampleProfWriter.cpp diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp similarity index 99% rename from llvm/lib/ProfileData/RawMemProfReader.cpp rename to llvm/lib/ProfileData/MemProfReader.cpp index e93fbc72f54eb..4ccec26597c09 100644 --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -32,7 +32,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfData.inc" -#include "llvm/ProfileData/RawMemProfReader.h" +#include "llvm/ProfileData/MemProfReader.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" @@ -295,8 +295,8 @@ Error RawMemProfReader::initialize(std::unique_ptr DataBuffer) { // Check whether the profiled binary was built with position independent code // (PIC). Perform sanity checks for assumptions we rely on to simplify // symbolization. - auto* Elf64LEObject = llvm::cast(ElfObject); - const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile(); + auto *Elf64LEObject = llvm::cast(ElfObject); + const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile(); auto PHdrsOr = ElfFile.program_headers(); if (!PHdrsOr) return report( diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 0b78564ccea37..6a70773613b7f 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -19,8 +19,8 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfReader.h" #include "llvm/ProfileData/ProfileCommon.h" -#include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/BalancedPartitioning.h" diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index f1aa6f37aa399..9cf307472d656 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -6,7 +6,7 @@ #include "llvm/IR/Value.h" #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/MemProfData.inc" -#include "llvm/ProfileData/RawMemProfReader.h" +#include "llvm/ProfileData/MemProfReader.h" #include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/llvm/utils/gn/secondary/llvm/lib/ProfileData/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ProfileData/BUILD.gn index 51568ac0472c3..9dbfe0f94c1db 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ProfileData/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ProfileData/BUILD.gn @@ -16,8 +16,8 @@ static_library("ProfileData") { "InstrProfWriter.cpp", "ItaniumManglingCanonicalizer.cpp", "MemProf.cpp", + "MemProfReader.cpp", 
"ProfileSummaryBuilder.cpp", - "RawMemProfReader.cpp", "SampleProf.cpp", "SampleProfReader.cpp", "SampleProfWriter.cpp", From bd32aaa8c9ec2094f605315b3989adc2a567ca98 Mon Sep 17 00:00:00 2001 From: Cyrill Leutwiler Date: Thu, 11 Apr 2024 07:11:51 +0200 Subject: [PATCH 099/886] [RISCV] Support rv{32, 64}e in the compiler builtins (#88252) Register spills (save/restore) in RISC-V embedded work differently because there are less registers and different stack alignment. [GCC equivalent ](https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/riscv/save-restore.S#L298C16-L336) Follow up from #76777. --------- Signed-off-by: xermicus --- compiler-rt/lib/builtins/riscv/restore.S | 42 ++++++++++++++++++++++++ compiler-rt/lib/builtins/riscv/save.S | 42 ++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/compiler-rt/lib/builtins/riscv/restore.S b/compiler-rt/lib/builtins/riscv/restore.S index 73f64a920d669..6f43842c8ca68 100644 --- a/compiler-rt/lib/builtins/riscv/restore.S +++ b/compiler-rt/lib/builtins/riscv/restore.S @@ -22,6 +22,8 @@ #if __riscv_xlen == 32 +#ifndef __riscv_32e + .globl __riscv_restore_12 .type __riscv_restore_12,@function __riscv_restore_12: @@ -86,8 +88,29 @@ __riscv_restore_0: addi sp, sp, 16 ret +#else + + .globl __riscv_restore_2 + .type __riscv_restore_2,@function + .globl __riscv_restore_1 + .type __riscv_restore_1,@function + .globl __riscv_restore_0 + .type __riscv_restore_0,@function +__riscv_restore_2: +__riscv_restore_1: +__riscv_restore_0: + lw s1, 0(sp) + lw s0, 4(sp) + lw ra, 8(sp) + addi sp, sp, 12 + ret + +#endif + #elif __riscv_xlen == 64 +#ifndef __riscv_64e + .globl __riscv_restore_12 .type __riscv_restore_12,@function __riscv_restore_12: @@ -161,6 +184,25 @@ __riscv_restore_0: addi sp, sp, 16 ret +#else + + .globl __riscv_restore_2 + .type __riscv_restore_2,@function + .globl __riscv_restore_1 + .type __riscv_restore_1,@function + .globl __riscv_restore_0 + .type __riscv_restore_0,@function +__riscv_restore_2: +__riscv_restore_1: +__riscv_restore_0: + ld s1, 0(sp) + ld s0, 8(sp) + ld ra, 16(sp) + addi sp, sp, 24 + ret + +#endif + #else # error "xlen must be 32 or 64 for save-restore implementation #endif diff --git a/compiler-rt/lib/builtins/riscv/save.S b/compiler-rt/lib/builtins/riscv/save.S index 85501aeb4c2e9..3e044179ff7f1 100644 --- a/compiler-rt/lib/builtins/riscv/save.S +++ b/compiler-rt/lib/builtins/riscv/save.S @@ -18,6 +18,8 @@ #if __riscv_xlen == 32 +#ifndef __riscv_32e + .globl __riscv_save_12 .type __riscv_save_12,@function __riscv_save_12: @@ -92,8 +94,29 @@ __riscv_save_0: sw ra, 12(sp) jr t0 +#else + + .globl __riscv_save_2 + .type __riscv_save_2,@function + .globl __riscv_save_1 + .type __riscv_save_1,@function + .globl __riscv_save_0 + .type __riscv_save_0,@function +__riscv_save_2: +__riscv_save_1: +__riscv_save_0: + addi sp, sp, -12 + sw s1, 0(sp) + sw s0, 4(sp) + sw ra, 8(sp) + jr t0 + +#endif + #elif __riscv_xlen == 64 +#ifndef __riscv_64e + .globl __riscv_save_12 .type __riscv_save_12,@function __riscv_save_12: @@ -181,6 +204,25 @@ __riscv_save_0: sd ra, 8(sp) jr t0 +#else + + .globl __riscv_save_2 + .type __riscv_save_2,@function + .globl __riscv_save_1 + .type __riscv_save_1,@function + .globl __riscv_save_0 + .type __riscv_save_0,@function +__riscv_save_2: +__riscv_save_1: +__riscv_save_0: + addi sp, sp, -24 + sd s1, 0(sp) + sd s0, 8(sp) + sd ra, 16(sp) + jr t0 + +#endif + #else # error "xlen must be 32 or 64 for save-restore implementation #endif From 5964c944bfe74cee2872cddb66eff22866cdb6ee Mon Sep 17 
00:00:00 2001 From: Owen Pan Date: Wed, 10 Apr 2024 22:25:37 -0700 Subject: [PATCH 100/886] [clangd] Fix test case due to clang-format bug fix (#88352) See commit 51f1681424f1. --- clang-tools-extra/clangd/unittests/HoverTests.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 35db757b9c15b..5ead74748f550 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -1983,10 +1983,14 @@ TEST(Hover, All) { HI.Kind = index::SymbolKind::Macro; HI.Definition = R"cpp(#define MACRO \ - { return 0; } + { \ + return 0; \ + } // Expands to -{ return 0; })cpp"; +{ + return 0; +})cpp"; }}, { R"cpp(// Forward class declaration From 45146082e693415f37413c656e0a0fd13d0e3136 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 10 Apr 2024 23:11:24 -0700 Subject: [PATCH 101/886] [ELF] relocateNonAlloc & ICF: replace random access iterators of relocations with input iterators. NFC Also replace one `this->file` with a local variable as some function calls are opaque to the compiler, causing unneeded reload. --- lld/ELF/ICF.cpp | 22 ++++++++++++---------- lld/ELF/InputSection.cpp | 16 ++++++++-------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 2551c2e807b73..bfc605c793a92 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -239,16 +239,17 @@ bool ICF::constantEq(const InputSection *secA, ArrayRef ra, const InputSection *secB, ArrayRef rb) { if (ra.size() != rb.size()) return false; - for (size_t i = 0; i < ra.size(); ++i) { - if (ra[i].r_offset != rb[i].r_offset || - ra[i].getType(config->isMips64EL) != rb[i].getType(config->isMips64EL)) + auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); + for (; rai != rae; ++rai, ++rbi) { + if (rai->r_offset != rbi->r_offset || + rai->getType(config->isMips64EL) != rbi->getType(config->isMips64EL)) return false; - uint64_t addA = getAddend(ra[i]); - uint64_t addB = getAddend(rb[i]); + uint64_t addA = getAddend(*rai); + uint64_t addB = getAddend(*rbi); - Symbol &sa = secA->file->getRelocTargetSym(ra[i]); - Symbol &sb = secB->file->getRelocTargetSym(rb[i]); + Symbol &sa = secA->file->getRelocTargetSym(*rai); + Symbol &sb = secB->file->getRelocTargetSym(*rbi); if (&sa == &sb) { if (addA == addB) continue; @@ -336,10 +337,11 @@ bool ICF::variableEq(const InputSection *secA, ArrayRef ra, const InputSection *secB, ArrayRef rb) { assert(ra.size() == rb.size()); - for (size_t i = 0; i < ra.size(); ++i) { + auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); + for (; rai != rae; ++rai, ++rbi) { // The two sections must be identical. 
- Symbol &sa = secA->file->getRelocTargetSym(ra[i]); - Symbol &sb = secB->file->getRelocTargetSym(rb[i]); + Symbol &sa = secA->file->getRelocTargetSym(*rai); + Symbol &sb = secB->file->getRelocTargetSym(*rbi); if (&sa == &sb) continue; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index c06816bcfd561..c8350652e65a6 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -918,8 +918,9 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { break; } - for (size_t i = 0, relsSize = rels.size(); i != relsSize; ++i) { - const RelTy &rel = rels[i]; + const InputFile *f = this->file; + for (auto it = rels.begin(), end = rels.end(); it != end; ++it) { + const RelTy &rel = *it; const RelType type = rel.getType(config->isMips64EL); const uint64_t offset = rel.r_offset; uint8_t *bufLoc = buf + offset; @@ -927,23 +928,22 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { if (!RelTy::IsRela) addend += target.getImplicitAddend(bufLoc, type); - Symbol &sym = this->file->getRelocTargetSym(rel); + Symbol &sym = f->getRelocTargetSym(rel); RelExpr expr = target.getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; auto *ds = dyn_cast(&sym); if (emachine == EM_RISCV && type == R_RISCV_SET_ULEB128) { - if (++i < relsSize && - rels[i].getType(/*isMips64EL=*/false) == R_RISCV_SUB_ULEB128 && - rels[i].r_offset == offset) { + if (++it != end && + it->getType(/*isMips64EL=*/false) == R_RISCV_SUB_ULEB128 && + it->r_offset == offset) { uint64_t val; if (!ds && tombstone) { val = *tombstone; } else { val = sym.getVA(addend) - - (this->file->getRelocTargetSym(rels[i]).getVA(0) + - getAddend(rels[i])); + (f->getRelocTargetSym(*it).getVA(0) + getAddend(*it)); } if (overwriteULEB128(bufLoc, val) >= 0x80) errorOrWarn(getLocation(offset) + ": ULEB128 value " + Twine(val) + From 71f1932b842793e5dc7b17051452e8ff2f9219aa Mon Sep 17 00:00:00 2001 From: martinboehme Date: Thu, 11 Apr 2024 08:20:35 +0200 Subject: [PATCH 102/886] [clang][dataflow] Reland #87320: Propagate locations from result objects to initializers. (#88316) This relands #87320 and additionally removes the now-unused function `isOriginalRecordConstructor()`, which was causing buildbots to fail. --- .../FlowSensitive/DataflowEnvironment.h | 64 ++- .../FlowSensitive/DataflowEnvironment.cpp | 427 +++++++++++++----- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 176 ++++---- .../TypeErasedDataflowAnalysis.cpp | 13 +- .../FlowSensitive/DataflowEnvironmentTest.cpp | 43 ++ .../Analysis/FlowSensitive/TransferTest.cpp | 172 ++++--- 6 files changed, 590 insertions(+), 305 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 9a65f76cdf56b..706664d7db1c2 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -30,6 +30,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include #include #include @@ -344,17 +345,6 @@ class Environment { /// location of the result object to pass in `this`, even though prvalues are /// otherwise not associated with storage locations. /// - /// FIXME: Currently, this simply returns a stable storage location for `E`, - /// but this doesn't do the right thing in scenarios like the following: - /// ``` - /// MyClass c = some_condition()? 
MyClass(foo) : MyClass(bar); - /// ``` - /// Here, `MyClass(foo)` and `MyClass(bar)` will have two different storage - /// locations, when in fact their storage locations should be the same. - /// Eventually, we want to propagate storage locations from result objects - /// down to the prvalues that initialize them, similar to the way that this is - /// done in Clang's CodeGen. - /// /// Requirements: /// `E` must be a prvalue of record type. RecordStorageLocation & @@ -462,7 +452,13 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values. - void initializeFieldsWithValues(RecordStorageLocation &Loc); + /// If `Type` is provided, initializes only those fields that are modeled for + /// `Type`; this is intended for use in cases where `Loc` is a derived type + /// and we only want to initialize the fields of a base type. + void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type); + void initializeFieldsWithValues(RecordStorageLocation &Loc) { + initializeFieldsWithValues(Loc, Loc.getType()); + } /// Assigns `Val` as the value of `Loc` in the environment. void setValue(const StorageLocation &Loc, Value &Val); @@ -653,6 +649,9 @@ class Environment { LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; private: + using PrValueToResultObject = + llvm::DenseMap; + // The copy-constructor is for use in fork() only. Environment(const Environment &) = default; @@ -682,8 +681,10 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values (controlled by `Visited`, - /// `Depth`, and `CreatedValuesCount`). - void initializeFieldsWithValues(RecordStorageLocation &Loc, + /// `Depth`, and `CreatedValuesCount`). If `Type` is different from + /// `Loc.getType()`, initializes only those fields that are modeled for + /// `Type`. + void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount); @@ -702,22 +703,45 @@ class Environment { /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body. void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl); + static PrValueToResultObject + buildResultObjectMap(DataflowAnalysisContext *DACtx, + const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal); + // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; - // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and - // `ThisPointeeLoc` into a separate call-context object, shared between - // environments in the same call. + // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`, + // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object, + // shared between environments in the same call. // https://github.com/llvm/llvm-project/issues/59005 // `DeclContext` of the block being analysed if provided. std::vector CallStack; - // Value returned by the function (if it has non-reference return type). + // Maps from prvalues of record type to their result objects. Shared between + // all environments for the same function. 
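+  // (For example, in `S s = S(1);`, the prvalue `S(1)` is mapped to the
+  // storage location of `s`, which is its result object.)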
+  // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr`
+  // here, though the cost is acceptable: The overhead of a `shared_ptr` is
+  // incurred when it is copied, and this happens only relatively rarely (when
+  // we fork the environment). The need for a `shared_ptr` will go away once we
+  // introduce a shared call-context object (see above).
+  std::shared_ptr<PrValueToResultObject> ResultObjectMap;
+
+  // The following three member variables handle the different kinds of
+  // return values.
+  // - If the return type is not a reference and not a record: Value returned
+  //   by the function.
   Value *ReturnVal = nullptr;
 
-  // Storage location of the reference returned by the function (if it has
-  // reference return type).
+  // - If the return type is a reference: Storage location of the reference
+  //   returned by the function.
   StorageLocation *ReturnLoc = nullptr;
 
+  // - If the return type is a record or the function being analyzed is a
+  //   constructor: Storage location into which the return value should be
+  //   constructed.
+  RecordStorageLocation *LocForRecordReturnVal = nullptr;
+
   // The storage location of the `this` pointee. Should only be null if the
   // function being analyzed is only a function and not a method.
   RecordStorageLocation *ThisPointeeLoc = nullptr;
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 1bfa7ebcfd50c..bea15ce9bd24d 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -15,6 +15,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
@@ -26,6 +27,8 @@
 #include <cassert>
 #include <utility>
 
+#define DEBUG_TYPE "dataflow"
+
 namespace clang {
 namespace dataflow {
 
@@ -354,6 +357,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   for (auto *Child : S.children())
     if (Child != nullptr)
       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
+  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
+    getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs);
   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
 
@@ -386,6 +391,186 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   }
 }
 
+namespace {
+
+// Visitor that builds a map from record prvalues to result objects.
+// This traverses the body of the function to be analyzed; for each result
+// object that it encounters, it propagates the storage location of the result
+// object to all record prvalues that can initialize it.
+class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
+public:
+  // `ResultObjectMap` will be filled with a map from record prvalues to result
+  // objects. If the function being analyzed returns a record by value,
+  // `LocForRecordReturnVal` is the location to which this record should be
+  // written; otherwise, it is null.
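+  // (For example, `LocForRecordReturnVal` is null when the function being
+  // analyzed returns `void` or a non-record type such as `int`.)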
+ explicit ResultObjectVisitor( + llvm::DenseMap &ResultObjectMap, + RecordStorageLocation *LocForRecordReturnVal, + DataflowAnalysisContext &DACtx) + : ResultObjectMap(ResultObjectMap), + LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} + + bool shouldVisitImplicitCode() { return true; } + + bool shouldVisitLambdaBody() const { return false; } + + // Traverse all member and base initializers of `Ctor`. This function is not + // called by `RecursiveASTVisitor`; it should be called manually if we are + // analyzing a constructor. `ThisPointeeLoc` is the storage location that + // `this` points to. + void TraverseConstructorInits(const CXXConstructorDecl *Ctor, + RecordStorageLocation *ThisPointeeLoc) { + assert(ThisPointeeLoc != nullptr); + for (const CXXCtorInitializer *Init : Ctor->inits()) { + Expr *InitExpr = Init->getInit(); + if (FieldDecl *Field = Init->getMember(); + Field != nullptr && Field->getType()->isRecordType()) { + PropagateResultObject(InitExpr, cast( + ThisPointeeLoc->getChild(*Field))); + } else if (Init->getBaseClass()) { + PropagateResultObject(InitExpr, ThisPointeeLoc); + } + + // Ensure that any result objects within `InitExpr` (e.g. temporaries) + // are also propagated to the prvalues that initialize them. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also propagate any result objects + // within the default expression. + if (auto *DefaultInit = dyn_cast(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool TraverseBindingDecl(BindingDecl *BD) { + // `RecursiveASTVisitor` doesn't traverse holding variables for + // `BindingDecl`s by itself, so we need to tell it to. + if (VarDecl *HoldingVar = BD->getHoldingVar()) + TraverseDecl(HoldingVar); + return RecursiveASTVisitor::TraverseBindingDecl(BD); + } + + bool VisitVarDecl(VarDecl *VD) { + if (VD->getType()->isRecordType() && VD->hasInit()) + PropagateResultObject( + VD->getInit(), + &cast(DACtx.getStableStorageLocation(*VD))); + return true; + } + + bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { + if (MTE->getType()->isRecordType()) + PropagateResultObject( + MTE->getSubExpr(), + &cast(DACtx.getStableStorageLocation(*MTE))); + return true; + } + + bool VisitReturnStmt(ReturnStmt *Return) { + Expr *RetValue = Return->getRetValue(); + if (RetValue != nullptr && RetValue->getType()->isRecordType() && + RetValue->isPRValue()) + PropagateResultObject(RetValue, LocForRecordReturnVal); + return true; + } + + bool VisitExpr(Expr *E) { + // Clang's AST can have record-type prvalues without a result object -- for + // example as full-expressions contained in a compound statement or as + // arguments of call expressions. We notice this if we get here and a + // storage location has not yet been associated with `E`. In this case, + // treat this as if it was a `MaterializeTemporaryExpr`. + if (E->isPRValue() && E->getType()->isRecordType() && + !ResultObjectMap.contains(E)) + PropagateResultObject( + E, &cast(DACtx.getStableStorageLocation(*E))); + return true; + } + + // Assigns `Loc` as the result object location of `E`, then propagates the + // location to all lower-level prvalues that initialize the same object as + // `E` (or one of its base classes or member variables). + void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { + if (!E->isPRValue() || !E->getType()->isRecordType()) { + assert(false); + // Ensure we don't propagate the result object if we hit this in a + // release build. 
+ return; + } + + ResultObjectMap[E] = Loc; + + // The following AST node kinds are "original initializers": They are the + // lowest-level AST node that initializes a given object, and nothing + // below them can initialize the same object (or part of it). + if (isa(E) || isa(E) || isa(E) || + isa(E) || isa(E) || + isa(E)) { + return; + } + + if (auto *InitList = dyn_cast(E)) { + if (!InitList->isSemanticForm()) + return; + if (InitList->isTransparent()) { + PropagateResultObject(InitList->getInit(0), Loc); + return; + } + + RecordInitListHelper InitListHelper(InitList); + + for (auto [Base, Init] : InitListHelper.base_inits()) { + assert(Base->getType().getCanonicalType() == + Init->getType().getCanonicalType()); + + // Storage location for the base class is the same as that of the + // derived class because we "flatten" the object hierarchy and put all + // fields in `RecordStorageLocation` of the derived class. + PropagateResultObject(Init, Loc); + } + + for (auto [Field, Init] : InitListHelper.field_inits()) { + // Fields of non-record type are handled in + // `TransferVisitor::VisitInitListExpr()`. + if (!Field->getType()->isRecordType()) + continue; + PropagateResultObject( + Init, cast(Loc->getChild(*Field))); + } + return; + } + + if (auto *Op = dyn_cast(E); Op && Op->isCommaOp()) { + PropagateResultObject(Op->getRHS(), Loc); + return; + } + + if (auto *Cond = dyn_cast(E)) { + PropagateResultObject(Cond->getTrueExpr(), Loc); + PropagateResultObject(Cond->getFalseExpr(), Loc); + return; + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. + SmallVector Children(E->child_begin(), E->child_end()); + LLVM_DEBUG({ + if (Children.size() != 1) + E->dump(); + }); + assert(Children.size() == 1); + for (Stmt *S : Children) + PropagateResultObject(cast(S), Loc); + } + +private: + llvm::DenseMap &ResultObjectMap; + RecordStorageLocation *LocForRecordReturnVal; + DataflowAnalysisContext &DACtx; +}; + +} // namespace + Environment::Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx), FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} @@ -401,17 +586,23 @@ void Environment::initialize() { if (DeclCtx == nullptr) return; - if (const auto *FuncDecl = dyn_cast(DeclCtx)) { - assert(FuncDecl->doesThisDeclarationHaveABody()); + const auto *FuncDecl = dyn_cast(DeclCtx); + if (FuncDecl == nullptr) + return; - initFieldsGlobalsAndFuncs(FuncDecl); + assert(FuncDecl->doesThisDeclarationHaveABody()); - for (const auto *ParamDecl : FuncDecl->parameters()) { - assert(ParamDecl != nullptr); - setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); - } + initFieldsGlobalsAndFuncs(FuncDecl); + + for (const auto *ParamDecl : FuncDecl->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); } + if (FuncDecl->getReturnType()->isRecordType()) + LocForRecordReturnVal = &cast( + createStorageLocation(FuncDecl->getReturnType())); + if (const auto *MethodDecl = dyn_cast(DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); @@ -444,6 +635,12 @@ void Environment::initialize() { initializeFieldsWithValues(ThisLoc); } } + + // We do this below the handling of `CXXMethodDecl` above so that we can + // be sure that the storage location for `this` has been set. 
+ ResultObjectMap = std::make_shared( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); } // FIXME: Add support for resetting globals after function calls to enable @@ -484,13 +681,18 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { if (getStorageLocation(*D) != nullptr) continue; - setStorageLocation(*D, createObject(*D)); + // We don't run transfer functions on the initializers of global variables, + // so they won't be associated with a value or storage location. We + // therefore intentionally don't pass an initializer to `createObject()`; + // in particular, this ensures that `createObject()` will initialize the + // fields of record-type variables with values. + setStorageLocation(*D, createObject(*D, nullptr)); } for (const FunctionDecl *FD : Funcs) { if (getStorageLocation(*FD) != nullptr) continue; - auto &Loc = createStorageLocation(FD->getType()); + auto &Loc = createStorageLocation(*FD); setStorageLocation(*FD, Loc); } } @@ -519,6 +721,9 @@ Environment Environment::pushCall(const CallExpr *Call) const { } } + if (Call->getType()->isRecordType() && Call->isPRValue()) + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); + Env.pushCallInternal(Call->getDirectCallee(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -529,6 +734,7 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -557,6 +763,10 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, const VarDecl *Param = *ParamIt; setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); } + + ResultObjectMap = std::make_shared( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); } void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { @@ -600,6 +810,9 @@ bool Environment::equivalentTo(const Environment &Other, if (ReturnLoc != Other.ReturnLoc) return false; + if (LocForRecordReturnVal != Other.LocForRecordReturnVal) + return false; + if (ThisPointeeLoc != Other.ThisPointeeLoc) return false; @@ -623,8 +836,10 @@ LatticeEffect Environment::widen(const Environment &PrevEnv, assert(DACtx == PrevEnv.DACtx); assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); + assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); + assert(ResultObjectMap == PrevEnv.ResultObjectMap); auto Effect = LatticeEffect::Unchanged; @@ -656,12 +871,16 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, Environment::ValueModel &Model, ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); + assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); + assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); Environment JoinedEnv(*EnvA.DACtx); JoinedEnv.CallStack = EnvA.CallStack; + JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; + JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { @@ -730,6 +949,12 @@ StorageLocation 
&Environment::createStorageLocation(const Expr &E) {
 
 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
   assert(!DeclToLoc.contains(&D));
+  // The only kinds of declarations that may have a "variable" storage location
+  // are declarations of reference type and `BindingDecl`. For all other
+  // declarations, the storage location should be the stable storage location
+  // returned by `createStorageLocation()`.
+  assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) ||
+         &Loc == &createStorageLocation(D));
   DeclToLoc[&D] = &Loc;
 }
 
@@ -764,77 +989,34 @@ StorageLocation *Environment::getStorageLocation(const Expr &E) const {
   return It == ExprToLoc.end() ? nullptr : &*It->second;
 }
 
-// Returns whether a prvalue of record type is the one that originally
-// constructs the object (i.e. it doesn't propagate it from one of its
-// children).
-static bool isOriginalRecordConstructor(const Expr &RecordPRValue) {
-  if (auto *Init = dyn_cast<InitListExpr>(&RecordPRValue))
-    return !Init->isSemanticForm() || !Init->isTransparent();
-  return isa<CXXConstructExpr>(RecordPRValue) || isa<CallExpr>(RecordPRValue) ||
-         isa<LambdaExpr>(RecordPRValue) ||
-         isa<CXXDefaultArgExpr>(RecordPRValue) ||
-         isa<CXXDefaultInitExpr>(RecordPRValue) ||
-         // The framework currently does not propagate the objects created in
-         // the two branches of a `ConditionalOperator` because there is no way
-         // to reconcile their storage locations, which are different. We
-         // therefore claim that the `ConditionalOperator` is the expression
-         // that originally constructs the object.
-         // Ultimately, this will be fixed by propagating locations down from
-         // the result object, rather than up from the original constructor as
-         // we do now (see also the FIXME in the documentation for
-         // `getResultObjectLocation()`).
-         isa<ConditionalOperator>(RecordPRValue);
-}
-
 RecordStorageLocation &
 Environment::getResultObjectLocation(const Expr &RecordPRValue) const {
   assert(RecordPRValue.getType()->isRecordType());
   assert(RecordPRValue.isPRValue());
 
-  // Returns a storage location that we can use if assertions fail.
-  auto FallbackForAssertFailure =
-      [this, &RecordPRValue]() -> RecordStorageLocation & {
+  assert(ResultObjectMap != nullptr);
+  RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue);
+  assert(Loc != nullptr);
+  // In release builds, use the "stable" storage location if the map lookup
+  // failed.
+  if (Loc == nullptr)
     return cast<RecordStorageLocation>(
         DACtx->getStableStorageLocation(RecordPRValue));
-  };
-
-  if (isOriginalRecordConstructor(RecordPRValue)) {
-    auto *Val = cast_or_null<RecordValue>(getValue(RecordPRValue));
-    // The builtin transfer function should have created a `RecordValue` for
-    // all original record constructors.
-    assert(Val);
-    if (!Val)
-      return FallbackForAssertFailure();
-    return Val->getLoc();
-  }
-
-  if (auto *Op = dyn_cast<BinaryOperator>(&RecordPRValue);
-      Op && Op->isCommaOp()) {
-    return getResultObjectLocation(*Op->getRHS());
-  }
-
-  // All other expression nodes that propagate a record prvalue should have
-  // exactly one child.
- llvm::SmallVector children(RecordPRValue.child_begin(), - RecordPRValue.child_end()); - assert(children.size() == 1); - if (children.empty()) - return FallbackForAssertFailure(); - - return getResultObjectLocation(*cast(children[0])); + return *Loc; } PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } -void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc) { +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type) { llvm::DenseSet Visited; int CreatedValuesCount = 0; - initializeFieldsWithValues(Loc, Visited, 0, CreatedValuesCount); + initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); if (CreatedValuesCount > MaxCompositeValueSize) { - llvm::errs() << "Attempting to initialize a huge value of type: " - << Loc.getType() << '\n'; + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; } } @@ -848,8 +1030,7 @@ void Environment::setValue(const Expr &E, Value &Val) { const Expr &CanonE = ignoreCFGOmittedNodes(E); if (auto *RecordVal = dyn_cast(&Val)) { - assert(isOriginalRecordConstructor(CanonE) || - &RecordVal->getLoc() == &getResultObjectLocation(CanonE)); + assert(&RecordVal->getLoc() == &getResultObjectLocation(CanonE)); (void)RecordVal; } @@ -928,7 +1109,8 @@ Value *Environment::createValueUnlessSelfReferential( if (Type->isRecordType()) { CreatedValuesCount++; auto &Loc = cast(createStorageLocation(Type)); - initializeFieldsWithValues(Loc, Visited, Depth, CreatedValuesCount); + initializeFieldsWithValues(Loc, Loc.getType(), Visited, Depth, + CreatedValuesCount); return &refreshRecordValue(Loc, *this); } @@ -960,6 +1142,7 @@ Environment::createLocAndMaybeValue(QualType Ty, } void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount) { @@ -967,8 +1150,8 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, if (FieldType->isRecordType()) { auto &FieldRecordLoc = cast(FieldLoc); setValue(FieldRecordLoc, create(FieldRecordLoc)); - initializeFieldsWithValues(FieldRecordLoc, Visited, Depth + 1, - CreatedValuesCount); + initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), + Visited, Depth + 1, CreatedValuesCount); } else { if (!Visited.insert(FieldType.getCanonicalType()).second) return; @@ -979,7 +1162,7 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, } }; - for (const auto &[Field, FieldLoc] : Loc.children()) { + for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { assert(Field != nullptr); QualType FieldType = Field->getType(); @@ -988,14 +1171,12 @@ void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, CreatedValuesCount)); } else { + StorageLocation *FieldLoc = Loc.getChild(*Field); assert(FieldLoc != nullptr); initField(FieldType, *FieldLoc); } } - for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { - assert(FieldLoc != nullptr); - QualType FieldType = FieldLoc->getType(); - + for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { // Synthetic fields cannot have reference type, so we don't need to deal // with this case. 
assert(!FieldType->isReferenceType()); @@ -1022,38 +1203,36 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D, return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); } - Value *Val = nullptr; - if (InitExpr) { - // In the (few) cases where an expression is intentionally - // "uninterpreted", `InitExpr` is not associated with a value. There are - // two ways to handle this situation: propagate the status, so that - // uninterpreted initializers result in uninterpreted variables, or - // provide a default value. We choose the latter so that later refinements - // of the variable can be used for reasoning about the surrounding code. - // For this reason, we let this case be handled by the `createValue()` - // call below. - // - // FIXME. If and when we interpret all language cases, change this to - // assert that `InitExpr` is interpreted, rather than supplying a - // default value (assuming we don't update the environment API to return - // references). - Val = getValue(*InitExpr); - - if (!Val && isa(InitExpr) && - InitExpr->getType()->isPointerType()) - Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType()); - } - if (!Val) - Val = createValue(Ty); - - if (Ty->isRecordType()) - return cast(Val)->getLoc(); - StorageLocation &Loc = D ? createStorageLocation(*D) : createStorageLocation(Ty); - if (Val) - setValue(Loc, *Val); + if (Ty->isRecordType()) { + auto &RecordLoc = cast(Loc); + if (!InitExpr) + initializeFieldsWithValues(RecordLoc); + refreshRecordValue(RecordLoc, *this); + } else { + Value *Val = nullptr; + if (InitExpr) + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). 
+ Val = getValue(*InitExpr); + if (!Val) + Val = createValue(Ty); + if (Val) + setValue(Loc, *Val); + } return Loc; } @@ -1072,6 +1251,8 @@ bool Environment::allows(const Formula &F) const { void Environment::dump(raw_ostream &OS) const { llvm::DenseMap LocToName; + if (LocForRecordReturnVal != nullptr) + LocToName[LocForRecordReturnVal] = "(returned record)"; if (ThisPointeeLoc != nullptr) LocToName[ThisPointeeLoc] = "this"; @@ -1102,6 +1283,9 @@ void Environment::dump(raw_ostream &OS) const { if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) OS << " (" << Iter->second << ")"; OS << "\n"; + } else if (Func->getReturnType()->isRecordType() || + isa(Func)) { + OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; } else if (!Func->getReturnType()->isVoidType()) { if (ReturnVal == nullptr) OS << "ReturnVal: nullptr\n"; @@ -1122,6 +1306,22 @@ void Environment::dump() const { dump(llvm::dbgs()); } +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + assert(FuncDecl->doesThisDeclarationHaveABody()); + + PrValueToResultObject Map; + + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + if (const auto *Ctor = dyn_cast(FuncDecl)) + Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); + Visitor.TraverseStmt(FuncDecl->getBody()); + + return Map; +} + RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, const Environment &Env) { Expr *ImplicitObject = MCE.getImplicitObjectArgument(); @@ -1216,24 +1416,11 @@ RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { assert(Expr.getType()->isRecordType()); - if (Expr.isPRValue()) { - if (auto *ExistingVal = Env.get(Expr)) { - auto &NewVal = Env.create(ExistingVal->getLoc()); - Env.setValue(Expr, NewVal); - Env.setValue(NewVal.getLoc(), NewVal); - return NewVal; - } + if (Expr.isPRValue()) + refreshRecordValue(Env.getResultObjectLocation(Expr), Env); - auto &NewVal = *cast(Env.createValue(Expr.getType())); - Env.setValue(Expr, NewVal); - return NewVal; - } - - if (auto *Loc = Env.get(Expr)) { - auto &NewVal = Env.create(*Loc); - Env.setValue(*Loc, NewVal); - return NewVal; - } + if (auto *Loc = Env.get(Expr)) + refreshRecordValue(*Loc, Env); auto &NewVal = *cast(Env.createValue(Expr.getType())); Env.setStorageLocation(Expr, NewVal.getLoc()); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 0a2e8368d541d..88a9c0eccbebc 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -460,11 +460,9 @@ class TransferVisitor : public ConstStmtVisitor { // So make sure we have a value if we didn't propagate one above. if (S->isPRValue() && S->getType()->isRecordType()) { if (Env.getValue(*S) == nullptr) { - Value *Val = Env.createValue(S->getType()); - // We're guaranteed to always be able to create a value for record - // types. 
-        assert(Val != nullptr);
-        Env.setValue(*S, *Val);
+        auto &Loc = Env.getResultObjectLocation(*S);
+        Env.initializeFieldsWithValues(Loc);
+        refreshRecordValue(Loc, Env);
       }
     }
   }
 
   void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) {
     const Expr *InitExpr = S->getExpr();
     assert(InitExpr != nullptr);
+
+    // If this is a prvalue of record type, the handler for `*InitExpr` (if one
+    // exists) will initialize the result object; there is no value to
+    // propagate here.
+    if (S->getType()->isRecordType() && S->isPRValue())
+      return;
+
     propagateValueOrStorageLocation(*InitExpr, *S, Env);
   }
 
   void VisitCXXConstructExpr(const CXXConstructExpr *S) {
     const CXXConstructorDecl *ConstructorDecl = S->getConstructor();
     assert(ConstructorDecl != nullptr);
 
+    // `CXXConstructExpr` can have array type if default-initializing an array
+    // of records. We don't handle this specifically beyond potentially
+    // inlining the call.
+    if (!S->getType()->isRecordType()) {
+      transferInlineCall(S, ConstructorDecl);
+      return;
+    }
+
+    RecordStorageLocation &Loc = Env.getResultObjectLocation(*S);
+    Env.setValue(*S, refreshRecordValue(Loc, Env));
+
     if (ConstructorDecl->isCopyOrMoveConstructor()) {
       // It is permissible for a copy/move constructor to have additional
       // parameters as long as they have default arguments defined for them.
@@ -491,24 +507,14 @@
       if (ArgLoc == nullptr)
         return;
 
-      if (S->isElidable()) {
-        if (Value *Val = Env.getValue(*ArgLoc))
-          Env.setValue(*S, *Val);
-      } else {
-        auto &Val = *cast<RecordValue>(Env.createValue(S->getType()));
-        Env.setValue(*S, Val);
-        copyRecord(*ArgLoc, Val.getLoc(), Env);
-      }
+      // Even if the copy/move constructor call is elidable, we choose to copy
+      // the record in all cases (which isn't wrong, just potentially not
+      // optimal).
+      copyRecord(*ArgLoc, Loc, Env);
       return;
     }
 
-    // `CXXConstructExpr` can have array type if default-initializing an array
-    // of records, and we currently can't create values for arrays. So check if
-    // we've got a record type.
-    if (S->getType()->isRecordType()) {
-      auto &InitialVal = *cast<RecordValue>(Env.createValue(S->getType()));
-      Env.setValue(*S, InitialVal);
-    }
+    Env.initializeFieldsWithValues(Loc, S->getType());
 
     transferInlineCall(S, ConstructorDecl);
   }
 
@@ -551,19 +557,15 @@
     if (S->isGLValue()) {
       Env.setStorageLocation(*S, *LocDst);
     } else if (S->getType()->isRecordType()) {
-      // Make sure that we have a `RecordValue` for this expression so that
-      // `Environment::getResultObjectLocation()` is able to return a location
-      // for it.
-      if (Env.getValue(*S) == nullptr)
-        refreshRecordValue(*S, Env);
+      // Assume that the assignment returns the assigned value.
+      copyRecord(*LocDst, Env.getResultObjectLocation(*S), Env);
 
       return;
     }
 
-    // CXXOperatorCallExpr can be prvalues. Call `VisitCallExpr`() to create
-    // a `RecordValue` for them so that `Environment::getResultObjectLocation()`
-    // can return a value.
+    // `CXXOperatorCallExpr` can be a prvalue. Call `VisitCallExpr`() to
+    // initialize the prvalue's fields with values.
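+    // (For example, a call to an overloaded `operator+` that returns a struct
+    // by value is such a prvalue.)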
VisitCallExpr(S); } @@ -580,11 +582,6 @@ class TransferVisitor : public ConstStmtVisitor { } } - void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) { - if (Value *Val = Env.createValue(S->getType())) - Env.setValue(*S, *Val); - } - void VisitCallExpr(const CallExpr *S) { // Of clang's builtins, only `__builtin_expect` is handled explicitly, since // others (like trap, debugtrap, and unreachable) are handled by CFG @@ -612,13 +609,14 @@ class TransferVisitor : public ConstStmtVisitor { } else if (const FunctionDecl *F = S->getDirectCallee()) { transferInlineCall(S, F); - // If this call produces a prvalue of record type, make sure that we have - // a `RecordValue` for it. This is required so that - // `Environment::getResultObjectLocation()` is able to return a location - // for this `CallExpr`. + // If this call produces a prvalue of record type, initialize its fields + // with values. if (S->getType()->isRecordType() && S->isPRValue()) - if (Env.getValue(*S) == nullptr) - refreshRecordValue(*S, Env); + if (Env.getValue(*S) == nullptr) { + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.initializeFieldsWithValues(Loc); + Env.setValue(*S, refreshRecordValue(Loc, Env)); + } } } @@ -666,8 +664,10 @@ class TransferVisitor : public ConstStmtVisitor { // `getLogicOperatorSubExprValue()`. if (S->isGLValue()) Env.setStorageLocation(*S, Env.createObject(S->getType())); - else if (Value *Val = Env.createValue(S->getType())) - Env.setValue(*S, *Val); + else if (!S->getType()->isRecordType()) { + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(*S, *Val); + } } void VisitInitListExpr(const InitListExpr *S) { @@ -688,71 +688,51 @@ class TransferVisitor : public ConstStmtVisitor { return; } - llvm::DenseMap FieldLocs; - RecordInitListHelper InitListHelper(S); + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.setValue(*S, refreshRecordValue(Loc, Env)); - for (auto [Base, Init] : InitListHelper.base_inits()) { - assert(Base->getType().getCanonicalType() == - Init->getType().getCanonicalType()); - auto *BaseVal = Env.get(*Init); - if (!BaseVal) - BaseVal = cast(Env.createValue(Init->getType())); - // Take ownership of the fields of the `RecordValue` for the base class - // and incorporate them into the "flattened" set of fields for the - // derived class. - auto Children = BaseVal->getLoc().children(); - FieldLocs.insert(Children.begin(), Children.end()); - } + // Initialization of base classes and fields of record type happens when we + // visit the nested `CXXConstructExpr` or `InitListExpr` for that base class + // or field. We therefore only need to deal with fields of non-record type + // here. - for (auto [Field, Init] : InitListHelper.field_inits()) { - assert( - // The types are same, or - Field->getType().getCanonicalType().getUnqualifiedType() == - Init->getType().getCanonicalType().getUnqualifiedType() || - // The field's type is T&, and initializer is T - (Field->getType()->isReferenceType() && - Field->getType().getCanonicalType()->getPointeeType() == - Init->getType().getCanonicalType())); - auto& Loc = Env.createObject(Field->getType(), Init); - FieldLocs.insert({Field, &Loc}); - } + RecordInitListHelper InitListHelper(S); - // In the case of a union, we don't in general have initializers for all - // of the fields. Create storage locations for the remaining fields (but - // don't associate them with values). 
- if (Type->isUnionType()) { - for (const FieldDecl *Field : - Env.getDataflowAnalysisContext().getModeledFields(Type)) { - if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted) - it->second = &Env.createStorageLocation(Field->getType()); + for (auto [Field, Init] : InitListHelper.field_inits()) { + if (Field->getType()->isRecordType()) + continue; + if (Field->getType()->isReferenceType()) { + assert(Field->getType().getCanonicalType()->getPointeeType() == + Init->getType().getCanonicalType()); + Loc.setChild(*Field, &Env.createObject(Field->getType(), Init)); + continue; } + assert(Field->getType().getCanonicalType().getUnqualifiedType() == + Init->getType().getCanonicalType().getUnqualifiedType()); + StorageLocation *FieldLoc = Loc.getChild(*Field); + // Locations for non-reference fields must always be non-null. + assert(FieldLoc != nullptr); + Value *Val = Env.getValue(*Init); + if (Val == nullptr && isa(Init) && + Init->getType()->isPointerType()) + Val = + &Env.getOrCreateNullPointerValue(Init->getType()->getPointeeType()); + if (Val == nullptr) + Val = Env.createValue(Field->getType()); + if (Val != nullptr) + Env.setValue(*FieldLoc, *Val); } - // Check that we satisfy the invariant that a `RecordStorageLoation` - // contains exactly the set of modeled fields for that type. - // `ModeledFields` includes fields from all the bases, but only the - // modeled ones. However, if a class type is initialized with an - // `InitListExpr`, all fields in the class, including those from base - // classes, are included in the set of modeled fields. The code above - // should therefore populate exactly the modeled fields. - assert(containsSameFields( - Env.getDataflowAnalysisContext().getModeledFields(Type), FieldLocs)); - - RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs; - for (const auto &Entry : - Env.getDataflowAnalysisContext().getSyntheticFields(Type)) { - SyntheticFieldLocs.insert( - {Entry.getKey(), &Env.createObject(Entry.getValue())}); + for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { + QualType FieldType = FieldLoc->getType(); + if (FieldType->isRecordType()) { + Env.initializeFieldsWithValues(*cast(FieldLoc)); + } else { + if (Value *Val = Env.createValue(FieldType)) + Env.setValue(*FieldLoc, *Val); + } } - auto &Loc = Env.getDataflowAnalysisContext().createRecordStorageLocation( - Type, std::move(FieldLocs), std::move(SyntheticFieldLocs)); - RecordValue &RecordVal = Env.create(Loc); - - Env.setValue(Loc, RecordVal); - - Env.setValue(*S, RecordVal); - // FIXME: Implement array initialization. } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 595f70f819ddb..1b73c5d683016 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -369,17 +369,10 @@ builtinTransferInitializer(const CFGInitializer &Elt, ParentLoc->setChild(*Member, InitExprLoc); } else if (auto *InitExprVal = Env.getValue(*InitExpr)) { assert(MemberLoc != nullptr); - if (Member->getType()->isRecordType()) { - auto *InitValStruct = cast(InitExprVal); - // FIXME: Rather than performing a copy here, we should really be - // initializing the field in place. This would require us to propagate the - // storage location of the field to the AST node that creates the - // `RecordValue`. 
- copyRecord(InitValStruct->getLoc(), - *cast(MemberLoc), Env); - } else { + // Record-type initializers construct themselves directly into the result + // object, so there is no need to handle them here. + if (!Member->getType()->isRecordType()) Env.setValue(*MemberLoc, *InitExprVal); - } } } diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index 465a8e21690c4..cc20623f881ff 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -24,6 +24,7 @@ namespace { using namespace clang; using namespace dataflow; +using ::clang::dataflow::test::findValueDecl; using ::clang::dataflow::test::getFieldValue; using ::testing::Contains; using ::testing::IsNull; @@ -199,6 +200,48 @@ TEST_F(EnvironmentTest, JoinRecords) { } } +TEST_F(EnvironmentTest, DifferentReferenceLocInJoin) { + // This tests the case where the storage location for a reference-type + // variable is different for two states being joined. We used to believe this + // could not happen and therefore had an assertion disallowing this; this test + // exists to demonstrate that we can handle this condition without a failing + // assertion. See also the discussion here: + // https://discourse.llvm.org/t/70086/6 + + using namespace ast_matchers; + + std::string Code = R"cc( + void f(int &ref) {} + )cc"; + + auto Unit = + tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++11"}); + auto &Context = Unit->getASTContext(); + + ASSERT_EQ(Context.getDiagnostics().getClient()->getNumErrors(), 0U); + + const ValueDecl *Ref = findValueDecl(Context, "ref"); + + Environment Env1(DAContext); + StorageLocation &Loc1 = Env1.createStorageLocation(Context.IntTy); + Env1.setStorageLocation(*Ref, Loc1); + + Environment Env2(DAContext); + StorageLocation &Loc2 = Env2.createStorageLocation(Context.IntTy); + Env2.setStorageLocation(*Ref, Loc2); + + EXPECT_NE(&Loc1, &Loc2); + + Environment::ValueModel Model; + Environment EnvJoined = + Environment::join(Env1, Env2, Model, Environment::DiscardExprState); + + // Joining environments with different storage locations for the same + // declaration results in the declaration being removed from the joined + // environment. + EXPECT_EQ(EnvJoined.getStorageLocation(*Ref), nullptr); +} + TEST_F(EnvironmentTest, InitGlobalVarsFun) { using namespace ast_matchers; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index ca055a462a286..00dafb2988c69 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1582,10 +1582,9 @@ TEST(TransferTest, FieldsDontHaveValuesInConstructorWithBaseClass) { [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - // FIXME: The field of the base class should already have been - // initialized with a value by the base constructor. This test documents - // the current buggy behavior. - EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", + // The field of the base class should already have been initialized with + // a value by the base constructor. 
+ EXPECT_NE(getFieldValue(Env.getThisPointeeStorageLocation(), "BaseVal", ASTCtx, Env), nullptr); EXPECT_EQ(getFieldValue(Env.getThisPointeeStorageLocation(), "Val", @@ -2998,8 +2997,12 @@ TEST(TransferTest, ResultObjectLocation) { TEST(TransferTest, ResultObjectLocationForDefaultArgExpr) { std::string Code = R"( - struct S {}; - void funcWithDefaultArg(S s = S()); + struct Inner {}; + struct Outer { + Inner I = {}; + }; + + void funcWithDefaultArg(Outer O = {}); void target() { funcWithDefaultArg(); // [[p]] @@ -3058,13 +3061,7 @@ TEST(TransferTest, ResultObjectLocationForDefaultInitExpr) { RecordStorageLocation &Loc = Env.getResultObjectLocation(*DefaultInit); - // FIXME: The result object location for the `CXXDefaultInitExpr` should - // be the location of the member variable being initialized, but we - // don't do this correctly yet; see also comments in - // `builtinTransferInitializer()`. - // For the time being, we just document the current erroneous behavior - // here (this should be `EXPECT_EQ` when the behavior is fixed). - EXPECT_NE(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); + EXPECT_EQ(&Loc, Env.getThisPointeeStorageLocation()->getChild(*SField)); }); } @@ -3101,6 +3098,79 @@ TEST(TransferTest, ResultObjectLocationForCXXOperatorCallExpr) { }); } +TEST(TransferTest, ResultObjectLocationForStdInitializerListExpr) { + std::string Code = R"( + namespace std { + template + struct initializer_list {}; + } // namespace std + + void target() { + std::initializer_list list = {1}; + // [[p]] + } + )"; + + using ast_matchers::cxxStdInitializerListExpr; + using ast_matchers::match; + using ast_matchers::selectFirst; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *StdInitList = selectFirst( + "std_init_list", + match(cxxStdInitializerListExpr().bind("std_init_list"), ASTCtx)); + ASSERT_NE(StdInitList, nullptr); + + EXPECT_EQ(&Env.getResultObjectLocation(*StdInitList), + &getLocForDecl(ASTCtx, Env, "list")); + }); +} + +TEST(TransferTest, ResultObjectLocationPropagatesThroughConditionalOperator) { + std::string Code = R"( + struct A { + A(int); + }; + + void target(bool b) { + A a = b ? 
A(0) : A(1); + (void)0; // [[p]] + } + )"; + using ast_matchers::cxxConstructExpr; + using ast_matchers::equals; + using ast_matchers::hasArgument; + using ast_matchers::integerLiteral; + using ast_matchers::match; + using ast_matchers::selectFirst; + using ast_matchers::traverse; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *ConstructExpr0 = selectFirst( + "construct", + match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(0)))) + .bind("construct"), + ASTCtx)); + auto *ConstructExpr1 = selectFirst( + "construct", + match(cxxConstructExpr(hasArgument(0, integerLiteral(equals(1)))) + .bind("construct"), + ASTCtx)); + + auto &ALoc = getLocForDecl(ASTCtx, Env, "a"); + EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr0), &ALoc); + EXPECT_EQ(&Env.getResultObjectLocation(*ConstructExpr1), &ALoc); + }); +} + TEST(TransferTest, StaticCast) { std::string Code = R"( void target(int Foo) { @@ -5886,6 +5956,38 @@ TEST(TransferTest, ContextSensitiveReturnRecord) { {BuiltinOptions{ContextSensitiveOptions{}}}); } +TEST(TransferTest, ContextSensitiveReturnSelfReferentialRecord) { + std::string Code = R"( + struct S { + S() { self = this; } + S *self; + }; + + S makeS() { + // RVO guarantees that this will be constructed directly into `MyS`. + return S(); + } + + void target() { + S MyS = makeS(); + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &MySLoc = getLocForDecl(ASTCtx, Env, "MyS"); + + auto *SelfVal = + cast(getFieldValue(&MySLoc, "self", ASTCtx, Env)); + EXPECT_EQ(&SelfVal->getPointeeLoc(), &MySLoc); + }, + {BuiltinOptions{ContextSensitiveOptions{}}}); +} + TEST(TransferTest, ContextSensitiveMethodLiteral) { std::string Code = R"( class MyClass { @@ -6830,50 +6932,6 @@ TEST(TransferTest, LambdaCaptureThis) { }); } -TEST(TransferTest, DifferentReferenceLocInJoin) { - // This test triggers a case where the storage location for a reference-type - // variable is different for two states being joined. We used to believe this - // could not happen and therefore had an assertion disallowing this; this test - // exists to demonstrate that we can handle this condition without a failing - // assertion. See also the discussion here: - // https://discourse.llvm.org/t/70086/6 - std::string Code = R"( - namespace std { - template struct initializer_list { - const T* begin(); - const T* end(); - }; - } - - void target(char* p, char* end) { - while (p != end) { - if (*p == ' ') { - p++; - continue; - } - - auto && range = {1, 2}; - for (auto b = range.begin(), e = range.end(); b != e; ++b) { - } - (void)0; - // [[p]] - } - } - )"; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { - const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - // Joining environments with different storage locations for the same - // declaration results in the declaration being removed from the joined - // environment. - const ValueDecl *VD = findValueDecl(ASTCtx, "range"); - ASSERT_EQ(Env.getStorageLocation(*VD), nullptr); - }); -} - // This test verifies correct modeling of a relational dependency that goes // through unmodeled functions (the simple `cond()` in this case). 
TEST(TransferTest, ConditionalRelation) { From 297eca981ea1133388c82ddbfaf9d86391abac65 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 11 Apr 2024 08:23:48 +0200 Subject: [PATCH 103/886] [mlir][Interfaces] `ValueBoundsOpInterface`: Add API to compare values (#86915) This commit adds a new public API to `ValueBoundsOpInterface` to compare values/dims. Supported comparison operators are: LT, LE, EQ, GE, GT. The new `ValueBoundsOpInterface::compare` API replaces and generalizes `ValueBoundsOpInterface::areEqual`. Not only does it provide additional comparison operators, it also works in cases where the difference between the two values/dims is non-constant. The previous implementation of `areEqual` used to compute a constant bound of `val1 - val2` (check if it `== 0` or `!= 0`). Note: This commit refactors, generalizes and adds a public API for value/dim comparison. The comparison functionality itself was introduced in #85895 and is already in use for analyzing `scf.if`. In the long term, this improvement will allow for a more powerful analysis of subset ops. A future commit will update `areOverlappingSlices` to use the new comparison API. (`areEquivalentSlices` is already using the new API.) This will improve subset equivalence/disjointness checks with non-constant offsets/sizes/strides. --- .../mlir/Interfaces/ValueBoundsOpInterface.h | 57 ++++- .../SCF/IR/ValueBoundsOpInterfaceImpl.cpp | 31 +-- .../lib/Interfaces/ValueBoundsOpInterface.cpp | 237 +++++++++++++----- .../value-bounds-op-interface-impl.mlir | 43 +++- .../SCF/value-bounds-op-interface-impl.mlir | 12 + .../value-bounds-op-interface-impl.mlir | 16 +- .../Dialect/Affine/TestReifyValueBounds.cpp | 79 +++++- 7 files changed, 361 insertions(+), 114 deletions(-) diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index 3543ab52407a3..1d7bc6ea961cc 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -211,7 +211,8 @@ class ValueBoundsConstraintSet /// Comparison operator for `ValueBoundsConstraintSet::compare`. enum ComparisonOperator { LT, LE, EQ, GT, GE }; - /// Try to prove that, based on the current state of this constraint set + /// Populate constraints for lhs/rhs (until the stop condition is met). Then, + /// try to prove that, based on the current state of this constraint set /// (i.e., without analyzing additional IR or adding new constraints), the /// "lhs" value/dim is LE/LT/EQ/GT/GE than the "rhs" value/dim. /// @@ -220,24 +221,37 @@ class ValueBoundsConstraintSet /// proven. This could be because the specified relation does in fact not hold /// or because there is not enough information in the constraint set. In other /// words, if we do not know for sure, this function returns "false". - bool compare(Value lhs, std::optional lhsDim, ComparisonOperator cmp, - Value rhs, std::optional rhsDim); + bool populateAndCompare(OpFoldResult lhs, std::optional lhsDim, + ComparisonOperator cmp, OpFoldResult rhs, + std::optional rhsDim); + + /// Return "true" if "lhs cmp rhs" was proven to hold. Return "false" if the + /// specified relation could not be proven. This could be because the + /// specified relation does in fact not hold or because there is not enough + /// information in the constraint set. In other words, if we do not know for + /// sure, this function returns "false". 
+  ///
+  /// This function keeps traversing the backward slice of lhs/rhs until it
+  /// can prove the relation or until it runs out of IR.
+  static bool compare(OpFoldResult lhs, std::optional<int64_t> lhsDim,
+                      ComparisonOperator cmp, OpFoldResult rhs,
+                      std::optional<int64_t> rhsDim);
+  static bool compare(AffineMap lhs, ValueDimList lhsOperands,
+                      ComparisonOperator cmp, AffineMap rhs,
+                      ValueDimList rhsOperands);
+  static bool compare(AffineMap lhs, ArrayRef<Value> lhsOperands,
+                      ComparisonOperator cmp, AffineMap rhs,
+                      ArrayRef<Value> rhsOperands);
 
   /// Compute whether the given values/dimensions are equal. Return "failure" if
   /// equality could not be determined.
   ///
   /// `dim1`/`dim2` must be `nullopt` if and only if `value1`/`value2` are
   /// index-typed.
-  static FailureOr<bool> areEqual(Value value1, Value value2,
+  static FailureOr<bool> areEqual(OpFoldResult value1, OpFoldResult value2,
                                   std::optional<int64_t> dim1 = std::nullopt,
                                   std::optional<int64_t> dim2 = std::nullopt);
 
-  /// Compute whether the given values/attributes are equal. Return "failure" if
-  /// equality could not be determined.
-  ///
-  /// `ofr1`/`ofr2` must be of index type.
-  static FailureOr<bool> areEqual(OpFoldResult ofr1, OpFoldResult ofr2);
-
   /// Return "true" if the given slices are guaranteed to be overlapping.
   /// Return "false" if the given slices are guaranteed to be non-overlapping.
   /// Return "failure" if unknown.
@@ -294,6 +308,20 @@ class ValueBoundsConstraintSet
   ValueBoundsConstraintSet(MLIRContext *ctx, StopConditionFn stopCondition);
 
+  /// Return "true" if, based on the current state of the constraint system,
+  /// "lhs cmp rhs" was proven to hold. Return "false" if the specified relation
+  /// could not be proven. This could be because the specified relation does in
+  /// fact not hold or because there is not enough information in the constraint
+  /// set. In other words, if we do not know for sure, this function returns
+  /// "false".
+  ///
+  /// This function does not analyze any IR and does not populate any additional
+  /// constraints.
+  bool compareValueDims(OpFoldResult lhs, std::optional<int64_t> lhsDim,
+                        ComparisonOperator cmp, OpFoldResult rhs,
+                        std::optional<int64_t> rhsDim);
+  bool comparePos(int64_t lhsPos, ComparisonOperator cmp, int64_t rhsPos);
+
   /// Given an affine map with a single result (and map operands), add a new
   /// column to the constraint set that represents the result of the map.
   /// Traverse additional IR starting from the map operands as needed (as long
@@ -319,6 +347,10 @@ class ValueBoundsConstraintSet
   /// set.
   AffineExpr getPosExpr(int64_t pos);
 
+  /// Return "true" if the given value/dim is mapped (i.e., has a corresponding
+  /// column in the constraint system).
+  bool isMapped(Value value, std::optional<int64_t> dim = std::nullopt) const;
+
   /// Insert a value/dimension into the constraint set. If `isSymbol` is set to
   /// "false", a dimension is added. The value/dimension is added to the
   /// worklist if `addToWorklist` is set.
@@ -338,6 +370,11 @@ class ValueBoundsConstraintSet
   /// dimensions but not for symbols.
   int64_t insert(bool isSymbol = true);
 
+  /// Insert the given affine map and its bound operands as a new column in the
+  /// constraint system. Return the position of the new column. Any operands
+  /// that were not analyzed yet are put on the worklist.
+  int64_t insert(AffineMap map, ValueDimList operands, bool isSymbol = true);
+
   /// Project out the given column in the constraint set.
void projectOut(int64_t pos); diff --git a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp index 72c5aaa230678..087ffc438a830 100644 --- a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp @@ -58,20 +58,11 @@ struct ForOpInterface Value iterArg = forOp.getRegionIterArg(iterArgIdx); Value initArg = forOp.getInitArgs()[iterArgIdx]; - // Populate constraints for the yielded value. - cstr.populateConstraints(yieldedValue, dim); - // Populate constraints for the iter_arg. This is just to ensure that the - // iter_arg is mapped in the constraint set, which is a prerequisite for - // `compare`. It may lead to a recursive call to this function in case the - // iter_arg was not visited when the constraints for the yielded value were - // populated, but no additional work is done. - cstr.populateConstraints(iterArg, dim); - // An EQ constraint can be added if the yielded value (dimension size) // equals the corresponding block argument (dimension size). - if (cstr.compare(yieldedValue, dim, - ValueBoundsConstraintSet::ComparisonOperator::EQ, iterArg, - dim)) { + if (cstr.populateAndCompare( + yieldedValue, dim, ValueBoundsConstraintSet::ComparisonOperator::EQ, + iterArg, dim)) { if (dim.has_value()) { cstr.bound(value)[*dim] == cstr.getExpr(initArg, dim); } else { @@ -113,10 +104,6 @@ struct IfOpInterface Value thenValue = ifOp.thenYield().getResults()[resultNum]; Value elseValue = ifOp.elseYield().getResults()[resultNum]; - // Populate constraints for the yielded value (and all values on the - // backward slice, as long as the current stop condition is not satisfied). - cstr.populateConstraints(thenValue, dim); - cstr.populateConstraints(elseValue, dim); auto boundsBuilder = cstr.bound(value); if (dim) boundsBuilder[*dim]; @@ -125,9 +112,9 @@ struct IfOpInterface // If thenValue <= elseValue: // * result <= elseValue // * result >= thenValue - if (cstr.compare(thenValue, dim, - ValueBoundsConstraintSet::ComparisonOperator::LE, - elseValue, dim)) { + if (cstr.populateAndCompare( + thenValue, dim, ValueBoundsConstraintSet::ComparisonOperator::LE, + elseValue, dim)) { if (dim) { cstr.bound(value)[*dim] >= cstr.getExpr(thenValue, dim); cstr.bound(value)[*dim] <= cstr.getExpr(elseValue, dim); @@ -139,9 +126,9 @@ struct IfOpInterface // If elseValue <= thenValue: // * result <= thenValue // * result >= elseValue - if (cstr.compare(elseValue, dim, - ValueBoundsConstraintSet::ComparisonOperator::LE, - thenValue, dim)) { + if (cstr.populateAndCompare( + elseValue, dim, ValueBoundsConstraintSet::ComparisonOperator::LE, + thenValue, dim)) { if (dim) { cstr.bound(value)[*dim] >= cstr.getExpr(elseValue, dim); cstr.bound(value)[*dim] <= cstr.getExpr(thenValue, dim); diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index 6e3d6dd3c7575..c138056ab41cc 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -202,6 +202,28 @@ int64_t ValueBoundsConstraintSet::insert(bool isSymbol) { return pos; } +int64_t ValueBoundsConstraintSet::insert(AffineMap map, ValueDimList operands, + bool isSymbol) { + assert(map.getNumResults() == 1 && "expected affine map with one result"); + int64_t pos = insert(/*isSymbol=*/false); + + // Add map and operands to the constraint set. Dimensions are converted to + // symbols. 
All operands are added to the worklist (unless they were already + // processed). + auto mapper = [&](std::pair> v) { + return getExpr(v.first, v.second); + }; + SmallVector dimReplacements = llvm::to_vector( + llvm::map_range(ArrayRef(operands).take_front(map.getNumDims()), mapper)); + SmallVector symReplacements = llvm::to_vector( + llvm::map_range(ArrayRef(operands).drop_front(map.getNumDims()), mapper)); + addBound( + presburger::BoundType::EQ, pos, + map.getResult(0).replaceDimsAndSymbols(dimReplacements, symReplacements)); + + return pos; +} + int64_t ValueBoundsConstraintSet::getPos(Value value, std::optional dim) const { #ifndef NDEBUG @@ -224,6 +246,13 @@ AffineExpr ValueBoundsConstraintSet::getPosExpr(int64_t pos) { : builder.getAffineSymbolExpr(pos - cstr.getNumDimVars()); } +bool ValueBoundsConstraintSet::isMapped(Value value, + std::optional dim) const { + auto it = + valueDimToPosition.find(std::make_pair(value, dim.value_or(kIndexValue))); + return it != valueDimToPosition.end(); +} + static Operation *getOwnerOfValue(Value value) { if (auto bbArg = dyn_cast(value)) return bbArg.getOwner()->getParentOp(); @@ -560,27 +589,10 @@ void ValueBoundsConstraintSet::populateConstraints(Value value, int64_t ValueBoundsConstraintSet::populateConstraints(AffineMap map, ValueDimList operands) { - assert(map.getNumResults() == 1 && "expected affine map with one result"); - int64_t pos = insert(/*isSymbol=*/false); - - // Add map and operands to the constraint set. Dimensions are converted to - // symbols. All operands are added to the worklist (unless they were already - // processed). - auto mapper = [&](std::pair> v) { - return getExpr(v.first, v.second); - }; - SmallVector dimReplacements = llvm::to_vector( - llvm::map_range(ArrayRef(operands).take_front(map.getNumDims()), mapper)); - SmallVector symReplacements = llvm::to_vector( - llvm::map_range(ArrayRef(operands).drop_front(map.getNumDims()), mapper)); - addBound( - presburger::BoundType::EQ, pos, - map.getResult(0).replaceDimsAndSymbols(dimReplacements, symReplacements)); - + int64_t pos = insert(map, operands, /*isSymbol=*/false); // Process the backward slice of `operands` (i.e., reverse use-def chain) // until `stopCondition` is met. processWorklist(); - return pos; } @@ -600,9 +612,18 @@ ValueBoundsConstraintSet::computeConstantDelta(Value value1, Value value2, {{value1, dim1}, {value2, dim2}}); } -bool ValueBoundsConstraintSet::compare(Value lhs, std::optional lhsDim, - ComparisonOperator cmp, Value rhs, - std::optional rhsDim) { +bool ValueBoundsConstraintSet::compareValueDims(OpFoldResult lhs, + std::optional lhsDim, + ComparisonOperator cmp, + OpFoldResult rhs, + std::optional rhsDim) { +#ifndef NDEBUG + if (auto lhsVal = dyn_cast(lhs)) + assertValidValueDim(lhsVal, lhsDim); + if (auto rhsVal = dyn_cast(rhs)) + assertValidValueDim(rhsVal, rhsDim); +#endif // NDEBUG + // This function returns "true" if "lhs CMP rhs" is proven to hold. // // Example for ComparisonOperator::LE and index-typed values: We would like to @@ -621,24 +642,32 @@ bool ValueBoundsConstraintSet::compare(Value lhs, std::optional lhsDim, // EQ can be expressed as LE and GE. if (cmp == EQ) - return compare(lhs, lhsDim, ComparisonOperator::LE, rhs, rhsDim) && - compare(lhs, lhsDim, ComparisonOperator::GE, rhs, rhsDim); + return compareValueDims(lhs, lhsDim, ComparisonOperator::LE, rhs, rhsDim) && + compareValueDims(lhs, lhsDim, ComparisonOperator::GE, rhs, rhsDim); // Construct inequality. For the above example: lhs > rhs. 
// `IntegerRelation` inequalities are expressed in the "flattened" form and // with ">= 0". I.e., lhs - rhs - 1 >= 0. - SmallVector eq(cstr.getNumDimAndSymbolVars() + 1, 0); + SmallVector eq(cstr.getNumCols(), 0); + auto addToEq = [&](OpFoldResult ofr, std::optional dim, + int64_t factor) { + if (auto constVal = ::getConstantIntValue(ofr)) { + eq[cstr.getNumCols() - 1] += *constVal * factor; + } else { + eq[getPos(cast(ofr), dim)] += factor; + } + }; if (cmp == LT || cmp == LE) { - ++eq[getPos(lhs, lhsDim)]; - --eq[getPos(rhs, rhsDim)]; + addToEq(lhs, lhsDim, 1); + addToEq(rhs, rhsDim, -1); } else if (cmp == GT || cmp == GE) { - --eq[getPos(lhs, lhsDim)]; - ++eq[getPos(rhs, rhsDim)]; + addToEq(lhs, lhsDim, -1); + addToEq(rhs, rhsDim, 1); } else { llvm_unreachable("unsupported comparison operator"); } if (cmp == LE || cmp == GE) - eq[cstr.getNumDimAndSymbolVars()] -= 1; + eq[cstr.getNumCols() - 1] -= 1; // Add inequality to the constraint set and check if it made the constraint // set empty. @@ -649,40 +678,128 @@ bool ValueBoundsConstraintSet::compare(Value lhs, std::optional lhsDim, return isEmpty; } +bool ValueBoundsConstraintSet::comparePos(int64_t lhsPos, + ComparisonOperator cmp, + int64_t rhsPos) { + // This function returns "true" if "lhs CMP rhs" is proven to hold. For + // detailed documentation, see `compareValueDims`. + + // EQ can be expressed as LE and GE. + if (cmp == EQ) + return comparePos(lhsPos, ComparisonOperator::LE, rhsPos) && + comparePos(lhsPos, ComparisonOperator::GE, rhsPos); + + // Construct inequality. + SmallVector eq(cstr.getNumCols(), 0); + if (cmp == LT || cmp == LE) { + ++eq[lhsPos]; + --eq[rhsPos]; + } else if (cmp == GT || cmp == GE) { + --eq[lhsPos]; + ++eq[rhsPos]; + } else { + llvm_unreachable("unsupported comparison operator"); + } + if (cmp == LE || cmp == GE) + eq[cstr.getNumCols() - 1] -= 1; + + // Add inequality to the constraint set and check if it made the constraint + // set empty. + int64_t ineqPos = cstr.getNumInequalities(); + cstr.addInequality(eq); + bool isEmpty = cstr.isEmpty(); + cstr.removeInequality(ineqPos); + return isEmpty; +} + +bool ValueBoundsConstraintSet::populateAndCompare( + OpFoldResult lhs, std::optional lhsDim, ComparisonOperator cmp, + OpFoldResult rhs, std::optional rhsDim) { +#ifndef NDEBUG + if (auto lhsVal = dyn_cast(lhs)) + assertValidValueDim(lhsVal, lhsDim); + if (auto rhsVal = dyn_cast(rhs)) + assertValidValueDim(rhsVal, rhsDim); +#endif // NDEBUG + + if (auto lhsVal = dyn_cast(lhs)) + populateConstraints(lhsVal, lhsDim); + if (auto rhsVal = dyn_cast(rhs)) + populateConstraints(rhsVal, rhsDim); + + return compareValueDims(lhs, lhsDim, cmp, rhs, rhsDim); +} + +bool ValueBoundsConstraintSet::compare(OpFoldResult lhs, + std::optional lhsDim, + ComparisonOperator cmp, OpFoldResult rhs, + std::optional rhsDim) { + auto stopCondition = [&](Value v, std::optional dim, + ValueBoundsConstraintSet &cstr) { + // Keep processing as long as lhs/rhs are not mapped. + if (auto lhsVal = dyn_cast(lhs)) + if (!cstr.isMapped(lhsVal, dim)) + return false; + if (auto rhsVal = dyn_cast(rhs)) + if (!cstr.isMapped(rhsVal, dim)) + return false; + // Keep processing as long as the relation cannot be proven. 
+ return cstr.compareValueDims(lhs, lhsDim, cmp, rhs, rhsDim); + }; + + ValueBoundsConstraintSet cstr(lhs.getContext(), stopCondition); + return cstr.populateAndCompare(lhs, lhsDim, cmp, rhs, rhsDim); +} + +bool ValueBoundsConstraintSet::compare(AffineMap lhs, ValueDimList lhsOperands, + ComparisonOperator cmp, AffineMap rhs, + ValueDimList rhsOperands) { + int64_t lhsPos = -1, rhsPos = -1; + auto stopCondition = [&](Value v, std::optional dim, + ValueBoundsConstraintSet &cstr) { + // Keep processing as long as lhs/rhs were not processed. + if (lhsPos >= cstr.positionToValueDim.size() || + rhsPos >= cstr.positionToValueDim.size()) + return false; + // Keep processing as long as the relation cannot be proven. + return cstr.comparePos(lhsPos, cmp, rhsPos); + }; + ValueBoundsConstraintSet cstr(lhs.getContext(), stopCondition); + lhsPos = cstr.insert(lhs, lhsOperands); + rhsPos = cstr.insert(rhs, rhsOperands); + cstr.processWorklist(); + return cstr.comparePos(lhsPos, cmp, rhsPos); +} + +bool ValueBoundsConstraintSet::compare(AffineMap lhs, + ArrayRef lhsOperands, + ComparisonOperator cmp, AffineMap rhs, + ArrayRef rhsOperands) { + ValueDimList lhsValueDimOperands = + llvm::map_to_vector(lhsOperands, [](Value v) { + return std::make_pair(v, std::optional()); + }); + ValueDimList rhsValueDimOperands = + llvm::map_to_vector(rhsOperands, [](Value v) { + return std::make_pair(v, std::optional()); + }); + return ValueBoundsConstraintSet::compare(lhs, lhsValueDimOperands, cmp, rhs, + rhsValueDimOperands); +} + FailureOr -ValueBoundsConstraintSet::areEqual(Value value1, Value value2, +ValueBoundsConstraintSet::areEqual(OpFoldResult value1, OpFoldResult value2, std::optional dim1, std::optional dim2) { - // Subtract the two values/dimensions from each other. If the result is 0, - // both are equal. 
- FailureOr delta = computeConstantDelta(value1, value2, dim1, dim2); - if (failed(delta)) - return failure(); - return *delta == 0; -} - -FailureOr ValueBoundsConstraintSet::areEqual(OpFoldResult ofr1, - OpFoldResult ofr2) { - Builder b(ofr1.getContext()); - AffineMap map = - AffineMap::get(/*dimCount=*/0, /*symbolCount=*/2, - b.getAffineSymbolExpr(0) - b.getAffineSymbolExpr(1)); - SmallVector ofrOperands; - ofrOperands.push_back(ofr1); - ofrOperands.push_back(ofr2); - SmallVector valueOperands; - AffineMap foldedMap = - foldAttributesIntoMap(b, map, ofrOperands, valueOperands); - ValueDimList valueDims; - for (Value v : valueOperands) { - assert(v.getType().isIndex() && "expected index type"); - valueDims.emplace_back(v, std::nullopt); - } - FailureOr delta = - computeConstantBound(presburger::BoundType::EQ, foldedMap, valueDims); - if (failed(delta)) - return failure(); - return *delta == 0; + if (ValueBoundsConstraintSet::compare(value1, dim1, ComparisonOperator::EQ, + value2, dim2)) + return true; + if (ValueBoundsConstraintSet::compare(value1, dim1, ComparisonOperator::LT, + value2, dim2) || + ValueBoundsConstraintSet::compare(value1, dim1, ComparisonOperator::GT, + value2, dim2)) + return false; + return failure(); } FailureOr diff --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir index 55282e8334abd..10da91870f49d 100644 --- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir @@ -79,6 +79,17 @@ func.func @composed_affine_apply(%i1 : index) -> (index) { } +// ----- + +func.func @are_equal(%i1 : index) { + %i2 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16)>(%i1) + %i3 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 8)>(%i1) + %s = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%i2, %i3] + // expected-remark @below{{false}} + "test.compare"(%i2, %i3) : (index, index) -> () + return +} + // ----- // Test for affine::fullyComposeAndCheckIfEqual @@ -87,6 +98,36 @@ func.func @composed_are_equal(%i1 : index) { %i3 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 16 + 8)>(%i1) %s = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%i2, %i3] // expected-remark @below{{different}} - "test.are_equal"(%i2, %i3) {compose} : (index, index) -> () + "test.compare"(%i2, %i3) {compose} : (index, index) -> () + return +} + +// ----- + +func.func @compare_affine_max(%a: index, %b: index) { + %0 = affine.max affine_map<()[s0, s1] -> (s0, s1)>()[%a, %b] + // expected-remark @below{{true}} + "test.compare"(%0, %a) {cmp = "GE"} : (index, index) -> () + // expected-error @below{{unknown}} + "test.compare"(%0, %a) {cmp = "GT"} : (index, index) -> () + // expected-remark @below{{false}} + "test.compare"(%0, %a) {cmp = "LT"} : (index, index) -> () + // expected-error @below{{unknown}} + "test.compare"(%0, %a) {cmp = "LE"} : (index, index) -> () + return +} + +// ----- + +func.func @compare_affine_min(%a: index, %b: index) { + %0 = affine.min affine_map<()[s0, s1] -> (s0, s1)>()[%a, %b] + // expected-error @below{{unknown}} + "test.compare"(%0, %a) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{false}} + "test.compare"(%0, %a) {cmp = "GT"} : (index, index) -> () + // expected-error @below{{unknown}} + "test.compare"(%0, %a) {cmp = "LT"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%0, %a) {cmp = "LE"} : (index, index) -> () return } diff --git 
a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir index 0ea06737886d4..9ab03da1c9a94 100644 --- a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir @@ -219,3 +219,15 @@ func.func @scf_if_eq(%a: index, %b: index, %c : i1) { "test.some_use"(%reify1) : (index) -> () return } + +// ----- + +func.func @compare_scf_for(%a: index, %b: index, %c: index) { + scf.for %iv = %a to %b step %c { + // expected-remark @below{{true}} + "test.compare"(%iv, %a) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%iv, %b) {cmp = "LT"} : (index, index) -> () + } + return +} diff --git a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir index 45520da6aeb0b..0c90bcdb42028 100644 --- a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir @@ -163,8 +163,8 @@ func.func @dynamic_dims_are_equal(%t: tensor) { %c0 = arith.constant 0 : index %dim0 = tensor.dim %t, %c0 : tensor %dim1 = tensor.dim %t, %c0 : tensor - // expected-remark @below {{equal}} - "test.are_equal"(%dim0, %dim1) : (index, index) -> () + // expected-remark @below {{true}} + "test.compare"(%dim0, %dim1) : (index, index) -> () return } @@ -175,8 +175,8 @@ func.func @dynamic_dims_are_different(%t: tensor) { %c1 = arith.constant 1 : index %dim0 = tensor.dim %t, %c0 : tensor %val = arith.addi %dim0, %c1 : index - // expected-remark @below {{different}} - "test.are_equal"(%dim0, %val) : (index, index) -> () + // expected-remark @below {{false}} + "test.compare"(%dim0, %val) : (index, index) -> () return } @@ -186,8 +186,8 @@ func.func @dynamic_dims_are_maybe_equal_1(%t: tensor) { %c0 = arith.constant 0 : index %c5 = arith.constant 5 : index %dim0 = tensor.dim %t, %c0 : tensor - // expected-error @below {{could not determine equality}} - "test.are_equal"(%dim0, %c5) : (index, index) -> () + // expected-error @below {{unknown}} + "test.compare"(%dim0, %c5) : (index, index) -> () return } @@ -198,7 +198,7 @@ func.func @dynamic_dims_are_maybe_equal_2(%t: tensor) { %c1 = arith.constant 1 : index %dim0 = tensor.dim %t, %c0 : tensor %dim1 = tensor.dim %t, %c1 : tensor - // expected-error @below {{could not determine equality}} - "test.are_equal"(%dim0, %dim1) : (index, index) -> () + // expected-error @below {{unknown}} + "test.compare"(%dim0, %dim1) : (index, index) -> () return } diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 4b2b1a06341b7..f38631054fb3c 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -57,7 +57,7 @@ struct TestReifyValueBounds } // namespace -FailureOr parseBoundType(const std::string &type) { +static FailureOr parseBoundType(const std::string &type) { if (type == "EQ") return BoundType::EQ; if (type == "LB") @@ -67,6 +67,34 @@ FailureOr parseBoundType(const std::string &type) { return failure(); } +static FailureOr +parseComparisonOperator(const std::string &type) { + if (type == "EQ") + return ValueBoundsConstraintSet::ComparisonOperator::EQ; + if (type == "LT") + return ValueBoundsConstraintSet::ComparisonOperator::LT; + if (type == "LE") + return ValueBoundsConstraintSet::ComparisonOperator::LE; + if (type == "GT") + return 
ValueBoundsConstraintSet::ComparisonOperator::GT; + if (type == "GE") + return ValueBoundsConstraintSet::ComparisonOperator::GE; + return failure(); +} + +static ValueBoundsConstraintSet::ComparisonOperator +invertComparisonOperator(ValueBoundsConstraintSet::ComparisonOperator cmp) { + if (cmp == ValueBoundsConstraintSet::ComparisonOperator::LT) + return ValueBoundsConstraintSet::ComparisonOperator::GE; + if (cmp == ValueBoundsConstraintSet::ComparisonOperator::LE) + return ValueBoundsConstraintSet::ComparisonOperator::GT; + if (cmp == ValueBoundsConstraintSet::ComparisonOperator::GT) + return ValueBoundsConstraintSet::ComparisonOperator::LE; + if (cmp == ValueBoundsConstraintSet::ComparisonOperator::GE) + return ValueBoundsConstraintSet::ComparisonOperator::LT; + llvm_unreachable("unsupported comparison operator"); +} + /// Look for "test.reify_bound" ops in the input and replace their results with /// the reified values. static LogicalResult testReifyValueBounds(func::FuncOp funcOp, @@ -215,18 +243,34 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp, return failure(result.wasInterrupted()); } -/// Look for "test.are_equal" ops and emit errors/remarks. +/// Look for "test.compare" ops and emit errors/remarks. static LogicalResult testEquality(func::FuncOp funcOp) { IRRewriter rewriter(funcOp.getContext()); WalkResult result = funcOp.walk([&](Operation *op) { - // Look for test.are_equal ops. - if (op->getName().getStringRef() == "test.are_equal") { + // Look for test.compare ops. + if (op->getName().getStringRef() == "test.compare") { if (op->getNumOperands() != 2 || !op->getOperand(0).getType().isIndex() || !op->getOperand(1).getType().isIndex()) { op->emitOpError("invalid op"); return WalkResult::skip(); } + + // Get comparison operator. 
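+      // The "cmp" attribute is optional and defaults to EQ, matching the
+      // semantics of the old "test.are_equal" op that "test.compare" replaces.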
+ std::string cmpStr = "EQ"; + if (auto cmpAttr = op->getAttrOfType("cmp")) + cmpStr = cmpAttr.str(); + auto cmpType = parseComparisonOperator(cmpStr); + if (failed(cmpType)) { + op->emitOpError("invalid comparison operator"); + return WalkResult::interrupt(); + } + if (op->hasAttr("compose")) { + if (cmpType != ValueBoundsConstraintSet::EQ) { + op->emitOpError( + "comparison operator must be EQ when 'composed' is specified"); + return WalkResult::interrupt(); + } FailureOr delta = affine::fullyComposeAndComputeConstantDelta( op->getOperand(0), op->getOperand(1)); if (failed(delta)) { @@ -236,16 +280,25 @@ static LogicalResult testEquality(func::FuncOp funcOp) { } else { op->emitRemark("different"); } + return WalkResult::advance(); + } + + auto compare = [&](ValueBoundsConstraintSet::ComparisonOperator cmp) { + return ValueBoundsConstraintSet::compare( + /*lhs=*/op->getOperand(0), /*lhsDim=*/std::nullopt, cmp, + /*rhs=*/op->getOperand(1), /*rhsDim=*/std::nullopt); + }; + if (compare(*cmpType)) { + op->emitRemark("true"); + } else if (*cmpType != ValueBoundsConstraintSet::EQ && + compare(invertComparisonOperator(*cmpType))) { + op->emitRemark("false"); + } else if (*cmpType == ValueBoundsConstraintSet::EQ && + (compare(ValueBoundsConstraintSet::ComparisonOperator::LT) || + compare(ValueBoundsConstraintSet::ComparisonOperator::GT))) { + op->emitRemark("false"); } else { - FailureOr equal = ValueBoundsConstraintSet::areEqual( - op->getOperand(0), op->getOperand(1)); - if (failed(equal)) { - op->emitError("could not determine equality"); - } else if (*equal) { - op->emitRemark("equal"); - } else { - op->emitRemark("different"); - } + op->emitError("unknown"); } } return WalkResult::advance(); From 21265f692e4b3b2146b6095cf23122b20e8fa0ed Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 11 Apr 2024 08:27:12 +0200 Subject: [PATCH 104/886] [mlir][Interfaces] `ValueBoundsOpInterface`: Fix typo (#87976) This was likely a copy-and-paste typo. --- mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index c138056ab41cc..fa66da4a0def9 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -205,7 +205,7 @@ int64_t ValueBoundsConstraintSet::insert(bool isSymbol) { int64_t ValueBoundsConstraintSet::insert(AffineMap map, ValueDimList operands, bool isSymbol) { assert(map.getNumResults() == 1 && "expected affine map with one result"); - int64_t pos = insert(/*isSymbol=*/false); + int64_t pos = insert(isSymbol); // Add map and operands to the constraint set. Dimensions are converted to // symbols. 
All operands are added to the worklist (unless they were already

From dc39028906ba4196c3ba544c43ef6b428cf47c51 Mon Sep 17 00:00:00 2001
From: Jie Fu
Date: Thu, 11 Apr 2024 14:50:40 +0800
Subject: [PATCH 105/886] [mlir] Fix -Wsign-compare in ValueBoundsOpInterface.cpp (NFC)

/llvm-project/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp:762:16: error: comparison of integers of different signs: 'int64_t' (aka 'long') and 'size_t' (aka 'unsigned long') [-Werror,-Wsign-compare]
        rhsPos >= cstr.positionToValueDim.size())
        ~~~~~~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/llvm-project/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp:761:16: error: comparison of integers of different signs: 'int64_t' (aka 'long') and 'size_t' (aka 'unsigned long') [-Werror,-Wsign-compare]
    if (lhsPos >= cstr.positionToValueDim.size() ||
        ~~~~~~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 errors generated.
---
 mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index fa66da4a0def9..ffa4c0b55cad7 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -758,8 +758,8 @@ bool ValueBoundsConstraintSet::compare(AffineMap lhs, ValueDimList lhsOperands,
   auto stopCondition = [&](Value v, std::optional<int64_t> dim,
                            ValueBoundsConstraintSet &cstr) {
     // Keep processing as long as lhs/rhs were not processed.
-    if (lhsPos >= cstr.positionToValueDim.size() ||
-        rhsPos >= cstr.positionToValueDim.size())
+    if (size_t(lhsPos) >= cstr.positionToValueDim.size() ||
+        size_t(rhsPos) >= cstr.positionToValueDim.size())
       return false;
     // Keep processing as long as the relation cannot be proven.
     return cstr.comparePos(lhsPos, cmp, rhsPos);

From 3f7f446d3803a699f5964a7429c6e1de0d783452 Mon Sep 17 00:00:00 2001
From: Haohai Wen
Date: Thu, 11 Apr 2024 15:28:32 +0800
Subject: [PATCH 106/886] [llvm-profgen] Remove temporary perf script files (#86668)

The temporary perf script files converted from perf data will occupy lots
of space for large projects. This patch removes them when llvm-profgen
exits normally or receives signals.
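The cleanup is implemented with llvm::CleanupInstaller (from
llvm/Support/ToolOutputFile.h), which registers a signal handler when
constructed and removes the file in its destructor. A minimal sketch of
the pattern, assuming the CleanupInstaller(StringRef) constructor used by
this patch (the function and file names are illustrative):

    #include "llvm/Support/ToolOutputFile.h"

    static void convertAndParse(llvm::StringRef PerfTraceFile) {
      // Remove the temporary trace on fatal signals and, unless Keep is
      // set, when this object goes out of scope on normal exit.
      llvm::CleanupInstaller Cleanup(PerfTraceFile);
      // ... run "perf script" into PerfTraceFile and parse it ...
    }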
--- llvm/tools/llvm-profgen/PerfReader.cpp | 6 ++++++ llvm/tools/llvm-profgen/PerfReader.h | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 878147642aa6e..e9442027aed3f 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -11,6 +11,7 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Process.h" +#include "llvm/Support/ToolOutputFile.h" #define DEBUG_TYPE "perf-reader" @@ -375,6 +376,9 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, StringRef(ErrorFile)}; // Stderr sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects); + PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile); + PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile); + // Collect the PIDs TraceStream TraceIt(PerfTraceFile); std::string PIDs; @@ -1220,5 +1224,7 @@ void PerfScriptReader::parsePerfTraces() { writeUnsymbolizedProfile(OutputFilename); } +SmallVector PerfScriptReader::TempFileCleanups; + } // end namespace sampleprof } // end namespace llvm diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h index e9f619350bf97..b821cbe13efae 100644 --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -21,6 +21,9 @@ using namespace llvm; using namespace sampleprof; namespace llvm { + +class CleanupInstaller; + namespace sampleprof { // Stream based trace line iterator @@ -604,6 +607,11 @@ class PerfScriptReader : public PerfReaderBase { // Extract perf script type by peaking at the input static PerfContent checkPerfScriptType(StringRef FileName); + // Cleanup installers for temporary files created by perf script command. + // Those files will be automatically removed when running destructor or + // receiving signals. + static SmallVector TempFileCleanups; + protected: // The parsed MMap event struct MMapEvent { From a53674359da8507af539bf879e1b8292e3720eb8 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 11 Apr 2024 08:45:28 +0100 Subject: [PATCH 107/886] [AArch64] Add ZIP and UZP shuffle costs. (#88150) This adds some costs for the shuffle instructions that should be lowered to zip1/zip2/uzp1/uzp2 instructions. --- .../Target/AArch64/AArch64ISelLowering.cpp | 29 -------- .../Target/AArch64/AArch64PerfectShuffle.h | 33 ++++++++++ .../AArch64/AArch64TargetTransformInfo.cpp | 10 +++ .../CostModel/AArch64/shuffle-other.ll | 66 +++++++++---------- .../CostModel/AArch64/shuffle-store.ll | 14 ++-- .../AArch64/vecreduce-shuffle.ll | 2 +- 6 files changed, 84 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 744b2cdef504d..80181a77c9d23 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11851,35 +11851,6 @@ static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { return true; } -static bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts % 2 != 0) - return false; - WhichResult = (M[0] == 0 ? 
0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != Idx) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) - return false; - Idx += 1; - } - - return true; -} - -static bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != 2 * i + WhichResult) - return false; - } - - return true; -} - static bool isTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); if (NumElts % 2 != 0) diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h index 5846fd454b654..7abaead694d11 100644 --- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h +++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -16,6 +16,8 @@ #include "llvm/ADT/ArrayRef.h" +namespace llvm { + // 31 entries have cost 0 // 756 entries have cost 1 // 3690 entries have cost 2 @@ -6618,4 +6620,35 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef M) { return (PFEntry >> 30) + 1; } +inline bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && (unsigned)M[i] != Idx) || + (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) + return false; + Idx += 1; + } + + return true; +} + +inline bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != 2 * i + WhichResult) + return false; + } + + return true; +} + +} // namespace llvm + #endif diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 20150d7386753..bd943de06b4b2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3932,6 +3932,16 @@ InstructionCost AArch64TTIImpl::getShuffleCost( })) return 0; + // Check for other shuffles that are not SK_ kinds but we have native + // instructions for, for example ZIP and UZP. 
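+  // For example, with <4 x i32> operands, zip1 corresponds to the shuffle
+  // mask <0, 4, 1, 5>, zip2 to <2, 6, 3, 7>, uzp1 to <0, 2, 4, 6> and
+  // uzp2 to <1, 3, 5, 7>.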
+ unsigned Unused; + if (LT.second.isFixedLengthVector() && + LT.second.getVectorNumElements() == Mask.size() && + (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) && + (isZIPMask(Mask, LT.second, Unused) || + isUZPMask(Mask, LT.second, Unused))) + return 1; + if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) { diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll index d469ce6305932..6c45ebcb69f40 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -171,28 +171,28 @@ define void @zip() { ; CHECK-LABEL: 'zip' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %zip1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %zip1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zipv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zipv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, 
<4 x i32> @@ -276,24 +276,24 @@ define void @zip() { define void @uzp() { ; CHECK-LABEL: 'uzp' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 112 for instruction: %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzpv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> @@ -360,10 +360,10 @@ define void @uzp() { define void @multipart() { ; CHECK-LABEL: 'multipart' -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16a = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16b = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v16c = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v16d = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16c = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16d = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32a = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a4 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32idrev = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll index 12de334574f5c..cd434afddc9a9 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll @@ -5,21 +5,21 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @vst2(ptr %p) { ; CHECK-LABEL: 'vst2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16i8, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v4i16, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v8i16, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v4i32, ptr %p, align 16 diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll index c505cb7b181c2..d69cb75664a8c 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll @@ -416,7 +416,7 @@ define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 
@reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i16( -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> ; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] From 85bc6de67ef28cd203da0c5abc1485609bea989c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 10 Apr 2024 11:18:11 +0200 Subject: [PATCH 108/886] Revert "Use setup_host_tool for clang-ast-dump, fixes 76707" This reverts commit b4adb42151bbfa80be4cf6d076cbe5edf680693e. The original commit increased local rebuild times a lot. See the discussion in https://github.com/llvm/llvm-project/issues/76707 --- clang/lib/Tooling/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index 8b4ab0e212964..91e6cbdcbc44f 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -53,16 +53,14 @@ else() list(APPEND implicitDirs -I ${implicitDir}) endforeach() - setup_host_tool(clang-ast-dump CLANG_AST_DUMP clang_ast_dump_exe clang_ast_dump_target) - include(GetClangResourceDir) get_clang_resource_dir(resource_dir PREFIX ${LLVM_BINARY_DIR}) add_custom_command( COMMENT Generate ASTNodeAPI.json OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ASTNodeAPI.json - DEPENDS ${clang_ast_dump_target} clang-resource-headers + DEPENDS clang-ast-dump clang-resource-headers COMMAND - ${clang_ast_dump_exe} + $ # Skip this in debug mode because parsing AST.h is too slow --skip-processing=${skip_expensive_processing} -I ${resource_dir}/include From d7e0ea205fa111fba46e08f3df2860f76b47acb6 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Thu, 11 Apr 2024 03:33:04 -0400 Subject: [PATCH 109/886] [PowerPC] add testcase for a xxinsertw bug, NFC --- llvm/test/CodeGen/PowerPC/xxinsertw.ll | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/xxinsertw.ll diff --git a/llvm/test/CodeGen/PowerPC/xxinsertw.ll b/llvm/test/CodeGen/PowerPC/xxinsertw.ll new file mode 100644 index 0000000000000..b48eac06a694a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxinsertw.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -stop-after=finalize-isel -verify-machineinstrs < %s | \ +; RUN: FileCheck %s + +define <4 x i1> @foo(i1 %c1, i1 %c2, i1 %c3) { + ; CHECK-LABEL: name: foo + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x3, $x4, $x5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY $x5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:g8rc = COPY $x4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:g8rc = COPY $x3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY [[COPY1]].sub_32 + ; CHECK-NEXT: [[MTVSRWZ:%[0-9]+]]:vsfrc = MTVSRWZ killed [[COPY3]] + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vrrc = SUBREG_TO_REG 1, killed [[MTVSRWZ]], %subreg.sub_64 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprc = COPY [[COPY2]].sub_32 + ; CHECK-NEXT: [[MTVSRWZ1:%[0-9]+]]:vsfrc = MTVSRWZ killed [[COPY4]] + ; CHECK-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:vrrc = SUBREG_TO_REG 1, killed [[MTVSRWZ1]], %subreg.sub_64 + ; CHECK-NEXT: [[VMRGOW:%[0-9]+]]:vrrc = VMRGOW 
killed [[SUBREG_TO_REG1]], killed [[SUBREG_TO_REG]]
+  ; CHECK-NEXT: [[LDtocCPT:%[0-9]+]]:g8rc_and_g8rc_nox0 = LDtocCPT %const.0, $x2 :: (load (s64) from got)
+  ; CHECK-NEXT: [[LXV:%[0-9]+]]:vsrc = LXV 0, killed [[LDtocCPT]] :: (load (s128) from constant-pool)
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprc = COPY [[COPY]].sub_32
+  ; CHECK-NEXT: [[MTVSRWZ2:%[0-9]+]]:vsfrc = MTVSRWZ killed [[COPY5]]
+  ; CHECK-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:vsrc = SUBREG_TO_REG 1, killed [[MTVSRWZ2]], %subreg.sub_64
+  ; CHECK-NEXT: [[XXPERM:%[0-9]+]]:vsrc = XXPERM killed [[VMRGOW]], [[SUBREG_TO_REG2]], killed [[LXV]]
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:vsrc = IMPLICIT_DEF
+  ; CHECK-NEXT: [[XXINSERTW:%[0-9]+]]:vsrc = XXINSERTW [[DEF]], killed [[XXPERM]], 8
+  ; CHECK-NEXT: $v2 = COPY [[XXINSERTW]]
+  ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
+  %1 = insertelement <4 x i1> poison, i1 %c1, i64 0
+  %2 = insertelement <4 x i1> %1, i1 %c2, i64 1
+  %3 = insertelement <4 x i1> %2, i1 %c3, i64 3
+  %4 = shufflevector <4 x i1> %3, <4 x i1> poison, <4 x i32>
+  ret <4 x i1> %4
+}

From 053750c3b42c126eb4620f62cbf4e665803b941d Mon Sep 17 00:00:00 2001
From: Chen Zheng
Date: Thu, 11 Apr 2024 03:39:07 -0400
Subject: [PATCH 110/886] [PowerPC] Fix the undef register for VECINSERT

If the V2 of the vector_shuffle is undef, the two vector inputs are
expected to be the same when doing the VECINSERT transformation.
Currently the first operand of VECINSERT is set to undef, which is not
right. This patch fixes that bug.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +++-
 llvm/test/CodeGen/PowerPC/xxinsertw.ll      | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 43e4a34a9b348..52d5b71367059 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10142,7 +10142,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   if (Subtarget.hasP9Vector() &&
       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                            isLittleEndian)) {
-    if (Swap)
+    if (V2.isUndef())
+      V2 = V1;
+    else if (Swap)
       std::swap(V1, V2);
     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
diff --git a/llvm/test/CodeGen/PowerPC/xxinsertw.ll b/llvm/test/CodeGen/PowerPC/xxinsertw.ll
index b48eac06a694a..f944b5a175be4 100644
--- a/llvm/test/CodeGen/PowerPC/xxinsertw.ll
+++ b/llvm/test/CodeGen/PowerPC/xxinsertw.ll
@@ -24,8 +24,7 @@
   ; CHECK-NEXT: [[MTVSRWZ2:%[0-9]+]]:vsfrc = MTVSRWZ killed [[COPY5]]
   ; CHECK-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:vsrc = SUBREG_TO_REG 1, killed [[MTVSRWZ2]], %subreg.sub_64
   ; CHECK-NEXT: [[XXPERM:%[0-9]+]]:vsrc = XXPERM killed [[VMRGOW]], [[SUBREG_TO_REG2]], killed [[LXV]]
-  ; CHECK-NEXT: [[DEF:%[0-9]+]]:vsrc = IMPLICIT_DEF
-  ; CHECK-NEXT: [[XXINSERTW:%[0-9]+]]:vsrc = XXINSERTW [[DEF]], killed [[XXPERM]], 8
+  ; CHECK-NEXT: [[XXINSERTW:%[0-9]+]]:vsrc = XXINSERTW [[XXPERM]], [[XXPERM]], 8
   ; CHECK-NEXT: $v2 = COPY [[XXINSERTW]]
   ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
   %1 = insertelement <4 x i1> poison, i1 %c1, i64 0

From a1cd5e69544ad3e6a865f5e0593ac26195ccb4f7 Mon Sep 17 00:00:00 2001
From: Michael Klemm
Date: Thu, 11 Apr 2024 10:18:34 +0200
Subject: [PATCH 111/886] [flang] Do not create .f18.mod files for each compiled module (#85249)

The default CMake scripts had a copy operation to copy a compiled `.mod`
file to also be available with
suffix `.f18.mod`. This seems no longer needed. Also updated
ModFiles.md to point to `-module-suffix`.

---------

Co-authored-by: Kiran Chandramohan 
---
 flang/docs/ModFiles.md         | 4 +++-
 flang/tools/f18/CMakeLists.txt | 7 ++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md
index e55d72fa3a705..7463454c8563a 100644
--- a/flang/docs/ModFiles.md
+++ b/flang/docs/ModFiles.md
@@ -27,7 +27,9 @@ often use `rm *.mod` to clean up.
 The disadvantage of using the same name as other compilers is that it is not
 clear which compiler created a `.mod` file and files from multiple compilers
 cannot be in the same directory. This could be solved by adding something
-between the module name and extension, e.g. `-f18.mod`.
+between the module name and extension, e.g. `-f18.mod`. If this
+is needed, Flang's fc1 accepts the option `-module-suffix` to alter the suffix
+used for the module file.
 
 ## Format
 
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index e266055a4bf01..dda3b6887be89 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -62,11 +62,8 @@ if (NOT CMAKE_CROSSCOMPILING)
       ${FLANG_SOURCE_DIR}/module/${filename}.f90
      DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
     )
-    add_custom_command(OUTPUT ${base}.f18.mod
-      DEPENDS ${base}.mod
-      COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod)
-    list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod)
-    install(FILES ${base}.mod ${base}.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang")
+    list(APPEND MODULE_FILES ${base}.mod)
+    install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang")
   endforeach()
 
   # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include.

From 9c6e54b154cbbb7da0f45b4ae1e66bcf492151f1 Mon Sep 17 00:00:00 2001
From: Maciej Gabka 
Date: Thu, 11 Apr 2024 09:32:17 +0100
Subject: [PATCH 112/886] [AArch64] Fix to Neoverse V2 scheduling model
 (#88130)

The size of the ROB was incorrectly copied from the Neoverse N2, while
it actually has a higher value, as described in
https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/arm-neoverse-v2-platform-best-in-class-cloud-and-ai-ml-performance

---
 llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 4d7f44e7b9b9a..7fed8fed90017 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -15,7 +15,7 @@ def NeoverseV2Model : SchedMachineModel {
   let IssueWidth = 16; // Micro-ops dispatched at a time.
-  let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
+  let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer.
   let LoadLatency = 4; // Optimistic load latency.
   let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
   let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.

From cd14e7132f18dccd5fc7ed5e60258460bc1352f8 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra 
Date: Thu, 11 Apr 2024 09:36:56 +0100
Subject: [PATCH 113/886] lld/AArch64: handle more relocation addends (#87328)

The function getImplicitAddend() is incomplete, as it is possible to
cook up object files with other relocation addends.
Although using llvm-mc or the clang integrated assembler does not
produce such object files, a proprietary assembler known as armasm can:

https://developer.arm.com/documentation/101754/0622/armasm-Legacy-Assembler-Reference

armasm is in a frozen state, but it is still actively used in a lot of
legacy codebases, as its directives, macros and operators are very
different from those of the clang integrated assembler. This makes
porting a lot of legacy code from armasm syntax impractical for many
projects. Some internal testing of projects using open-source clang and
lld fell over at link time when legacy armasm objects were included in
the link.

The goal of this patch is to enable people with legacy armasm objects
to be able to use lld as the linker. Sadly armasm uses SHT_REL format
relocations for AArch64 rather than SHT_RELA, which causes lld to
reject the objects. As armasm is frozen, we know the small set of
relocations that it officially supports, and that set will not grow
(outside the equivalent of the .reloc directive, which I think we can
rule out of scope as it is not commonly used).

The benefit to lld is that it will ease migration from a proprietary to
an open-source toolchain. The drawback is having to implement a small
number of SHT_REL relocations. Although this patch doesn't aim to
comprehensively cover all possible relocation addends, it does extend
lld to work with the relocation addends that armasm produces, using the
canonical aaelf64 document as a reference:

https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
---
 lld/ELF/Arch/AArch64.cpp                      | 42 +++++++++++++++----
 .../ELF/aarch64-reloc-implicit-addend.test    | 15 ++++++-
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 017c17c2b03d8..2bf6e2c6c8519 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -69,6 +69,13 @@ struct AArch64Relaxer {
 };
 } // namespace
 
+// Return the bits [Start, End] from Val shifted Start bits.
+// For instance, getBits(0xF0, 4, 8) returns 0xF.
+static uint64_t getBits(uint64_t val, int start, int end) { + uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1; + return (val >> start) & mask; +} + AArch64::AArch64() { copyRel = R_AARCH64_COPY; relativeRel = R_AARCH64_RELATIVE; @@ -219,6 +226,10 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: return 0; + case R_AARCH64_ABS16: + case R_AARCH64_PREL16: + return SignExtend64<16>(read16(buf)); + case R_AARCH64_ABS32: case R_AARCH64_PREL32: return SignExtend64<32>(read32(buf)); case R_AARCH64_ABS64: @@ -227,6 +238,30 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_AARCH64_IRELATIVE: case R_AARCH64_TLS_TPREL64: return read64(buf); + case R_AARCH64_MOVW_UABS_G0: + case R_AARCH64_MOVW_UABS_G0_NC: + return getBits(SignExtend64<16>(read16(buf)), 0, 15); + case R_AARCH64_MOVW_UABS_G1: + case R_AARCH64_MOVW_UABS_G1_NC: + return getBits(SignExtend64<32>(read32(buf)), 16, 31); + case R_AARCH64_MOVW_UABS_G2: + case R_AARCH64_MOVW_UABS_G2_NC: + return getBits(read64(buf), 32, 47); + case R_AARCH64_MOVW_UABS_G3: + return getBits(read64(buf), 48, 63); + case R_AARCH64_TSTBR14: + return getBits(SignExtend64<32>(read32(buf)), 2, 15); + case R_AARCH64_CONDBR19: + case R_AARCH64_LD_PREL_LO19: + return getBits(SignExtend64<32>(read32(buf)), 2, 20); + case R_AARCH64_ADD_ABS_LO12_NC: + return getBits(SignExtend64<16>(read16(buf)), 0, 11); + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + return getBits(SignExtend64<32>(read32(buf)), 12, 32); + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + return getBits(SignExtend64<32>(read32(buf)), 2, 27); default: internalLinkerError(getErrorLocation(buf), "cannot read addend for relocation " + toString(type)); @@ -330,13 +365,6 @@ static void write32AArch64Addr(uint8_t *l, uint64_t imm) { write32le(l, (read32le(l) & ~mask) | immLo | immHi); } -// Return the bits [Start, End] from Val shifted Start bits. -// For instance, getBits(0xF0, 4, 8) returns 0xF. -static uint64_t getBits(uint64_t val, int start, int end) { - uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1; - return (val >> start) & mask; -} - static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); } // Update the immediate field in a AARCH64 ldr, str, and add instruction. diff --git a/lld/test/ELF/aarch64-reloc-implicit-addend.test b/lld/test/ELF/aarch64-reloc-implicit-addend.test index 15f42c4d87b57..804ed97a27371 100644 --- a/lld/test/ELF/aarch64-reloc-implicit-addend.test +++ b/lld/test/ELF/aarch64-reloc-implicit-addend.test @@ -1,8 +1,19 @@ ## Test certain REL relocation types generated by legacy armasm. # RUN: yaml2obj %s -o %t.o -# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -s %t | FileCheck %s -# CHECK-COUNT-17: internal linker error: cannot read addend +# CHECK: Contents of section .abs: +# CHECK-NEXT: [[#%x,]] 29002800 00002700 00000000 0000fcff ).(...'......... +# CHECK-NEXT: [[#%x,]] ffffffff ffff ...... +# CHECK-NEXT: Contents of section .uabs: +# CHECK-NEXT: [[#%x,]] 40ffffff 40ffffff 20ffffff 20ffffff @...@... ... ... +# CHECK-NEXT: [[#%x,]] 00ffffff 00ffffff ........ +# CHECK-NEXT: Contents of section .prel: +# CHECK-NEXT: [[#%x,]] 00ffffff fcfeffff f8feffff a0ffffff ................ +# CHECK-NEXT: [[#%x,]] 0010009f 0010009f ........ +# CHECK-NEXT: Contents of section .branch: +# CHECK-NEXT: [[#%x,]] f0ffffff f0ffffff fdffffff fcffff14 ................ 
--- !ELF

From def6174d2a7a5a66b3871d4ce5035a71e513e6ef Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Thu, 11 Apr 2024 10:37:57 +0200
Subject: [PATCH 114/886] [BOLT] Emit empty FDE for injected functions

This fixes an issue where `PatchEntries` overwrites a function's body
but keeps its CFI untouched. Existing FDEs thus become invalid. This
doesn't affect unwinding because patched functions are transparent from
an EH/unwinding perspective, but it breaks BOLT when disassembling
those functions.

Emit an empty FDE for injected functions (emitted to the same address
as .org functions) that takes precedence over the original FDE. This
adds eh_frame overhead, but restores the ability to disassemble .org
functions. Note that the overhead is avoided in `-use-old-text` mode.

Test Plan: updated bolt/test/X86/patch-entries.test

Reviewers: rafaelauler, maksfb, dcci, ayermolo

Reviewed By: maksfb, dcci

Pull Request: https://github.com/llvm/llvm-project/pull/87967
---
 bolt/include/bolt/Core/BinaryFunction.h |  3 ++-
 bolt/test/X86/patch-entries.test        | 23 ++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index bc047fefa3151..26d2d01f86267 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1402,7 +1402,8 @@ class BinaryFunction {
 
   /// Return true if the function has CFI instructions
   bool hasCFI() const {
-    return !FrameInstructions.empty() || !CIEFrameInstructions.empty();
+    return !FrameInstructions.empty() || !CIEFrameInstructions.empty() ||
+           IsInjected;
   }
 
   /// Return unique number associated with the function.
diff --git a/bolt/test/X86/patch-entries.test b/bolt/test/X86/patch-entries.test
index 54f358f273e79..4a725412dd616 100644
--- a/bolt/test/X86/patch-entries.test
+++ b/bolt/test/X86/patch-entries.test
@@ -7,4 +7,25 @@ REQUIRES: system-linux
 RUN: %clang %cflags -no-pie -g %p/Inputs/patch-entries.c -fuse-ld=lld -o %t.exe \
 RUN:   -Wl,-q -I%p/../Inputs
 
-RUN: llvm-bolt -relocs %t.exe -o %t.out --update-debug-sections --force-patch
+RUN: llvm-bolt -relocs %t.exe -o %t.out --update-debug-sections --force-patch \
+RUN:   --enable-bat
+
+# Check that patched functions can be disassembled (override FDE from the
+# original function)
+# PREAGG: B X:0 #foo.org.0# 1 0
+RUN: link_fdata %s %t.out %t.preagg PREAGG
+RUN: perf2bolt %t.out -p %t.preagg --pa -o %t.yaml --profile-format=yaml \
+RUN:   -print-disasm -print-only=foo.org.0/1 2>&1 | FileCheck %s
+CHECK-NOT: BOLT-WARNING: sizes differ for function foo.org.0/1
+CHECK: Binary Function "foo.org.0/1(*2)" after disassembly {
+
+# Check the expected eh_frame contents
+RUN: llvm-nm --print-size %t.out > %t.foo
+RUN: llvm-objdump %t.out --dwarf=frames >> %t.foo
+RUN: FileCheck %s --input-file %t.foo --check-prefix=CHECK-FOO
+CHECK-FOO: 0000000000[[#%x,FOO:]] [[#%x,OPTSIZE:]] t foo
+CHECK-FOO: 0000000000[[#%x,ORG:]] [[#%x,ORGSIZE:]] t foo.org.0
+# patched FDE comes first
+CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+ORGSIZE]]
+# original FDE comes second
+CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+OPTSIZE]]

From fe3b20d5ab4b47823fb48ad7cfbc47b8224ce826 Mon Sep 17 00:00:00 2001
From: NagyDonat 
Date: Thu, 11 Apr 2024 10:44:35 +0200
Subject: [PATCH 115/886] [analyzer] Use CDM::CLibrary instead of
 isGlobalCFunction() (#88267)

This commit updates several checkers to use call descriptions with the
matching mode `CDM::CLibrary` instead of checking
`Call.isGlobalCFunction()` after performing the
match. This resolves several TODOs in various checkers.

Note that both matching with `CDM::CLibrary` and calling
`isGlobalCFunction` lead to `CheckerContext::isCLibraryFunction()`
checks (so this change is close to being NFC), but if it is used via
the matching mode then the checker can automatically recognize the
builtin variants of the matched functions.

I'll also make similar changes in GenericTaintChecker, but that checker
has separate and inconsistent rules for handling the normal and the
builtin variant of several functions (e.g. `memcpy` and
`__builtin_memcpy`), so I'll put those changes into a separate commit.
---
 .../Checkers/BasicObjCFoundationChecks.cpp    |  12 +-
 .../Checkers/PthreadLockChecker.cpp           | 115 +++++++++++------
 .../Checkers/SimpleStreamChecker.cpp          |  10 +-
 .../StaticAnalyzer/Checkers/StreamChecker.cpp |  69 +++++------
 .../StaticAnalyzer/Checkers/ValistChecker.cpp |  35 +++---
 5 files changed, 130 insertions(+), 111 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index c72a97cc01e91..80f128b917b20 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -542,10 +542,10 @@ namespace {
 class CFRetainReleaseChecker : public Checker {
   mutable APIMisuse BT{this, "null passed to CF memory management function"};
   const CallDescriptionSet ModelledCalls = {
-      {{"CFRetain"}, 1},
-      {{"CFRelease"}, 1},
-      {{"CFMakeCollectable"}, 1},
-      {{"CFAutorelease"}, 1},
+      {CDM::CLibrary, {"CFRetain"}, 1},
+      {CDM::CLibrary, {"CFRelease"}, 1},
+      {CDM::CLibrary, {"CFMakeCollectable"}, 1},
+      {CDM::CLibrary, {"CFAutorelease"}, 1},
   };
 
 public:
@@ -555,10 +555,6 @@ class CFRetainReleaseChecker : public Checker {
 
 void CFRetainReleaseChecker::checkPreCall(const CallEvent &Call,
                                           CheckerContext &C) const {
-  // TODO: Make this check part of CallDescription.
-  if (!Call.isGlobalCFunction())
-    return;
-
   // Check if we called CFRetain/CFRelease/CFMakeCollectable/CFAutorelease.
   if (!ModelledCalls.contains(Call))
     return;
diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index fa8572cf85edf..86530086ff1b2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -87,7 +87,8 @@ class PthreadLockChecker : public Checker PThreadCallbacks = {
       // Init.
-      {{{"pthread_mutex_init"}, 2}, &PthreadLockChecker::InitAnyLock},
+      {{CDM::CLibrary, {"pthread_mutex_init"}, 2},
+       &PthreadLockChecker::InitAnyLock},
       // TODO: pthread_rwlock_init(2 arguments).
       // TODO: lck_mtx_init(3 arguments).
      // TODO: lck_mtx_alloc_init(2 arguments) => returns the mutex.
@@ -95,74 +96,106 @@ class PthreadLockChecker : public Checker returns the mutex.
 
       // Acquire.
- {{{"pthread_mutex_lock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"pthread_rwlock_rdlock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"pthread_rwlock_wrlock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"lck_mtx_lock"}, 1}, &PthreadLockChecker::AcquireXNULock}, - {{{"lck_rw_lock_exclusive"}, 1}, &PthreadLockChecker::AcquireXNULock}, - {{{"lck_rw_lock_shared"}, 1}, &PthreadLockChecker::AcquireXNULock}, + {{CDM::CLibrary, {"pthread_mutex_lock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"pthread_rwlock_rdlock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"pthread_rwlock_wrlock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"lck_mtx_lock"}, 1}, + &PthreadLockChecker::AcquireXNULock}, + {{CDM::CLibrary, {"lck_rw_lock_exclusive"}, 1}, + &PthreadLockChecker::AcquireXNULock}, + {{CDM::CLibrary, {"lck_rw_lock_shared"}, 1}, + &PthreadLockChecker::AcquireXNULock}, // Try. - {{{"pthread_mutex_trylock"}, 1}, &PthreadLockChecker::TryPthreadLock}, - {{{"pthread_rwlock_tryrdlock"}, 1}, &PthreadLockChecker::TryPthreadLock}, - {{{"pthread_rwlock_trywrlock"}, 1}, &PthreadLockChecker::TryPthreadLock}, - {{{"lck_mtx_try_lock"}, 1}, &PthreadLockChecker::TryXNULock}, - {{{"lck_rw_try_lock_exclusive"}, 1}, &PthreadLockChecker::TryXNULock}, - {{{"lck_rw_try_lock_shared"}, 1}, &PthreadLockChecker::TryXNULock}, + {{CDM::CLibrary, {"pthread_mutex_trylock"}, 1}, + &PthreadLockChecker::TryPthreadLock}, + {{CDM::CLibrary, {"pthread_rwlock_tryrdlock"}, 1}, + &PthreadLockChecker::TryPthreadLock}, + {{CDM::CLibrary, {"pthread_rwlock_trywrlock"}, 1}, + &PthreadLockChecker::TryPthreadLock}, + {{CDM::CLibrary, {"lck_mtx_try_lock"}, 1}, + &PthreadLockChecker::TryXNULock}, + {{CDM::CLibrary, {"lck_rw_try_lock_exclusive"}, 1}, + &PthreadLockChecker::TryXNULock}, + {{CDM::CLibrary, {"lck_rw_try_lock_shared"}, 1}, + &PthreadLockChecker::TryXNULock}, // Release. - {{{"pthread_mutex_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"pthread_rwlock_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"lck_mtx_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"lck_rw_unlock_exclusive"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"lck_rw_unlock_shared"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"lck_rw_done"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"pthread_mutex_unlock"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"pthread_rwlock_unlock"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"lck_mtx_unlock"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"lck_rw_unlock_exclusive"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"lck_rw_unlock_shared"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"lck_rw_done"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, // Destroy. - {{{"pthread_mutex_destroy"}, 1}, &PthreadLockChecker::DestroyPthreadLock}, - {{{"lck_mtx_destroy"}, 2}, &PthreadLockChecker::DestroyXNULock}, + {{CDM::CLibrary, {"pthread_mutex_destroy"}, 1}, + &PthreadLockChecker::DestroyPthreadLock}, + {{CDM::CLibrary, {"lck_mtx_destroy"}, 2}, + &PthreadLockChecker::DestroyXNULock}, // TODO: pthread_rwlock_destroy(1 argument). // TODO: lck_rw_destroy(2 arguments). }; CallDescriptionMap FuchsiaCallbacks = { // Init. - {{{"spin_lock_init"}, 1}, &PthreadLockChecker::InitAnyLock}, + {{CDM::CLibrary, {"spin_lock_init"}, 1}, + &PthreadLockChecker::InitAnyLock}, // Acquire. 
- {{{"spin_lock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"spin_lock_save"}, 3}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"sync_mutex_lock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, - {{{"sync_mutex_lock_with_waiter"}, 1}, + {{CDM::CLibrary, {"spin_lock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"spin_lock_save"}, 3}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"sync_mutex_lock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"sync_mutex_lock_with_waiter"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, // Try. - {{{"spin_trylock"}, 1}, &PthreadLockChecker::TryFuchsiaLock}, - {{{"sync_mutex_trylock"}, 1}, &PthreadLockChecker::TryFuchsiaLock}, - {{{"sync_mutex_timedlock"}, 2}, &PthreadLockChecker::TryFuchsiaLock}, + {{CDM::CLibrary, {"spin_trylock"}, 1}, + &PthreadLockChecker::TryFuchsiaLock}, + {{CDM::CLibrary, {"sync_mutex_trylock"}, 1}, + &PthreadLockChecker::TryFuchsiaLock}, + {{CDM::CLibrary, {"sync_mutex_timedlock"}, 2}, + &PthreadLockChecker::TryFuchsiaLock}, // Release. - {{{"spin_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"spin_unlock_restore"}, 3}, &PthreadLockChecker::ReleaseAnyLock}, - {{{"sync_mutex_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"spin_unlock"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"spin_unlock_restore"}, 3}, + &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"sync_mutex_unlock"}, 1}, + &PthreadLockChecker::ReleaseAnyLock}, }; CallDescriptionMap C11Callbacks = { // Init. - {{{"mtx_init"}, 2}, &PthreadLockChecker::InitAnyLock}, + {{CDM::CLibrary, {"mtx_init"}, 2}, &PthreadLockChecker::InitAnyLock}, // Acquire. - {{{"mtx_lock"}, 1}, &PthreadLockChecker::AcquirePthreadLock}, + {{CDM::CLibrary, {"mtx_lock"}, 1}, + &PthreadLockChecker::AcquirePthreadLock}, // Try. - {{{"mtx_trylock"}, 1}, &PthreadLockChecker::TryC11Lock}, - {{{"mtx_timedlock"}, 2}, &PthreadLockChecker::TryC11Lock}, + {{CDM::CLibrary, {"mtx_trylock"}, 1}, &PthreadLockChecker::TryC11Lock}, + {{CDM::CLibrary, {"mtx_timedlock"}, 2}, &PthreadLockChecker::TryC11Lock}, // Release. - {{{"mtx_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, + {{CDM::CLibrary, {"mtx_unlock"}, 1}, &PthreadLockChecker::ReleaseAnyLock}, // Destroy - {{{"mtx_destroy"}, 1}, &PthreadLockChecker::DestroyPthreadLock}, + {{CDM::CLibrary, {"mtx_destroy"}, 1}, + &PthreadLockChecker::DestroyPthreadLock}, }; ProgramStateRef resolvePossiblyDestroyedMutex(ProgramStateRef state, @@ -258,13 +291,9 @@ REGISTER_MAP_WITH_PROGRAMSTATE(DestroyRetVal, const MemRegion *, SymbolRef) void PthreadLockChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { - // An additional umbrella check that all functions modeled by this checker - // are global C functions. - // TODO: Maybe make this the default behavior of CallDescription - // with exactly one identifier? // FIXME: Try to handle cases when the implementation was inlined rather // than just giving up. 
- if (!Call.isGlobalCFunction() || C.wasInlined) + if (C.wasInlined) return; if (const FnCheck *Callback = PThreadCallbacks.lookup(Call)) diff --git a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp index 50d50562d3e75..5152624d00f46 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp @@ -52,8 +52,8 @@ class SimpleStreamChecker : public Checker { - const CallDescription OpenFn{{"fopen"}, 2}; - const CallDescription CloseFn{{"fclose"}, 1}; + const CallDescription OpenFn{CDM::CLibrary, {"fopen"}, 2}; + const CallDescription CloseFn{CDM::CLibrary, {"fclose"}, 1}; const BugType DoubleCloseBugType{this, "Double fclose", "Unix Stream API Error"}; @@ -92,9 +92,6 @@ REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState) void SimpleStreamChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { - if (!Call.isGlobalCFunction()) - return; - if (!OpenFn.matches(Call)) return; @@ -111,9 +108,6 @@ void SimpleStreamChecker::checkPostCall(const CallEvent &Call, void SimpleStreamChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { - if (!Call.isGlobalCFunction()) - return; - if (!CloseFn.matches(Call)) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 31c756ab0c581..bd495cd0f9710 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -302,85 +302,88 @@ class StreamChecker : public Checker FnDescriptions = { - {{{"fopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, - {{{"fdopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, - {{{"freopen"}, 3}, + {{CDM::CLibrary, {"fopen"}, 2}, + {nullptr, &StreamChecker::evalFopen, ArgNone}}, + {{CDM::CLibrary, {"fdopen"}, 2}, + {nullptr, &StreamChecker::evalFopen, ArgNone}}, + {{CDM::CLibrary, {"freopen"}, 3}, {&StreamChecker::preFreopen, &StreamChecker::evalFreopen, 2}}, - {{{"tmpfile"}, 0}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, - {{{"fclose"}, 1}, + {{CDM::CLibrary, {"tmpfile"}, 0}, + {nullptr, &StreamChecker::evalFopen, ArgNone}}, + {{CDM::CLibrary, {"fclose"}, 1}, {&StreamChecker::preDefault, &StreamChecker::evalFclose, 0}}, - {{{"fread"}, 4}, + {{CDM::CLibrary, {"fread"}, 4}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFreadFwrite, _1, _2, _3, _4, true), 3}}, - {{{"fwrite"}, 4}, + {{CDM::CLibrary, {"fwrite"}, 4}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFreadFwrite, _1, _2, _3, _4, false), 3}}, - {{{"fgetc"}, 1}, + {{CDM::CLibrary, {"fgetc"}, 1}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}}, - {{{"fgets"}, 3}, + {{CDM::CLibrary, {"fgets"}, 3}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, false), 2}}, - {{{"getc"}, 1}, + {{CDM::CLibrary, {"getc"}, 1}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}}, - {{{"fputc"}, 2}, + {{CDM::CLibrary, {"fputc"}, 2}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}}, - {{{"fputs"}, 2}, + {{CDM::CLibrary, {"fputs"}, 2}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, false), 1}}, - {{{"putc"}, 2}, + {{CDM::CLibrary, {"putc"}, 2}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}}, - {{{"fprintf"}}, + 
{{CDM::CLibrary, {"fprintf"}}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}}, - {{{"vfprintf"}, 3}, + {{CDM::CLibrary, {"vfprintf"}, 3}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}}, - {{{"fscanf"}}, + {{CDM::CLibrary, {"fscanf"}}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}}, - {{{"vfscanf"}, 3}, + {{CDM::CLibrary, {"vfscanf"}, 3}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}}, - {{{"ungetc"}, 2}, + {{CDM::CLibrary, {"ungetc"}, 2}, {&StreamChecker::preWrite, std::bind(&StreamChecker::evalUngetc, _1, _2, _3, _4), 1}}, - {{{"getdelim"}, 4}, + {{CDM::CLibrary, {"getdelim"}, 4}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalGetdelim, _1, _2, _3, _4), 3}}, - {{{"getline"}, 3}, + {{CDM::CLibrary, {"getline"}, 3}, {&StreamChecker::preRead, std::bind(&StreamChecker::evalGetdelim, _1, _2, _3, _4), 2}}, - {{{"fseek"}, 3}, + {{CDM::CLibrary, {"fseek"}, 3}, {&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}}, - {{{"fseeko"}, 3}, + {{CDM::CLibrary, {"fseeko"}, 3}, {&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}}, - {{{"ftell"}, 1}, + {{CDM::CLibrary, {"ftell"}, 1}, {&StreamChecker::preWrite, &StreamChecker::evalFtell, 0}}, - {{{"ftello"}, 1}, + {{CDM::CLibrary, {"ftello"}, 1}, {&StreamChecker::preWrite, &StreamChecker::evalFtell, 0}}, - {{{"fflush"}, 1}, + {{CDM::CLibrary, {"fflush"}, 1}, {&StreamChecker::preFflush, &StreamChecker::evalFflush, 0}}, - {{{"rewind"}, 1}, + {{CDM::CLibrary, {"rewind"}, 1}, {&StreamChecker::preDefault, &StreamChecker::evalRewind, 0}}, - {{{"fgetpos"}, 2}, + {{CDM::CLibrary, {"fgetpos"}, 2}, {&StreamChecker::preWrite, &StreamChecker::evalFgetpos, 0}}, - {{{"fsetpos"}, 2}, + {{CDM::CLibrary, {"fsetpos"}, 2}, {&StreamChecker::preDefault, &StreamChecker::evalFsetpos, 0}}, - {{{"clearerr"}, 1}, + {{CDM::CLibrary, {"clearerr"}, 1}, {&StreamChecker::preDefault, &StreamChecker::evalClearerr, 0}}, - {{{"feof"}, 1}, + {{CDM::CLibrary, {"feof"}, 1}, {&StreamChecker::preDefault, std::bind(&StreamChecker::evalFeofFerror, _1, _2, _3, _4, ErrorFEof), 0}}, - {{{"ferror"}, 1}, + {{CDM::CLibrary, {"ferror"}, 1}, {&StreamChecker::preDefault, std::bind(&StreamChecker::evalFeofFerror, _1, _2, _3, _4, ErrorFError), 0}}, - {{{"fileno"}, 1}, + {{CDM::CLibrary, {"fileno"}, 1}, {&StreamChecker::preDefault, &StreamChecker::evalFileno, 0}}, }; @@ -540,8 +543,6 @@ class StreamChecker : public CheckergetType(); if (!T->isIntegralOrEnumerationType() && !T->isPointerType() && diff --git a/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp index 2d1b873abf73f..28320f46f237a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp @@ -100,32 +100,31 @@ class ValistChecker : public Checker, }; const SmallVector - ValistChecker::VAListAccepters = {{{{"vfprintf"}, 3}, 2}, - {{{"vfscanf"}, 3}, 2}, - {{{"vprintf"}, 2}, 1}, - {{{"vscanf"}, 2}, 1}, - {{{"vsnprintf"}, 4}, 3}, - {{{"vsprintf"}, 3}, 2}, - {{{"vsscanf"}, 3}, 2}, - {{{"vfwprintf"}, 3}, 2}, - {{{"vfwscanf"}, 3}, 2}, - {{{"vwprintf"}, 2}, 1}, - {{{"vwscanf"}, 2}, 1}, - {{{"vswprintf"}, 4}, 3}, + ValistChecker::VAListAccepters = {{{CDM::CLibrary, {"vfprintf"}, 3}, 2}, + {{CDM::CLibrary, {"vfscanf"}, 3}, 2}, + {{CDM::CLibrary, {"vprintf"}, 2}, 1}, + {{CDM::CLibrary, {"vscanf"}, 2}, 1}, + {{CDM::CLibrary, {"vsnprintf"}, 4}, 3}, + {{CDM::CLibrary, 
{"vsprintf"}, 3}, 2}, + {{CDM::CLibrary, {"vsscanf"}, 3}, 2}, + {{CDM::CLibrary, {"vfwprintf"}, 3}, 2}, + {{CDM::CLibrary, {"vfwscanf"}, 3}, 2}, + {{CDM::CLibrary, {"vwprintf"}, 2}, 1}, + {{CDM::CLibrary, {"vwscanf"}, 2}, 1}, + {{CDM::CLibrary, {"vswprintf"}, 4}, 3}, // vswprintf is the wide version of // vsnprintf, vsprintf has no wide version - {{{"vswscanf"}, 3}, 2}}; + {{CDM::CLibrary, {"vswscanf"}, 3}, 2}}; -const CallDescription ValistChecker::VaStart({"__builtin_va_start"}, /*Args=*/2, +const CallDescription ValistChecker::VaStart(CDM::CLibrary, + {"__builtin_va_start"}, /*Args=*/2, /*Params=*/1), - ValistChecker::VaCopy({"__builtin_va_copy"}, 2), - ValistChecker::VaEnd({"__builtin_va_end"}, 1); + ValistChecker::VaCopy(CDM::CLibrary, {"__builtin_va_copy"}, 2), + ValistChecker::VaEnd(CDM::CLibrary, {"__builtin_va_end"}, 1); } // end anonymous namespace void ValistChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { - if (!Call.isGlobalCFunction()) - return; if (VaStart.matches(Call)) checkVAListStartCall(Call, C, false); else if (VaCopy.matches(Call)) From 5c9315f575370393ccc89ef0229743c05f6fe703 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Thu, 11 Apr 2024 10:54:50 +0200 Subject: [PATCH 116/886] Fix complex log1p accuracy with large abs values. (#88364) This ports openxla/xla#10503 by @pearu. In addition to the filecheck test here, the accuracy was tested with XLA's complex_unary_op_test and its MLIR emitters. This is a fixed version of https://github.com/llvm/llvm-project/pull/88260. The previous version relied on implementation-specific behavior in the order of evaluation of maxAbsOfRealPlusOneAndImagMinusOne's operands. --- .../ComplexToStandard/ComplexToStandard.cpp | 51 ++++++++++--------- .../convert-to-standard.mlir | 48 ++++++++++------- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 9c3c4d96a301e..a6fcf6a758c07 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -570,37 +570,40 @@ struct Log1pOpConversion : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto type = cast(adaptor.getComplex().getType()); auto elementType = cast(type.getElementType()); - arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); + arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue(); mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); - Value real = b.create(elementType, adaptor.getComplex()); - Value imag = b.create(elementType, adaptor.getComplex()); + Value real = b.create(adaptor.getComplex()); + Value imag = b.create(adaptor.getComplex()); Value half = b.create(elementType, b.getFloatAttr(elementType, 0.5)); Value one = b.create(elementType, b.getFloatAttr(elementType, 1)); - Value two = b.create(elementType, - b.getFloatAttr(elementType, 2)); - - // log1p(a+bi) = .5*log((a+1)^2+b^2) + i*atan2(b, a + 1) - // log((a+1)+bi) = .5*log(a*a + 2*a + 1 + b*b) + i*atan2(b, a+1) - // log((a+1)+bi) = .5*log1p(a*a + 2*a + b*b) + i*atan2(b, a+1) - Value sumSq = b.create(real, real, fmf.getValue()); - sumSq = b.create( - sumSq, b.create(real, two, fmf.getValue()), - fmf.getValue()); - sumSq = b.create( - sumSq, b.create(imag, imag, fmf.getValue()), - fmf.getValue()); - Value logSumSq = - b.create(elementType, sumSq, fmf.getValue()); - Value resultReal = b.create(logSumSq, half, 
fmf.getValue()); - - Value realPlusOne = b.create(real, one, fmf.getValue()); - - Value resultImag = - b.create(elementType, imag, realPlusOne, fmf.getValue()); + Value realPlusOne = b.create(real, one, fmf); + Value absRealPlusOne = b.create(realPlusOne, fmf); + Value absImag = b.create(imag, fmf); + + Value maxAbs = b.create(absRealPlusOne, absImag, fmf); + Value minAbs = b.create(absRealPlusOne, absImag, fmf); + + Value useReal = b.create(arith::CmpFPredicate::OGT, + realPlusOne, absImag, fmf); + Value maxMinusOne = b.create(maxAbs, one, fmf); + Value maxAbsOfRealPlusOneAndImagMinusOne = + b.create(useReal, real, maxMinusOne); + Value minMaxRatio = b.create(minAbs, maxAbs, fmf); + Value logOfMaxAbsOfRealPlusOneAndImag = + b.create(maxAbsOfRealPlusOneAndImagMinusOne, fmf); + Value logOfSqrtPart = b.create( + b.create(minMaxRatio, minMaxRatio, fmf), fmf); + Value r = b.create( + b.create(half, logOfSqrtPart, fmf), + logOfMaxAbsOfRealPlusOneAndImag, fmf); + Value resultReal = b.create( + b.create(arith::CmpFPredicate::UNO, r, r, fmf), minAbs, + r); + Value resultImag = b.create(imag, realPlusOne, fmf); rewriter.replaceOpWithNewOp(op, type, resultReal, resultImag); return success(); diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index f5d9499eadda4..46dba04a88aa0 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -300,15 +300,22 @@ func.func @complex_log1p(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32 -// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32 -// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32 -// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32 -// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32 -// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32 // CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32 +// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %[[ONE]] : f32 +// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32 +// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] : f32 +// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] : f32 +// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] : f32 +// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] : f32 +// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], 
%[[REAL_PLUS_ONE]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex @@ -963,15 +970,22 @@ func.func @complex_log1p_with_fmf(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] fastmath : f32 -// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] fastmath : f32 -// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] fastmath : f32 -// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] fastmath : f32 -// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] fastmath : f32 -// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] fastmath : f32 -// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] fastmath : f32 -// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath : f32 +// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath : f32 +// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %[[ONE]] fastmath : f32 +// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32 +// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath : f32 +// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath : f32 +// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath : f32 +// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath : f32 +// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] fastmath : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex From db2fb3d96b217f0d2e139e7816c98d9f95974f25 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 11 Apr 2024 16:57:32 +0800 Subject: [PATCH 117/886] [X86] Define __APX_F__ when APX is enabled. 
(#88343)

Related GCC patch:
https://gcc.gnu.org/pipermail/gcc-patches/2024-April/648789.html
---
 clang/lib/Basic/Targets/X86.cpp               | 3 +++
 clang/test/Preprocessor/x86_target_features.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 1966af17904d6..bf1767c87fe1c 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -954,6 +954,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__CCMP__");
   if (HasCF)
     Builder.defineMacro("__CF__");
+  // Condition here is aligned with the feature set of mapxf in Options.td
+  if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD)
+    Builder.defineMacro("__APX_F__");
 
   // Each case falls through to the previous one here.
   switch (SSELevel) {
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index a1882043910f7..5602c59158fe5 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -803,7 +803,8 @@
 // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=ndd -x c -E -dM -o - %s | FileCheck --check-prefix=NDD %s
 // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=ccmp -x c -E -dM -o - %s | FileCheck --check-prefix=CCMP %s
 // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=cf -x c -E -dM -o - %s | FileCheck --check-prefix=CF %s
-// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapxf -x c -E -dM -o - %s | FileCheck --check-prefixes=EGPR,PUSH2POP2,PPX,NDD %s
+// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapxf -x c -E -dM -o - %s | FileCheck --check-prefixes=EGPR,PUSH2POP2,PPX,NDD,APXF %s
+// APXF: #define __APX_F__ 1
 // CCMP: #define __CCMP__ 1
 // CF: #define __CF__ 1
 // EGPR: #define __EGPR__ 1

From 32b95a37083d1fee1a638e292be0aac9a98792fd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Thu, 11 Apr 2024 10:05:53 +0100
Subject: [PATCH 118/886] [VectorCombine][X86] Extend
 shuffle(bitcast(x),bitcast(y)) test coverage

As discussed on #87510, the intention is to fold
shuffle(bitcast(x),bitcast(y)) -> bitcast(shuffle(x,y)), but it must
not interfere with existing bitcast(shuffle(bitcast(x),bitcast(y)))
folds.
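
As a sketch of the eventual rewrite (illustrative IR only; the fold
itself is left to #87510, and the names @src and @tgt are hypothetical),
the same-element-count case would turn:

  define <8 x float> @src(<4 x i32> %a0, <4 x i32> %a1) {
    %x0 = bitcast <4 x i32> %a0 to <4 x float>
    %x1 = bitcast <4 x i32> %a1 to <4 x float>
    %r = shufflevector <4 x float> %x0, <4 x float> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x float> %r
  }

into the equivalent:

  define <8 x float> @tgt(<4 x i32> %a0, <4 x i32> %a1) {
    %s = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    %r = bitcast <8 x i32> %s to <8 x float>
    ret <8 x float> %r
  }

This is valid because a bitcast between same-width element types acts
element by element and therefore commutes with the shuffle.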
--- .../VectorCombine/X86/shuffle-of-casts.ll | 67 ++++++++++++++++--- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 2031c2d04c601..60c6ff97c58b5 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -165,6 +165,51 @@ define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> % ret <8 x double> %r } +; TODO - bitcasts (same element count) + +define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <4 x float> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <4 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; + %x0 = bitcast <4 x i32> %a0 to <4 x float> + %x1 = bitcast <4 x i32> %a1 to <4 x float> + %r = shufflevector <4 x float> %x0, <4 x float> %x1, <8 x i32> + ret <8 x float> %r +} + +; TODO - bitcasts (lower element count) + +define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @concat_bitcast_v8i16_v4f64( +; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i16> [[A0:%.*]] to <2 x double> +; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i16> [[A1:%.*]] to <2 x double> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[X0]], <2 x double> [[X1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[R]] +; + %x0 = bitcast <8 x i16> %a0 to <2 x double> + %x1 = bitcast <8 x i16> %a1 to <2 x double> + %r = shufflevector <2 x double> %x0, <2 x double> %x1, <4 x i32> + ret <4 x double> %r +} + +; TODO - bitcasts (higher element count) + +define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @concat_bitcast_v4i32_v16i16( +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <8 x i16> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <8 x i16> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[R]] +; + %x0 = bitcast <4 x i32> %a0 to <8 x i16> + %x1 = bitcast <4 x i32> %a1 to <8 x i16> + %r = shufflevector <8 x i16> %x0, <8 x i16> %x1, <16 x i32> + ret <16 x i16> %r +} + ; negative - multiuse define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) { @@ -182,19 +227,19 @@ define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1 ret <8 x i16> %r } -; negative - bitcasts +; negative - bitcasts (unscalable higher element count) -define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( -; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <4 x float> -; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X1]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R]] +define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @revpair_bitcast_v4i32_v16i16( +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <8 x i16> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <8 x i16> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[R]] ; - %x0 = bitcast <4 x i32> 
%a0 to <4 x float>
-  %x1 = bitcast <4 x i32> %a1 to <4 x float>
-  %r = shufflevector <4 x float> %x0, <4 x float> %x1, <8 x i32> 
-  ret <8 x float> %r
+  %x0 = bitcast <4 x i32> %a0 to <8 x i16>
+  %x1 = bitcast <4 x i32> %a1 to <8 x i16>
+  %r = shufflevector <8 x i16> %x0, <8 x i16> %x1, <16 x i32> 
+  ret <16 x i16> %r
 }
 
 ; negative - src type mismatch

From 478c42004c2bd4c91a01c47450eca6cdb6b0982d Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Thu, 11 Apr 2024 10:25:56 +0100
Subject: [PATCH 119/886] [VPlan] Update recipe ::clone definitions to use
 cloned tys (NFC).

Update definitions of ::clone in recipe sub-types to use the sub-type
as the return type. This avoids typecasts down to the cloned type in
some cases.
---
 llvm/lib/Transforms/Vectorize/VPlan.h | 56 ++++++++++++++-------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5dc905a3c407b..d86a81d4fb4c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -888,6 +888,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     return R && classof(R);
   }
 
+  virtual VPSingleDefRecipe *clone() override = 0;
+
   /// Returns the underlying instruction.
   Instruction *getUnderlyingInstr() {
     return cast(getUnderlyingValue());
@@ -1248,7 +1250,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
 
   VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
 
-  VPRecipeBase *clone() override {
+  VPInstruction *clone() override {
     SmallVector Operands(operands());
     auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
     New->transferFlags(*this);
@@ -1335,7 +1337,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
 
   ~VPWidenRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPWidenRecipe *clone() override {
     auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
     R->transferFlags(*this);
     return R;
@@ -1380,7 +1382,7 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
 
   ~VPWidenCastRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPWidenCastRecipe *clone() override {
     if (auto *UV = getUnderlyingValue())
       return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
                                    *cast(UV));
@@ -1420,7 +1422,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
 
   ~VPScalarCastRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPScalarCastRecipe *clone() override {
     return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
   }
 
@@ -1465,7 +1467,7 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
 
   ~VPWidenCallRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPWidenCallRecipe *clone() override {
     return new VPWidenCallRecipe(*cast(getUnderlyingInstr()),
                                  operands(), VectorIntrinsicID, getDebugLoc(),
                                  Variant);
@@ -1492,7 +1494,7 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
 
   ~VPWidenSelectRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPWidenSelectRecipe *clone() override {
     return new VPWidenSelectRecipe(*cast(getUnderlyingInstr()),
                                    operands());
  }
@@ -1540,7 +1542,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 
   ~VPWidenGEPRecipe() override = default;
 
-  VPRecipeBase *clone() override {
+  VPWidenGEPRecipe *clone() override {
     return new VPWidenGEPRecipe(cast(getUnderlyingInstr()),
                                 operands());
   }
@@ -1581,7 +1583,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
     return true;
   }
 
-  VPRecipeBase *clone() override {
+  VPVectorPointerRecipe *clone() override {
    return new
VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse, isInBounds(), getDebugLoc()); } @@ -1696,7 +1698,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { ~VPWidenIntOrFpInductionRecipe() override = default; - VPRecipeBase *clone() override { + VPWidenIntOrFpInductionRecipe *clone() override { return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(), getStepValue(), IndDesc, Trunc); } @@ -1771,7 +1773,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { ~VPWidenPointerInductionRecipe() override = default; - VPRecipeBase *clone() override { + VPWidenPointerInductionRecipe *clone() override { return new VPWidenPointerInductionRecipe( cast(getUnderlyingInstr()), getOperand(0), getOperand(1), IndDesc, IsScalarAfterVectorization); @@ -1810,7 +1812,7 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { addOperand(Start); } - VPRecipeBase *clone() override { + VPWidenPHIRecipe *clone() override { llvm_unreachable("cloning not implemented yet"); } @@ -1853,7 +1855,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC; } - VPRecipeBase *clone() override { + VPFirstOrderRecurrencePHIRecipe *clone() override { return new VPFirstOrderRecurrencePHIRecipe( cast(getUnderlyingInstr()), *getOperand(0)); } @@ -1893,7 +1895,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe { ~VPReductionPHIRecipe() override = default; - VPRecipeBase *clone() override { + VPReductionPHIRecipe *clone() override { auto *R = new VPReductionPHIRecipe(cast(getUnderlyingInstr()), RdxDesc, *getOperand(0), IsInLoop, IsOrdered); @@ -1940,7 +1942,7 @@ class VPBlendRecipe : public VPSingleDefRecipe { "Expected an odd number of operands"); } - VPRecipeBase *clone() override { + VPBlendRecipe *clone() override { SmallVector Ops(operands()); return new VPBlendRecipe(cast(getUnderlyingValue()), Ops); } @@ -2019,7 +2021,7 @@ class VPInterleaveRecipe : public VPRecipeBase { } ~VPInterleaveRecipe() override = default; - VPRecipeBase *clone() override { + VPInterleaveRecipe *clone() override { return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), NeedsMaskForGaps); } @@ -2093,7 +2095,7 @@ class VPReductionRecipe : public VPSingleDefRecipe { ~VPReductionRecipe() override = default; - VPRecipeBase *clone() override { + VPReductionRecipe *clone() override { return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(), getVecOp(), getCondOp(), IsOrdered); } @@ -2142,7 +2144,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags { ~VPReplicateRecipe() override = default; - VPRecipeBase *clone() override { + VPReplicateRecipe *clone() override { auto *Copy = new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform, isPredicated() ? 
getMask() : nullptr); @@ -2204,7 +2206,7 @@ class VPBranchOnMaskRecipe : public VPRecipeBase { addOperand(BlockInMask); } - VPRecipeBase *clone() override { + VPBranchOnMaskRecipe *clone() override { return new VPBranchOnMaskRecipe(getOperand(0)); } @@ -2255,7 +2257,7 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe { : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {} ~VPPredInstPHIRecipe() override = default; - VPRecipeBase *clone() override { + VPPredInstPHIRecipe *clone() override { return new VPPredInstPHIRecipe(getOperand(0)); } @@ -2323,7 +2325,7 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { setMask(Mask); } - VPRecipeBase *clone() override { + VPWidenMemoryInstructionRecipe *clone() override { if (isStore()) return new VPWidenMemoryInstructionRecipe( cast(Ingredient), getAddr(), getStoredValue(), getMask(), @@ -2399,7 +2401,9 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe { ~VPExpandSCEVRecipe() override = default; - VPRecipeBase *clone() override { return new VPExpandSCEVRecipe(Expr, SE); } + VPExpandSCEVRecipe *clone() override { + return new VPExpandSCEVRecipe(Expr, SE); + } VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC) @@ -2426,7 +2430,7 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { ~VPCanonicalIVPHIRecipe() override = default; - VPRecipeBase *clone() override { + VPCanonicalIVPHIRecipe *clone() override { auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc()); R->addOperand(getBackedgeValue()); return R; @@ -2484,7 +2488,7 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { ~VPActiveLaneMaskPHIRecipe() override = default; - VPRecipeBase *clone() override { + VPActiveLaneMaskPHIRecipe *clone() override { return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc()); } @@ -2551,7 +2555,7 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { ~VPWidenCanonicalIVRecipe() override = default; - VPRecipeBase *clone() override { + VPWidenCanonicalIVRecipe *clone() override { return new VPWidenCanonicalIVRecipe( cast(getOperand(0))); } @@ -2602,7 +2606,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { ~VPDerivedIVRecipe() override = default; - VPRecipeBase *clone() override { + VPDerivedIVRecipe *clone() override { return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1), getStepValue()); } @@ -2656,7 +2660,7 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { ~VPScalarIVStepsRecipe() override = default; - VPRecipeBase *clone() override { + VPScalarIVStepsRecipe *clone() override { return new VPScalarIVStepsRecipe( getOperand(0), getOperand(1), InductionOpcode, hasFastMathFlags() ? getFastMathFlags() : FastMathFlags()); From 462e1023838703f1d3e763869afdd72ec5342a33 Mon Sep 17 00:00:00 2001 From: Poseydon42 Date: Thu, 11 Apr 2024 10:40:52 +0100 Subject: [PATCH 120/886] [InstCombine] Fold (X / C) < X and (X >> C) < X into X > 0 (#85555) Proofs: https://alive2.llvm.org/ce/z/52droC This resolves #85313. 
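
As an illustrative example of the new fold (a hand-written sketch, not
taken from the test file, with hypothetical names @src and @tgt), the
udiv case with C = 13:

  define i1 @src(i8 %x) {
    %d = udiv i8 %x, 13
    %c = icmp ult i8 %d, %x
    ret i1 %c
  }

now simplifies to the equivalent of:

  define i1 @tgt(i8 %x) {
    %c = icmp ugt i8 %x, 0
    ret i1 %c
  }

since for any constant C > 1, X u/ C is strictly less than X exactly
when X > 0, and the predicate is swapped as the divide is dropped.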
--- .../InstCombine/InstCombineCompares.cpp | 34 +++++ .../InstCombine/icmp-div-constant.ll | 141 ++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9ff1e3aa5502e..7292bb62702aa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -7163,6 +7163,40 @@ Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, if (Value *V = foldICmpWithLowBitMaskedVal(Pred, Op0, Op1, Q, *this)) return replaceInstUsesWith(CxtI, V); + // Folding (X / Y) pred X => X swap(pred) 0 for constant Y other than 0 or 1 + { + const APInt *Divisor; + if (match(Op0, m_UDiv(m_Specific(Op1), m_APInt(Divisor))) && + Divisor->ugt(1)) { + return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1, + Constant::getNullValue(Op1->getType())); + } + + if (!ICmpInst::isUnsigned(Pred) && + match(Op0, m_SDiv(m_Specific(Op1), m_APInt(Divisor))) && + Divisor->ugt(1)) { + return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1, + Constant::getNullValue(Op1->getType())); + } + } + + // Another case of this fold is (X >> Y) pred X => X swap(pred) 0 if Y != 0 + { + const APInt *Shift; + if (match(Op0, m_LShr(m_Specific(Op1), m_APInt(Shift))) && + !Shift->isZero()) { + return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1, + Constant::getNullValue(Op1->getType())); + } + + if ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SGE) && + match(Op0, m_AShr(m_Specific(Op1), m_APInt(Shift))) && + !Shift->isZero()) { + return new ICmpInst(ICmpInst::getSwappedPredicate(Pred), Op1, + Constant::getNullValue(Op1->getType())); + } + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index 8dcb96284685f..b047715432d77 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -375,3 +375,144 @@ define i1 @sdiv_eq_smin_use(i32 %x, i32 %y) { %r = icmp eq i32 %d, -2147483648 ret i1 %r } + +; Fold (X / C) cmp X into X ~cmp 0 (~cmp is the inverse predicate of cmp), for some C != 1 +; Alternative form of this fold is when division is replaced with logic right shift + +define i1 @sdiv_x_by_const_cmp_x(i32 %x) { +; CHECK-LABEL: @sdiv_x_by_const_cmp_x( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %v = sdiv i32 %x, 13 + %r = icmp eq i32 %v, %x + ret i1 %r +} + +define i1 @udiv_x_by_const_cmp_x(i32 %x) { +; CHECK-LABEL: @udiv_x_by_const_cmp_x( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %1 = udiv i32 %x, 123 + %2 = icmp slt i32 %1, %x + ret i1 %2 +} + +; Same as above but with right shift instead of division (C != 0) + +define i1 @lshr_x_by_const_cmp_x(i32 %x) { +; CHECK-LABEL: @lshr_x_by_const_cmp_x( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] +; + %v = lshr i32 %x, 1 + %r = icmp eq i32 %v, %x + ret i1 %r +} + +define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) { +; CHECK-LABEL: @lshr_by_const_cmp_sle_value( +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %v = lshr <4 x i32> %x, + %r = icmp sle <4 x i32> %v, %x + ret <4 x i1> %r +} + +define i1 @lshr_by_const_cmp_sge_value(i32 %x) { +; CHECK-LABEL: @lshr_by_const_cmp_sge_value( +; CHECK-NEXT: [[R:%.*]] = icmp slt 
i32 [[X:%.*]], 1 +; CHECK-NEXT: ret i1 [[R]] +; + %v = lshr i32 %x, 3 + %r = icmp sge i32 %v, %x + ret i1 %r +} + +define i1 @ashr_x_by_const_cmp_sge_x(i32 %x) { +; CHECK-LABEL: @ashr_x_by_const_cmp_sge_x( +; CHECK-NEXT: [[R:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: ret i1 [[R]] +; + %v = ashr i32 %x, 5 + %r = icmp sge i32 %v, %x + ret i1 %r +} + +; Negative test - constant is 1 + +define <2 x i1> @udiv_x_by_const_cmp_eq_value_neg(<2 x i32> %x) { +; CHECK-LABEL: @udiv_x_by_const_cmp_eq_value_neg( +; CHECK-NEXT: [[V:%.*]] = udiv <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[V]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %v = udiv <2 x i32> %x, + %r = icmp eq <2 x i32> %v, %x + ret <2 x i1> %r +} + +define <2 x i1> @sdiv_x_by_const_cmp_eq_value_neg(<2 x i32> %x) { +; CHECK-LABEL: @sdiv_x_by_const_cmp_eq_value_neg( +; CHECK-NEXT: [[V:%.*]] = sdiv <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[V]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %v = sdiv <2 x i32> %x, + %r = icmp eq <2 x i32> %v, %x + ret <2 x i1> %r +} + +; Negative test - constant is 0 + +define <2 x i1> @lshr_x_by_const_cmp_slt_value_neg(<2 x i32> %x) { +; CHECK-LABEL: @lshr_x_by_const_cmp_slt_value_neg( +; CHECK-NEXT: [[V:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i32> [[V]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %v = lshr <2 x i32> %x, + %r = icmp slt <2 x i32> %v, %x + ret <2 x i1> %r +} + +; Negative test - unsigned predicate with sdiv + +define i1 @sdiv_x_by_const_cmp_ult_value_neg(i32 %x) { +; CHECK-LABEL: @sdiv_x_by_const_cmp_ult_value_neg( +; CHECK-NEXT: [[V:%.*]] = sdiv i32 [[X:%.*]], 3 +; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[V]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %v = sdiv i32 %x, 3 + %r = icmp ult i32 %v, %x + ret i1 %r +} + +; Negative case - one of the components of a vector is 1 + +define <4 x i1> @sdiv_x_by_const_cmp_sgt_value_neg(<4 x i32> %x) { +; CHECK-LABEL: @sdiv_x_by_const_cmp_sgt_value_neg( +; CHECK-NEXT: [[V:%.*]] = sdiv <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[V]], [[X]] +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %v = sdiv <4 x i32> %x, + %r = icmp sgt <4 x i32> %v, %x + ret <4 x i1> %r +} + +; Negative case - ashr only allows sge/slt predicates + +define i1 @ashr_x_by_const_cmp_sle_value_neg(i32 %x) { +; CHECK-LABEL: @ashr_x_by_const_cmp_sle_value_neg( +; CHECK-NEXT: [[V:%.*]] = ashr i32 [[X:%.*]], 3 +; CHECK-NEXT: [[R:%.*]] = icmp sle i32 [[V]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %v = ashr i32 %x, 3 + %r = icmp sle i32 %v, %x + ret i1 %r +} From 82ae646eb49cfd762db7db0a74b130970fe45d97 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Thu, 11 Apr 2024 10:41:07 +0100 Subject: [PATCH 121/886] [llvm-mca] Remove spurious include_directories() (#88277) llvm-mca does not have an include directory so this commit removes the spurious include_directories directive. 
--- llvm/tools/llvm-mca/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/tools/llvm-mca/CMakeLists.txt b/llvm/tools/llvm-mca/CMakeLists.txt index 878a05c51cfb4..4ef8b9afa12a7 100644 --- a/llvm/tools/llvm-mca/CMakeLists.txt +++ b/llvm/tools/llvm-mca/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories(include) - set(LLVM_LINK_COMPONENTS AllTargetsAsmParsers AllTargetsMCAs # CustomBehaviour and InstrPostProcess From e7bc53726459bba3a48b1f529f1fd9472ad9051c Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Thu, 11 Apr 2024 11:42:59 +0200 Subject: [PATCH 122/886] [IPSCCP] Add range attribute handling (#86747) Support the new range attribute to infer ConstantRanges in IPSCCP. --- .../llvm/Transforms/Utils/SCCPSolver.h | 4 + llvm/lib/Transforms/IPO/SCCP.cpp | 3 +- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 40 ++++- llvm/test/Transforms/SCCP/range-attribute.ll | 154 ++++++++++++++++++ 4 files changed, 197 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/SCCP/range-attribute.ll diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h index 1a95f80812aab..9f7ccd4a8a32c 100644 --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -151,6 +151,10 @@ class SCCPSolver { /// works with both scalars and structs. void markOverdefined(Value *V); + /// trackValueOfArgument - Mark the specified argument overdefined unless it + /// has a range attribute. This works with both scalars and structs. + void trackValueOfArgument(Argument *V); + // isStructLatticeConstant - Return true if all the lattice values // corresponding to elements of the structure are constants, // false otherwise. diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index b1f9b827dcbaf..f8920541e6fd6 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -144,9 +144,8 @@ static bool runIPSCCP( // Assume the function is called. Solver.markBlockExecutable(&F.front()); - // Assume nothing about the incoming arguments. for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); + Solver.trackValueOfArgument(&AI); } // Determine if we can track any of the module's global variables. If so, add diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 38fc7763c5b20..b82ed25a6d0cf 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -428,6 +428,13 @@ class SCCPInstVisitor : public InstVisitor { return markConstant(ValueState[V], V, C); } + /// markConstantRange - Mark the object as the constant range \p CR. If the + /// object is not already that constant range, add it to the + /// instruction work list so that the users of the instruction are updated + /// later. + bool markConstantRange(ValueLatticeElement &IV, Value *V, + const ConstantRange &CR); + // markOverdefined - Make a value be marked as "overdefined". If the // value is not already overdefined, add it to the overdefined instruction // work list so that the users of the instruction are updated later. @@ -788,6 +795,17 @@ class SCCPInstVisitor : public InstVisitor { markOverdefined(ValueState[V], V); } + void trackValueOfArgument(Argument *A) { + if (A->getType()->isIntegerTy()) { + if (std::optional Range = A->getRange()) { + markConstantRange(ValueState[A], A, *Range); + return; + } + } + // Assume nothing about the incoming arguments without a range attribute.
+ markOverdefined(A); + } + bool isStructLatticeConstant(Function *F, StructType *STy); Constant *getConstant(const ValueLatticeElement &LV, Type *Ty) const; @@ -873,6 +891,15 @@ bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V, return true; } +bool SCCPInstVisitor::markConstantRange(ValueLatticeElement &IV, Value *V, + const ConstantRange &CR) { + if (!IV.markConstantRange(CR)) + return false; + LLVM_DEBUG(dbgs() << "markConstantRange: " << CR << ": " << *V << '\n'); + pushToWorkList(IV, V); + return true; +} + bool SCCPInstVisitor::markOverdefined(ValueLatticeElement &IV, Value *V) { if (!IV.markOverdefined()) return false; @@ -1581,10 +1608,15 @@ void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { } static ValueLatticeElement getValueFromMetadata(const Instruction *I) { - if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) - if (I->getType()->isIntegerTy()) + if (I->getType()->isIntegerTy()) { + if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) return ValueLatticeElement::getRange( getConstantRangeFromMetadata(*Ranges)); + + if (const auto *CB = dyn_cast(I)) + if (std::optional Range = CB->getRange()) + return ValueLatticeElement::getRange(*Range); + } if (I->hasMetadata(LLVMContext::MD_nonnull)) return ValueLatticeElement::getNot( ConstantPointerNull::get(cast(I->getType()))); @@ -2090,6 +2122,10 @@ const SmallPtrSet SCCPSolver::getMRVFunctionsTracked() { void SCCPSolver::markOverdefined(Value *V) { Visitor->markOverdefined(V); } +void SCCPSolver::trackValueOfArgument(Argument *V) { + Visitor->trackValueOfArgument(V); +} + bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) { return Visitor->isStructLatticeConstant(F, STy); } diff --git a/llvm/test/Transforms/SCCP/range-attribute.ll b/llvm/test/Transforms/SCCP/range-attribute.ll new file mode 100644 index 0000000000000..ae66af91bb8e3 --- /dev/null +++ b/llvm/test/Transforms/SCCP/range-attribute.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=ipsccp -S | FileCheck %s + +declare void @use(i1) +declare i32 @get_i32() + +define void @range_attribute(i32 range(i32 0, 10) %v) { +; CHECK-LABEL: @range_attribute( +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[V:%.*]], 9 +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: ret void +; + %c1 = icmp ult i32 %v, 10 + call void @use(i1 %c1) + %c2 = icmp ult i32 %v, 9 + call void @use(i1 %c2) + %c3 = icmp ugt i32 %v, 9 + call void @use(i1 %c3) + %c4 = icmp ugt i32 %v, 8 + call void @use(i1 %c4) + ret void +} + +define i32 @range_attribute_single(i32 range(i32 0, 1) %v) { +; CHECK-LABEL: @range_attribute_single( +; CHECK-NEXT: ret i32 0 +; + ret i32 %v +} + +define void @call_range_attribute() { +; CHECK-LABEL: @call_range_attribute( +; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[V]], 9 +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: ret void +; + %v = call range(i32 0, 10) i32 @get_i32() + %c1 = icmp ult i32 %v, 10 + call void @use(i1 %c1) + %c2 = icmp ult i32 %v, 9 + call void @use(i1 %c2) + %c3 = icmp ugt i32 %v, 9 + call void @use(i1 %c3) + %c4 = icmp ugt i32 %v, 8 + 
call void @use(i1 %c4) + ret void +} + + +declare range(i32 0, 10) i32 @get_i32_in_range() + +define void @call_range_result() { +; CHECK-LABEL: @call_range_result( +; CHECK-NEXT: [[V:%.*]] = call i32 @get_i32_in_range() +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[V]], 9 +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: ret void +; + %v = call i32 @get_i32_in_range() + %c1 = icmp ult i32 %v, 10 + call void @use(i1 %c1) + %c2 = icmp ult i32 %v, 9 + call void @use(i1 %c2) + %c3 = icmp ugt i32 %v, 9 + call void @use(i1 %c3) + %c4 = icmp ugt i32 %v, 8 + call void @use(i1 %c4) + ret void +} + +define internal i1 @ip_cmp_range_attribute(i32 %v) { +; CHECK-LABEL: @ip_cmp_range_attribute( +; CHECK-NEXT: ret i1 undef +; + %c = icmp ult i32 %v, 10 + ret i1 %c +} + +define i1 @ip_range_attribute(i32 range(i32 0, 10) %v) { +; CHECK-LABEL: @ip_range_attribute( +; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) +; CHECK-NEXT: ret i1 true +; + %c = call i1 @ip_cmp_range_attribute(i32 %v) + ret i1 %c +} + +define internal i1 @ip_cmp_range_call(i32 %v) { +; CHECK-LABEL: @ip_cmp_range_call( +; CHECK-NEXT: ret i1 undef +; + %c = icmp ult i32 %v, 10 + ret i1 %c +} + +define i1 @ip_range_call() { +; CHECK-LABEL: @ip_range_call( +; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) +; CHECK-NEXT: ret i1 true +; + %v = call range(i32 0, 10) i32 @get_i32() + %c = call i1 @ip_cmp_range_call(i32 %v) + ret i1 %c +} + +define internal i1 @ip_cmp_range_result(i32 %v) { +; CHECK-LABEL: @ip_cmp_range_result( +; CHECK-NEXT: ret i1 undef +; + %c = icmp ult i32 %v, 10 + ret i1 %c +} + +define i1 @ip_range_result() { +; CHECK-LABEL: @ip_range_result( +; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) +; CHECK-NEXT: ret i1 true +; + %v = call range(i32 0, 10) i32 @get_i32() + %c = call i1 @ip_cmp_range_result(i32 %v) + ret i1 %c +} + +define internal i1 @ip_cmp_with_range_attribute(i32 range(i32 0, 10) %v) { +; CHECK-LABEL: @ip_cmp_with_range_attribute( +; CHECK-NEXT: ret i1 undef +; + %c = icmp eq i32 %v, 5 + ret i1 %c +} + +define i1 @ip_range_attribute_constant() { +; CHECK-LABEL: @ip_range_attribute_constant( +; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) +; CHECK-NEXT: ret i1 true +; + %c = call i1 @ip_cmp_with_range_attribute(i32 5) + ret i1 %c +} From eef63d3c92766c6f8e78eefb9bb37ae01fbedbfc Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 11 Apr 2024 10:43:49 +0100 Subject: [PATCH 123/886] [mlir][OpenMP] add missing load for reduction cleanup region (#88289) I missed this before. For by-ref reductions, the private reduction variable is a pointer to the pointer to the variable. So an extra load is required to get the right value. See the "red.private.value.n" loads in the reduction combiner region for reference. 
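In plain C++ terms the fix looks roughly like this (hypothetical names, a loose sketch of the by-ref layout rather than the actual generated MLIR): the cleanup region receives the address of the privatized slot, and that slot in turn holds the pointer to the reduction storage, so one load is needed before the cleanup can act on the right value.

#include <cstdlib>

// privSlot models the by-ref private reduction variable: a slot whose
// contents point at the actual reduction storage.
void cleanupByRefReduction(void **privSlot) {
  void *storage = *privSlot; // the extra load this patch inserts
  std::free(storage);        // cleanup acts on the storage itself
}
// Mapping the cleanup block argument to the slot directly would have handed
// the region the slot's address instead of the allocation it holds.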
--- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 13 +++++++++++-- .../LLVMIR/openmp-parallel-reduction-cleanup.mlir | 6 ++++-- .../LLVMIR/openmp-wsloop-reduction-cleanup.mlir | 6 ++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a59677c02fc39..300fc8ba56fc5 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -889,8 +889,17 @@ static LogicalResult inlineReductionCleanup( // map the argument to the cleanup region Block &entry = cleanupRegion.front(); - moduleTranslation.mapValue(entry.getArgument(0), - privateReductionVariables[i]); + + llvm::Instruction *potentialTerminator = + builder.GetInsertBlock()->empty() ? nullptr + : &builder.GetInsertBlock()->back(); + if (potentialTerminator && potentialTerminator->isTerminator()) + builder.SetInsertPoint(potentialTerminator); + llvm::Value *reductionVar = builder.CreateLoad( + moduleTranslation.convertType(entry.getArgument(0).getType()), + privateReductionVariables[i]); + + moduleTranslation.mapValue(entry.getArgument(0), reductionVar); if (failed(inlineConvertOmpRegions(cleanupRegion, "omp.reduction.cleanup", builder, moduleTranslation))) diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir index 9ae4c4ad392b1..b7f71f438e56b 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir @@ -86,8 +86,10 @@ // Cleanup region: // CHECK: [[OMP_FINALIZE]]: -// CHECK: call void @free(ptr %[[PRIV_PTR_I]]) -// CHECK: call void @free(ptr %[[PRIV_PTR_J]]) +// CHECK: %[[PRIV_I:.+]] = load ptr, ptr %[[PRIV_PTR_I]], align 8 +// CHECK: call void @free(ptr %[[PRIV_I]]) +// CHECK: %[[PRIV_J:.+]] = load ptr, ptr %[[PRIV_PTR_J]], align 8 +// CHECK: call void @free(ptr %[[PRIV_J]]) // Reduction function. // CHECK: define internal void @[[REDFUNC]] diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir index a1e17afa53e21..3842522934e48 100644 --- a/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir +++ b/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir @@ -63,8 +63,10 @@ // Weirdly the finalization block is generated before the reduction blocks: // CHECK: [[FINALIZE:.+]]: // CHECK: call void @__kmpc_barrier -// CHECK: call void @free(ptr %[[PRIV_PTR_I]]) -// CHECK: call void @free(ptr %[[PRIV_PTR_J]]) +// CHECK: %[[PRIV_I:.+]] = load ptr, ptr %[[PRIV_PTR_I]], align 8 +// CHECK: call void @free(ptr %[[PRIV_I]]) +// CHECK: %[[PRIV_J:.+]] = load ptr, ptr %[[PRIV_PTR_J]], align 8 +// CHECK: call void @free(ptr %[[PRIV_J]]) // CHECK: ret void // Non-atomic reduction: From 6f068b9cf1ac09945c096269f0c6c276d2ec95c4 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 11 Apr 2024 10:44:09 +0100 Subject: [PATCH 124/886] [flang][OpenMP] Allocate array reduction variables on the heap (#87773) Following up on a review comment: https://github.com/llvm/llvm-project/pull/84958#discussion_r1527627848 Reductions might be inlined inside of a loop so stack allocations are not safe. Normally flang allocates arrays on the stack. Allocatable arrays have a different type: fir.box>> instead of fir.box>. 
This patch will allocate all arrays on the heap. Reductions on allocatable arrays still aren't supported (but I will get to this soon). --- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 78 +++++++++++++++++-- .../Lower/OpenMP/parallel-reduction-array.f90 | 20 ++++- .../OpenMP/parallel-reduction-array2.f90 | 20 ++++- .../test/Lower/OpenMP/parallel-reduction3.f90 | 31 ++++---- .../wsloop-reduction-array-assumed-shape.f90 | 17 +++- .../Lower/OpenMP/wsloop-reduction-array.f90 | 19 ++++- .../Lower/OpenMP/wsloop-reduction-array2.f90 | 21 ++++- 7 files changed, 171 insertions(+), 35 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 0453c01522779..918edf27baf66 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/FatalError.h" #include "flang/Parser/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/Support/CommandLine.h" @@ -391,8 +392,60 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); } +static void +createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::omp::DeclareReductionOp &reductionDecl) { + mlir::Type redTy = reductionDecl.getType(); + + mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion(); + assert(cleanupRegion.empty()); + mlir::Block *block = + builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc}); + builder.setInsertionPointToEnd(block); + + auto typeError = [loc]() { + fir::emitFatalError(loc, + "Attempt to create an omp reduction cleanup region " + "for a type that wasn't allocated", + /*genCrashDiag=*/true); + }; + + mlir::Type valTy = fir::unwrapRefType(redTy); + if (auto boxTy = mlir::dyn_cast_or_null(valTy)) { + mlir::Type innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa(innerTy)) + typeError(); + + mlir::Value arg = block->getArgument(0); + arg = builder.loadIfRef(loc, arg); + assert(mlir::isa(arg.getType())); + + // Deallocate box + // The FIR type system doesn't necessarily know that this is a mutable box + // if we allocated the thread local array on the heap to avoid looped stack + // allocations.
+ mlir::Value addr = + hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); + mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); + fir::IfOp ifOp = + builder.create(loc, isAllocated, /*withElseRegion=*/false); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + + mlir::Value cast = builder.createConvert( + loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); + builder.create(loc, cast); + + builder.setInsertionPointAfter(ifOp); + builder.create(loc); + return; + } + + typeError(); +} + static mlir::Value createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::omp::DeclareReductionOp &reductionDecl, const ReductionProcessor::ReductionIdentifier redId, mlir::Type type, bool isByRef) { mlir::Type ty = fir::unwrapRefType(type); @@ -419,11 +472,24 @@ createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, // Create the private copy from the initial fir.box: hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)}; - // TODO: if the whole reduction is nested inside of a loop, this alloca - // could lead to a stack overflow (the memory is only freed at the end of - // the stack frame). The reduction declare operation needs a deallocation - // region to undo the init region. - hlfir::Entity temp = createStackTempFromMold(loc, builder, source); + // Allocating on the heap in case the whole reduction is nested inside of a + // loop + // TODO: compare performance here to using allocas - this could be made to + // work by inserting stacksave/stackrestore around the reduction in + // openmpirbuilder + auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); + // if needsDealloc isn't statically false, add cleanup region. TODO: always + // do this for allocatable boxes because they might have been re-allocated + // in the body of the loop/parallel region + std::optional cstNeedsDealloc = + fir::getIntIfConstant(needsDealloc); + assert(cstNeedsDealloc.has_value() && + "createTempFromMold decides this statically"); + if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { + auto insPt = builder.saveInsertionPoint(); + createReductionCleanupRegion(builder, loc, reductionDecl); + builder.restoreInsertionPoint(insPt); + } // Put the temporary inside of a box: hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp); @@ -462,7 +528,7 @@ mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( builder.setInsertionPointToEnd(&decl.getInitializerRegion().back()); mlir::Value init = - createReductionInitRegion(builder, loc, redId, type, isByRef); + createReductionInitRegion(builder, loc, decl, redId, type, isByRef); builder.create(loc, init); builder.createBlock(&decl.getReductionRegion(), diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array.f90 index 56dcabbb75c3a..26c9d4f085096 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-array.f90 @@ -15,13 +15,15 @@ program reduce ! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref>> init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): -! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"} ! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_4:.*]] = arith.constant 3 : index ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -! 
CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, +!fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref>> @@ -43,6 +45,18 @@ program reduce ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! CHECK-LABEL: func.func @_QQmain() diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 index 94bff410a2f0d..bed04401248be 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 @@ -15,13 +15,15 @@ program reduce ! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref>> init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): -! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<3xi32> {bindc_name = ".tmp"} ! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_4:.*]] = arith.constant 3 : index ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, +!fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref>> @@ -43,6 +45,18 @@ program reduce ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! 
CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} { diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90 index b25759713e318..ce6bd17265ddb 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction3.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90 @@ -1,15 +1,6 @@ -! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py - -! The script is designed to make adding checks to -! a test case fast, it is *not* designed to be authoritative -! about what constitutes a good test! The CHECK should be -! minimized and named to reflect the test intent. - ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s - - ! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref>> init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): ! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 @@ -17,14 +8,14 @@ ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> -! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp"} -! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) ! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : i32, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref>> ! CHECK: omp.yield(%[[VAL_8]] : !fir.ref>>) - -! CHECK-LABEL: } combiner { +! CHECK: } combiner { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>> @@ -41,6 +32,18 @@ ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! 
CHECK-LABEL: func.func @_QPs( @@ -122,4 +125,4 @@ subroutine s(x) !$omp end parallel do if (c(1) /= 5050) stop 1 -end subroutine s \ No newline at end of file +end subroutine s diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 index a1f339faea5cd..8f83a30c9fe78 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 @@ -29,8 +29,9 @@ subroutine reduce(r) ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> -! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp"} -! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) ! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : f64, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref>> @@ -53,6 +54,18 @@ subroutine reduce(r) ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! CHECK-LABEL: func.func private @_QFPreduce( diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 index a898204c881d9..a08bca9eb283b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 @@ -16,13 +16,14 @@ program reduce ! CHECK-LABEL omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref>> init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): -! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<2xi32> {bindc_name = ".tmp"} ! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<2xi32> {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! 
CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref>> @@ -45,6 +46,18 @@ program reduce ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! CHECK-LABEL func.func @_QQmain() attributes {fir.bindc_name = "reduce"} { diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 index f3745c8460915..045208d6f7ffa 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 @@ -16,19 +16,20 @@ program reduce ! CHECK-LABEL omp.declare_reduction @add_reduction_byref_box_2xi32 : !fir.ref>> init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): -! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<2xi32> {bindc_name = ".tmp"} ! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index ! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<2xi32> {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TRUE:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[VAL_5]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box> ! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref>> ! CHECK: omp.yield(%[[VAL_8]] : !fir.ref>>) -! CHECK-LABEL } combiner { +! CHECK: } combiner { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>> @@ -45,6 +46,18 @@ program reduce ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref ! CHECK: } ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> !fir.heap> +! 
CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield ! CHECK: } ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} { From b800a9352330a5b3db91d43f2cc6a0ddeda03aa6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 10:36:02 +0100 Subject: [PATCH 125/886] Fix MSVC "not all control paths return a value" warning. NFC. --- llvm/lib/IR/AsmWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 609de920ba7dd..941f6a7a7d823 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3313,7 +3313,7 @@ static const char *getImportTypeName(GlobalValueSummary::ImportKind IK) { case GlobalValueSummary::Declaration: return "declaration"; } - assert(false && "invalid import kind"); + llvm_unreachable("invalid import kind"); } void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) { From 759422c6df2dfe42d01bb64b42f43ab57db6e59e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 10:42:49 +0100 Subject: [PATCH 126/886] [DAG] visitEXTRACT_SUBVECTOR - pull out repeated SDLoc. NFC. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8fe074666a3dc..95c1cde0b9347 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24433,6 +24433,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); uint64_t ExtIdx = N->getConstantOperandVal(1); + SDLoc DL(N); // Extract from UNDEF is UNDEF. if (V.isUndef()) @@ -24448,7 +24449,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), V.getConstantOperandVal(1)) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0), + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0), V.getOperand(1)); } } @@ -24457,7 +24458,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (V.getOpcode() == ISD::SPLAT_VECTOR) if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) - return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0)); + return DAG.getSplatVector(NVT, DL, V.getOperand(0)); // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') @@ -24471,10 +24472,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if ((SrcNumElts % DestNumElts) == 0) { unsigned SrcDestRatio = SrcNumElts / DestNumElts; ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio; - EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - NewExtEC); + EVT NewExtVT = + EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - SDLoc DL(N); SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); @@ -24488,7 +24488,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio); EVT 
ScalarVT = SrcVT.getScalarType(); if ((ExtIdx % DestSrcRatio) == 0) { - SDLoc DL(N); unsigned IndexValScaled = ExtIdx / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC); @@ -24536,7 +24535,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // v2i8 extract_subvec v8i8 Y, 6 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { - SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?"); @@ -24575,13 +24573,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (NumElems == 1) { SDValue Src = V->getOperand(IdxVal); if (EltVT != Src.getValueType()) - Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src); + Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src); return DAG.getBitcast(NVT, Src); } // Extract the pieces from the original build_vector. - SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), - V->ops().slice(IdxVal, NumElems)); + SDValue BuildVec = + DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems)); return DAG.getBitcast(NVT, BuildVec); } } @@ -24608,7 +24606,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBitcast(NVT, V.getOperand(1)); } return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, + ISD::EXTRACT_SUBVECTOR, DL, NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), N->getOperand(1)); } From cca30dfb5935e05837e37cced4407a63393c6642 Mon Sep 17 00:00:00 2001 From: nikitalita Date: Thu, 11 Apr 2024 03:08:38 -0700 Subject: [PATCH 127/886] [DebugInfo][ObjectYAML] Remove duplicate "Flags" field from LabelSym (#88194) There was a duplicate flags field mistakenly left in LabelSym. [LabelSym only has one flags field](https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/include/cvinfo.h#L3806) --- llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index 64e1a58aa71a8..e1d2700623ec3 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -467,7 +467,6 @@ template <> void SymbolRecordImpl::map(IO &IO) { IO.mapOptional("Offset", Symbol.CodeOffset, 0U); IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("Flags", Symbol.Flags); - IO.mapRequired("Flags", Symbol.Flags); IO.mapRequired("DisplayName", Symbol.Name); } From b60974dc9e5d98054f5a3a0dac7eab70e38bd416 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 11:12:39 +0100 Subject: [PATCH 128/886] [VectorCombine][X86] Extend bitcast(shuffle(x,y)) test coverage As discussed on #87510 the intention is only to fold bitcast(shuffle(x,y)) -> shuffle(bitcast(x),bitcast(y)) if we won't actually increase the number of bitcasts (i.e. x or y is already bitcasted from the correct type). 
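For background: when the element widths match, the two forms select exactly the same bits, so the choice between them is purely about how many bitcast instructions survive. A standalone C++ sketch of that equivalence, with uint32_t/float arrays standing in for the vector types (illustrative only, not VectorCombine code):

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  std::array<uint32_t, 4> x{0x3f800000u, 2u, 3u, 0x7f800000u};
  std::array<uint32_t, 4> y{5u, 0x80000000u, 7u, 8u};
  std::array<int, 4> mask{0, 4, 3, 5}; // indices >= 4 select from y

  // Form 1: bitcast(shuffle(x, y, mask)) to four floats.
  std::array<uint32_t, 4> shuf;
  for (int i = 0; i < 4; ++i)
    shuf[i] = mask[i] < 4 ? x[mask[i]] : y[mask[i] - 4];
  std::array<float, 4> r1;
  std::memcpy(r1.data(), shuf.data(), 4 * sizeof(float));

  // Form 2: shuffle(bitcast(x), bitcast(y), mask) over four floats.
  std::array<float, 4> fx, fy, r2;
  std::memcpy(fx.data(), x.data(), 4 * sizeof(float));
  std::memcpy(fy.data(), y.data(), 4 * sizeof(float));
  for (int i = 0; i < 4; ++i)
    r2[i] = mask[i] < 4 ? fx[mask[i]] : fy[mask[i] - 4];

  // Same bits either way; profitability, not correctness, picks the form.
  assert(std::memcmp(r1.data(), r2.data(), 4 * sizeof(float)) == 0);
  return 0;
}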
--- .../Transforms/VectorCombine/X86/shuffle.ll | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index d1484fd5ab339..5020d37f86f56 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -121,6 +121,48 @@ define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { ret <16 x i8> %r } +; shuffle of 2 operands removes bitcasts + +define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { +; CHECK-LABEL: @bitcast_shuf_remove_bitcasts( +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> +; CHECK-NEXT: ret <4 x i64> [[R]] +; + %bc0 = bitcast <2 x i64> %a0 to <4 x i32> + %bc1 = bitcast <2 x i64> %a1 to <4 x i32> + %shuf = shufflevector <4 x i32> %bc0, <4 x i32> %bc1, <8 x i32> + %r = bitcast <8 x i32> %shuf to <4 x i64> + ret <4 x i64> %r +} + +; shuffle of 2 operands must reduce bitcasts + +define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { +; CHECK-LABEL: @bitcast_shuf_one_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; + %bc0 = bitcast <4 x i32> %a0 to <2 x i64> + %shuf = shufflevector <2 x i64> %bc0, <2 x i64> %a1, <4 x i32> + %r = bitcast <4 x i64> %shuf to <8 x i32> + ret <8 x i32> %r +} + +; TODO - Negative test - shuffle of 2 operands must not increase bitcasts + +define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { +; CHECK-LABEL: @bitcast_shuf_too_many_bitcasts( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; + %shuf = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> + %r = bitcast <4 x i64> %shuf to <8 x i32> + ret <8 x i32> %r +} + define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-LABEL: @PR35454_1( ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> From a8b461603b3fab3b229ea6552433cb359c30350c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 11 Apr 2024 10:25:53 +0000 Subject: [PATCH 129/886] [mlir] Apply ClangTidy BugProne fix forwarding reference passed to std::move(), which may unexpectedly cause lvalues to be moved; use std::forward() instead. --- mlir/lib/Transforms/Utils/DialectConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 8671c1008902a..f4e34a03d3d09 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -757,7 +757,7 @@ class UnresolvedMaterializationRewrite : public OperationRewrite { /// rewrite type and operation among the given rewrites. 
template static bool hasRewrite(R &&rewrites, Operation *op) { - return any_of(std::move(rewrites), [&](auto &rewrite) { + return any_of(std::forward(rewrites), [&](auto &rewrite) { auto *rewriteTy = dyn_cast(rewrite.get()); return rewriteTy && rewriteTy->getOperation() == op; }); From 962534c4b490239269bb2e11d036596826539046 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 11 Apr 2024 10:28:10 +0000 Subject: [PATCH 130/886] [mlir] Apply ClangTidy BugProne patch This time for real. --- mlir/lib/Transforms/Utils/DialectConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index f4e34a03d3d09..d85938847c776 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -757,7 +757,7 @@ class UnresolvedMaterializationRewrite : public OperationRewrite { /// rewrite type and operation among the given rewrites. template static bool hasRewrite(R &&rewrites, Operation *op) { - return any_of(std::forward(rewrites), [&](auto &rewrite) { + return any_of(std::forward(rewrites), [&](auto &rewrite) { auto *rewriteTy = dyn_cast(rewrite.get()); return rewriteTy && rewriteTy->getOperation() == op; }); From 364963a0a3935ced1acb2e959ecd08aef39405ef Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 11 Apr 2024 06:29:51 -0400 Subject: [PATCH 131/886] [BOLT][NFC] Do not assume text section name in more places (#88303) Fixes a couple more places where ".text" is presumed for the main code section name. --- bolt/lib/Rewrite/RewriteInstance.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 0c8ee0d417233..1f778d030f7ac 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -556,7 +556,7 @@ Error RewriteInstance::discoverStorage() { if (Error E = SectionNameOrErr.takeError()) return E; StringRef SectionName = SectionNameOrErr.get(); - if (SectionName == ".text") { + if (SectionName == BC->getMainCodeSectionName()) { BC->OldTextSectionAddress = Section.getAddress(); BC->OldTextSectionSize = Section.getSize(); @@ -1864,7 +1864,8 @@ Error RewriteInstance::readSpecialSections() { "Use -update-debug-sections to keep it.\n"; } - HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text"); + HasTextRelocations = (bool)BC->getUniqueSectionByName( + ".rela" + std::string(BC->getMainCodeSectionName())); HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab"); EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id"); @@ -3441,7 +3442,8 @@ void RewriteInstance::emitAndLink() { ErrorOr TextSection = BC->getUniqueSectionByName(BC->getMainCodeSectionName()); if (BC->HasRelocations && TextSection) - BC->renameSection(*TextSection, getOrgSecPrefix() + ".text"); + BC->renameSection(*TextSection, + getOrgSecPrefix() + BC->getMainCodeSectionName()); ////////////////////////////////////////////////////////////////////////////// // Assign addresses to new sections. From e2d482395992d725663543d297f5ab3cc5918fcc Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 11 Apr 2024 06:35:28 -0400 Subject: [PATCH 132/886] [BOLT][NFC] Make RepRet X86-specific (#88286) Bolt's RepRet pass is x86-specific, no need to add it for non-x86 targets. 
--- bolt/lib/Rewrite/BinaryPassManager.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 6c26bb7957269..be4888ccfa564 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -377,8 +377,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(PrintNormalized)); - Manager.registerPass(std::make_unique(NeverPrint), - opts::StripRepRet); + if (BC.isX86()) + Manager.registerPass(std::make_unique(NeverPrint), + opts::StripRepRet); Manager.registerPass(std::make_unique(PrintICF), opts::ICF); From a403ad9336a24c459ee79d2cb7675c4b1f32bfd9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 11:26:41 +0100 Subject: [PATCH 133/886] [VectorCombine] foldBitcastShuffle - limit bitcast(shuffle(x,y)) -> shuffle(bitcast(x),bitcast(y)) Only fold bitcast(shuffle(x,y)) -> shuffle(bitcast(x),bitcast(y)) if we won't actually increase the number of bitcasts (i.e. x or y is already bitcasted from the correct type). --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 13 ++++++++++++- llvm/test/Transforms/VectorCombine/X86/shuffle.ll | 7 +++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 633b46e2dc8ba..2f9767538e6cb 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -713,6 +713,18 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) { if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0) return false; + bool IsUnary = isa(V1); + + // For binary shuffles, only fold bitcast(shuffle(X,Y)) + // if it won't increase the number of bitcasts. + if (!IsUnary) { + auto *BCTy0 = dyn_cast(peekThroughBitcasts(V0)->getType()); + auto *BCTy1 = dyn_cast(peekThroughBitcasts(V1)->getType()); + if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) && + !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType())) + return false; + } + SmallVector NewMask; if (DestEltSize <= SrcEltSize) { // The bitcast is from wide to narrow/equal elements. The shuffle mask can @@ -736,7 +748,6 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) { FixedVectorType::get(DestTy->getScalarType(), NumSrcElts); auto *OldShuffleTy = FixedVectorType::get(SrcTy->getScalarType(), Mask.size()); - bool IsUnary = isa(V1); unsigned NumOps = IsUnary ? 1 : 2; // The new shuffle must not cost more than the old shuffle. 
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 5020d37f86f56..3d47f373ab77c 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -149,13 +149,12 @@ define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { ret <8 x i32> %r } -; TODO - Negative test - shuffle of 2 operands must not increase bitcasts +; Negative test - shuffle of 2 operands must not increase bitcasts define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: @bitcast_shuf_too_many_bitcasts( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A0:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A1:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %shuf = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> From 717d3f3974f43d90c1b8829a4077bbc2a2413c83 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 11:42:57 +0100 Subject: [PATCH 134/886] [VectorCombine] foldShuffleOfCastops - add initial shuffle(bitcast(x),bitcast(y)) -> bitcast(shuffle(x,y)) support Just handle cases where the bitcast src/dst element counts are the same (future patches will add shuffle mask scaling) --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 7 +++---- llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 2f9767538e6cb..b74fdf27d213a 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1459,7 +1459,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { return false; Instruction::CastOps Opcode = C0->getOpcode(); - if (Opcode == Instruction::BitCast || C0->getSrcTy() != C1->getSrcTy()) + if (C0->getSrcTy() != C1->getSrcTy()) return false; // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. 
@@ -1473,10 +1473,9 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { auto *ShuffleDstTy = dyn_cast(I.getType()); auto *CastDstTy = dyn_cast(C0->getDestTy()); auto *CastSrcTy = dyn_cast(C0->getSrcTy()); - if (!ShuffleDstTy || !CastDstTy || !CastSrcTy) + if (!ShuffleDstTy || !CastDstTy || !CastSrcTy || + CastDstTy->getElementCount() != CastSrcTy->getElementCount()) return false; - assert(CastDstTy->getElementCount() == CastSrcTy->getElementCount() && - "Unexpected src/dst element counts"); auto *NewShuffleDstTy = FixedVectorType::get(CastSrcTy->getScalarType(), Mask.size()); diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 60c6ff97c58b5..97fceacd82758 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -165,13 +165,12 @@ define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> % ret <8 x double> %r } -; TODO - bitcasts (same element count) +; bitcasts (same element count) define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( -; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <4 x float> -; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[R]] ; %x0 = bitcast <4 x i32> %a0 to <4 x float> From 9d9bb7b1b6e96dc833133dacf1e2c7d9792e640e Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Thu, 11 Apr 2024 12:57:46 +0200 Subject: [PATCH 135/886] Fix complex abs corner cases. (#88373) The current implementation fails for very small and very large values. For example, (0, -inf) should return inf, but it returns -inf. This ports the logic used in XLA. Tested with XLA's exhaustive_binary_test_f32_f64. --- .../ComplexToStandard/ComplexToStandard.cpp | 54 +-- .../convert-to-standard.mlir | 348 +++++++----------- .../ComplexToStandard/full-conversion.mlir | 34 +- 3 files changed, 162 insertions(+), 274 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index a6fcf6a758c07..462036e51a1f1 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -26,7 +26,7 @@ namespace mlir { using namespace mlir; namespace { -// The algorithm is listed in https://dl.acm.org/doi/pdf/10.1145/363717.363780. 
+
 struct AbsOpConversion : public OpConversionPattern<complex::AbsOp> {
   using OpConversionPattern<complex::AbsOp>::OpConversionPattern;
 
@@ -35,49 +35,27 @@ struct AbsOpConversion : public OpConversionPattern<complex::AbsOp> {
                   ConversionPatternRewriter &rewriter) const override {
     mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
 
-    arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
+    arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
 
     Type elementType = op.getType();
-    Value arg = adaptor.getComplex();
-
-    Value zero =
-        b.create<arith::ConstantOp>(elementType, b.getZeroAttr(elementType));
     Value one = b.create<arith::ConstantOp>(elementType,
                                             b.getFloatAttr(elementType, 1.0));
 
-    Value real = b.create<complex::ReOp>(elementType, arg);
-    Value imag = b.create<complex::ImOp>(elementType, arg);
-
-    Value realIsZero =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, real, zero);
-    Value imagIsZero =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, imag, zero);
+    Value real = b.create<complex::ReOp>(adaptor.getComplex());
+    Value imag = b.create<complex::ImOp>(adaptor.getComplex());
+    Value absReal = b.create<math::AbsFOp>(real, fmf);
+    Value absImag = b.create<math::AbsFOp>(imag, fmf);
 
-    // Real > Imag
-    Value imagDivReal = b.create<arith::DivFOp>(imag, real, fmf.getValue());
-    Value imagSq =
-        b.create<arith::MulFOp>(imagDivReal, imagDivReal, fmf.getValue());
-    Value imagSqPlusOne = b.create<arith::AddFOp>(imagSq, one, fmf.getValue());
-    Value imagSqrt = b.create<math::SqrtOp>(imagSqPlusOne, fmf.getValue());
-    Value realAbs = b.create<math::AbsFOp>(real, fmf.getValue());
-    Value absImag = b.create<arith::MulFOp>(imagSqrt, realAbs, fmf.getValue());
-
-    // Real <= Imag
-    Value realDivImag = b.create<arith::DivFOp>(real, imag, fmf.getValue());
-    Value realSq =
-        b.create<arith::MulFOp>(realDivImag, realDivImag, fmf.getValue());
-    Value realSqPlusOne = b.create<arith::AddFOp>(realSq, one, fmf.getValue());
-    Value realSqrt = b.create<math::SqrtOp>(realSqPlusOne, fmf.getValue());
-    Value imagAbs = b.create<math::AbsFOp>(imag, fmf.getValue());
-    Value absReal = b.create<arith::MulFOp>(realSqrt, imagAbs, fmf.getValue());
-
-    rewriter.replaceOpWithNewOp<arith::SelectOp>(
-        op, realIsZero, imagAbs,
-        b.create<arith::SelectOp>(
-            imagIsZero, realAbs,
-            b.create<arith::SelectOp>(
-                b.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, real, imag),
-                absImag, absReal)));
+    Value max = b.create<arith::MaximumFOp>(absReal, absImag, fmf);
+    Value min = b.create<arith::MinimumFOp>(absReal, absImag, fmf);
+    Value ratio = b.create<arith::DivFOp>(min, max, fmf);
+    Value ratioSq = b.create<arith::MulFOp>(ratio, ratio, fmf);
+    Value ratioSqPlusOne = b.create<arith::AddFOp>(ratioSq, one, fmf);
+    Value sqrt = b.create<math::SqrtOp>(ratioSqPlusOne, fmf);
+    Value result = b.create<arith::MulFOp>(max, sqrt, fmf);
+    Value isNaN =
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, result, result, fmf);
+    rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNaN, min, result);
     return success();
   }
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index 46dba04a88aa0..a1de61d10bb22 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -8,29 +8,21 @@
 func.func @complex_abs(%arg: complex<f32>) -> f32 {
   %abs = complex.abs %arg: complex<f32>
   return %abs : f32
 }
 
-// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex<f32>
 // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
-// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32
-// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32
-// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG]], %[[REAL]] : f32
-// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] : f32
-// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] : f32
-// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] : f32
-// 
CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL]] : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG]] : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[ABS3:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: return %[[ABS3]] : f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: return %[[ABS]] : f32 // ----- @@ -258,29 +250,21 @@ func.func @complex_log(%arg: complex) -> complex { %log = complex.log %arg: complex return %log : complex } -// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG]], %[[REAL]] : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] : f32 -// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL]] : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG]] : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[NORM:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = math.log %[[NORM]] 
: f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] : f32 +// CHECK: %[[RESULT:.*]] = arith.mulf %[[MAX]], %[[SQRT]] : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[RESULT]], %[[RESULT]] : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[RESULT]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = math.log %[[ABS]] : f32 // CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG2:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG2]], %[[REAL2]] : f32 @@ -509,30 +493,22 @@ func.func @complex_sign(%arg: complex) -> complex { // CHECK: %[[REAL_IS_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 // CHECK: %[[IMAG_IS_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 // CHECK: %[[IS_ZERO:.*]] = arith.andi %[[REAL_IS_ZERO]], %[[IMAG_IS_ZERO]] : i1 -// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 // CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG2:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL2]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG2]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG2]], %[[REAL2]] : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] : f32 -// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL2]] : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL2]], %[[IMAG2]] : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG2]] : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL2]], %[[IMAG2]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[NORM:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[NORM]] : f32 -// CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[NORM]] : f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL2]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG2]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] : f32 +// 
CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[ABS]] : f32 +// CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[ABS]] : f32 // CHECK: %[[SIGN:.*]] = complex.create %[[REAL_SIGN]], %[[IMAG_SIGN]] : complex // CHECK: %[[RESULT:.*]] = arith.select %[[IS_ZERO]], %[[ARG]], %[[SIGN]] : complex // CHECK: return %[[RESULT]] : complex @@ -725,29 +701,21 @@ func.func @complex_sqrt(%arg: complex) -> complex { // CHECK: %[[VAR0:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[VAR1:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[VAR2:.*]] = math.absf %[[VAR0]] : f32 -// CHECK: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[VAR3:.*]] = complex.re %[[ARG]] : complex -// CHECK: %[[VAR4:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[VAR5:.*]] = arith.cmpf oeq, %[[VAR3]], %[[CST0]] : f32 -// CHECK: %[[VAR6:.*]] = arith.cmpf oeq, %[[VAR4]], %[[CST0]] : f32 -// CHECK: %[[VAR7:.*]] = arith.divf %[[VAR4]], %[[VAR3]] : f32 -// CHECK: %[[VAR8:.*]] = arith.mulf %[[VAR7]], %[[VAR7]] : f32 -// CHECK: %[[VAR9:.*]] = arith.addf %[[VAR8]], %[[CST1]] : f32 -// CHECK: %[[VAR10:.*]] = math.sqrt %[[VAR9]] : f32 -// CHECK: %[[VAR11:.*]] = math.absf %[[VAR3]] : f32 -// CHECK: %[[VAR12:.*]] = arith.mulf %[[VAR10]], %[[VAR11]] : f32 -// CHECK: %[[VAR13:.*]] = arith.divf %[[VAR3]], %[[VAR4]] : f32 -// CHECK: %[[VAR14:.*]] = arith.mulf %[[VAR13]], %[[VAR13]] : f32 -// CHECK: %[[VAR15:.*]] = arith.addf %[[VAR14]], %[[CST1]] : f32 -// CHECK: %[[VAR16:.*]] = math.sqrt %[[VAR15]] : f32 -// CHECK: %[[VAR17:.*]] = math.absf %[[VAR4]] : f32 -// CHECK: %[[VAR18:.*]] = arith.mulf %[[VAR16]], %[[VAR17]] : f32 -// CHECK: %[[VAR19:.*]] = arith.cmpf ogt, %[[VAR3]], %[[VAR4]] : f32 -// CHECK: %[[VAR20:.*]] = arith.select %[[VAR19]], %[[VAR12]], %[[VAR18]] : f32 -// CHECK: %[[VAR21:.*]] = arith.select %[[VAR6]], %[[VAR11]], %[[VAR20]] : f32 -// CHECK: %[[VAR22:.*]] = arith.select %[[VAR5]], %[[VAR17]], %[[VAR21]] : f32 -// CHECK: %[[VAR23:.*]] = arith.addf %[[VAR2]], %[[VAR22]] : f32 +// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex +// CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[VAR23:.*]] = arith.addf %[[VAR2]], %[[ABS]] : f32 // CHECK: %[[CST2:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[VAR24:.*]] = arith.mulf %[[VAR23]], %[[CST2]] : f32 // CHECK: %[[VAR25:.*]] = math.sqrt %[[VAR24]] : f32 @@ -821,29 
+789,21 @@ func.func @complex_abs_with_fmf(%arg: complex) -> f32 { %abs = complex.abs %arg fastmath : complex return %abs : f32 } -// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG]], %[[REAL]] fastmath : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] fastmath : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] fastmath : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL]] fastmath : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] fastmath : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL]], %[[IMAG]] fastmath : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] fastmath : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] fastmath : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG]] fastmath : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] fastmath : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[ABS3:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: return %[[ABS3]] : f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: return %[[ABS]] : f32 // ----- @@ -928,29 +888,21 @@ func.func @complex_log_with_fmf(%arg: complex) -> complex { %log = complex.log %arg fastmath : complex return %log : complex } -// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG]], %[[REAL]] fastmath : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] fastmath : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], 
%[[ONE]] fastmath : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL]] fastmath : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] fastmath : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL]], %[[IMAG]] fastmath : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] fastmath : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] fastmath : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG]] fastmath : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] fastmath : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[NORM:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = math.log %[[NORM]] fastmath : f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = math.log %[[ABS]] fastmath : f32 // CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG2:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG2]], %[[REAL2]] fastmath : f32 @@ -1318,29 +1270,21 @@ func.func @complex_atan2_with_fmf(%lhs: complex, // CHECK: %[[VAR187:.*]] = complex.re %[[VAR186]] : complex // CHECK: %[[VAR188:.*]] = complex.im %[[VAR186]] : complex // CHECK: %[[VAR189:.*]] = math.absf %[[VAR187]] fastmath : f32 -// CHECK: %[[CST_7:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[CST_8:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[VAR190:.*]] = complex.re %[[VAR186]] : complex -// CHECK: %[[VAR191:.*]] = complex.im %[[VAR186]] : complex -// CHECK: %[[VAR192:.*]] = arith.cmpf oeq, %[[VAR190]], %[[CST_7]] : f32 -// CHECK: %[[VAR193:.*]] = arith.cmpf oeq, %[[VAR191]], %[[CST_7]] : f32 -// CHECK: %[[VAR194:.*]] = arith.divf %[[VAR191]], %[[VAR190]] fastmath : f32 -// CHECK: %[[VAR195:.*]] = arith.mulf %[[VAR194]], %[[VAR194]] fastmath : f32 -// CHECK: %[[VAR196:.*]] = arith.addf %[[VAR195]], %[[CST_8]] fastmath : f32 -// CHECK: %[[VAR197:.*]] = math.sqrt %[[VAR196]] fastmath : f32 -// CHECK: %[[VAR198:.*]] = math.absf %[[VAR190]] fastmath : f32 -// CHECK: %[[VAR199:.*]] = arith.mulf %[[VAR197]], %[[VAR198]] fastmath : f32 -// CHECK: %[[VAR200:.*]] = arith.divf %[[VAR190]], %[[VAR191]] fastmath : f32 -// CHECK: %[[VAR201:.*]] = arith.mulf %[[VAR200]], %[[VAR200]] fastmath : f32 -// CHECK: 
%[[VAR202:.*]] = arith.addf %[[VAR201]], %[[CST_8]] fastmath : f32 -// CHECK: %[[VAR203:.*]] = math.sqrt %[[VAR202]] fastmath : f32 -// CHECK: %[[VAR204:.*]] = math.absf %[[VAR191]] fastmath : f32 -// CHECK: %[[VAR205:.*]] = arith.mulf %[[VAR203]], %[[VAR204]] fastmath : f32 -// CHECK: %[[VAR206:.*]] = arith.cmpf ogt, %[[VAR190]], %[[VAR191]] : f32 -// CHECK: %[[VAR207:.*]] = arith.select %[[VAR206]], %[[VAR199]], %[[VAR205]] : f32 -// CHECK: %[[VAR208:.*]] = arith.select %[[VAR193]], %[[VAR198]], %[[VAR207]] : f32 -// CHECK: %[[VAR209:.*]] = arith.select %[[VAR192]], %[[VAR204]], %[[VAR208]] : f32 -// CHECK: %[[VAR210:.*]] = arith.addf %[[VAR189]], %[[VAR209]] fastmath : f32 +// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[REAL:.*]] = complex.re %[[VAR186]] : complex +// CHECK: %[[IMAG:.*]] = complex.im %[[VAR186]] : complex +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[VAR210:.*]] = arith.addf %[[VAR189]], %[[ABS]] fastmath : f32 // CHECK: %[[CST_9:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[VAR211:.*]] = arith.mulf %[[VAR210]], %[[CST_9]] fastmath : f32 // CHECK: %[[VAR212:.*]] = math.sqrt %[[VAR211]] fastmath : f32 @@ -1556,29 +1500,21 @@ func.func @complex_atan2_with_fmf(%lhs: complex, // CHECK: %[[VAR413:.*]] = arith.select %[[VAR412]], %[[VAR408]], %[[VAR402]] : f32 // CHECK: %[[VAR414:.*]] = arith.select %[[VAR412]], %[[VAR409]], %[[VAR403]] : f32 // CHECK: %[[VAR415:.*]] = complex.create %[[VAR413]], %[[VAR414]] : complex -// CHECK: %[[CST_19:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[CST_20:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[VAR416:.*]] = complex.re %[[VAR415]] : complex -// CHECK: %[[VAR417:.*]] = complex.im %[[VAR415]] : complex -// CHECK: %[[VAR418:.*]] = arith.cmpf oeq, %[[VAR416]], %[[CST_19]] : f32 -// CHECK: %[[VAR419:.*]] = arith.cmpf oeq, %[[VAR417]], %[[CST_19]] : f32 -// CHECK: %[[VAR420:.*]] = arith.divf %[[VAR417]], %[[VAR416]] fastmath : f32 -// CHECK: %[[VAR421:.*]] = arith.mulf %[[VAR420]], %[[VAR420]] fastmath : f32 -// CHECK: %[[VAR422:.*]] = arith.addf %[[VAR421]], %[[CST_20]] fastmath : f32 -// CHECK: %[[VAR423:.*]] = math.sqrt %[[VAR422]] fastmath : f32 -// CHECK: %[[VAR424:.*]] = math.absf %[[VAR416]] fastmath : f32 -// CHECK: %[[VAR425:.*]] = arith.mulf %[[VAR423]], %[[VAR424]] fastmath : f32 -// CHECK: %[[VAR426:.*]] = arith.divf %[[VAR416]], %[[VAR417]] fastmath : f32 -// CHECK: %[[VAR427:.*]] = arith.mulf %[[VAR426]], %[[VAR426]] fastmath : f32 -// CHECK: %[[VAR428:.*]] = arith.addf %[[VAR427]], %[[CST_20]] fastmath : f32 -// CHECK: %[[VAR429:.*]] = math.sqrt %[[VAR428]] fastmath : f32 -// CHECK: %[[VAR430:.*]] = math.absf %[[VAR417]] fastmath : f32 -// CHECK: 
%[[VAR431:.*]] = arith.mulf %[[VAR429]], %[[VAR430]] fastmath : f32 -// CHECK: %[[VAR432:.*]] = arith.cmpf ogt, %[[VAR416]], %[[VAR417]] : f32 -// CHECK: %[[VAR433:.*]] = arith.select %[[VAR432]], %[[VAR425]], %[[VAR431]] : f32 -// CHECK: %[[VAR434:.*]] = arith.select %[[VAR419]], %[[VAR424]], %[[VAR433]] : f32 -// CHECK: %[[VAR435:.*]] = arith.select %[[VAR418]], %[[VAR430]], %[[VAR434]] : f32 -// CHECK: %[[VAR436:.*]] = math.log %[[VAR435]] fastmath : f32 +// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[REAL:.*]] = complex.re %[[VAR415]] : complex +// CHECK: %[[IMAG:.*]] = complex.im %[[VAR415]] : complex +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[VAR436:.*]] = math.log %[[ABS]] fastmath : f32 // CHECK: %[[VAR437:.*]] = complex.re %[[VAR415]] : complex // CHECK: %[[VAR438:.*]] = complex.im %[[VAR415]] : complex // CHECK: %[[VAR439:.*]] = math.atan2 %[[VAR438]], %[[VAR437]] fastmath : f32 @@ -1805,29 +1741,21 @@ func.func @complex_sqrt_with_fmf(%arg: complex) -> complex { // CHECK: %[[VAR0:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[VAR1:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[VAR2:.*]] = math.absf %[[VAR0]] fastmath : f32 -// CHECK: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[VAR3:.*]] = complex.re %[[ARG]] : complex -// CHECK: %[[VAR4:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[VAR5:.*]] = arith.cmpf oeq, %[[VAR3]], %[[CST0]] : f32 -// CHECK: %[[VAR6:.*]] = arith.cmpf oeq, %[[VAR4]], %[[CST0]] : f32 -// CHECK: %[[VAR7:.*]] = arith.divf %[[VAR4]], %[[VAR3]] fastmath : f32 -// CHECK: %[[VAR8:.*]] = arith.mulf %[[VAR7]], %[[VAR7]] fastmath : f32 -// CHECK: %[[VAR9:.*]] = arith.addf %[[VAR8]], %[[CST1]] fastmath : f32 -// CHECK: %[[VAR10:.*]] = math.sqrt %[[VAR9]] fastmath : f32 -// CHECK: %[[VAR11:.*]] = math.absf %[[VAR3]] fastmath : f32 -// CHECK: %[[VAR12:.*]] = arith.mulf %[[VAR10]], %[[VAR11]] fastmath : f32 -// CHECK: %[[VAR13:.*]] = arith.divf %[[VAR3]], %[[VAR4]] fastmath : f32 -// CHECK: %[[VAR14:.*]] = arith.mulf %[[VAR13]], %[[VAR13]] fastmath : f32 -// CHECK: %[[VAR15:.*]] = arith.addf %[[VAR14]], %[[CST1]] fastmath : f32 -// CHECK: %[[VAR16:.*]] = math.sqrt %[[VAR15]] fastmath : f32 -// CHECK: %[[VAR17:.*]] = math.absf %[[VAR4]] fastmath : f32 -// CHECK: %[[VAR18:.*]] = arith.mulf %[[VAR16]], %[[VAR17]] fastmath : f32 -// CHECK: %[[VAR19:.*]] = arith.cmpf ogt, %[[VAR3]], %[[VAR4]] : f32 -// CHECK: %[[VAR20:.*]] = arith.select %[[VAR19]], %[[VAR12]], %[[VAR18]] : f32 -// CHECK: %[[VAR21:.*]] = arith.select %[[VAR6]], %[[VAR11]], %[[VAR20]] : f32 -// CHECK: %[[VAR22:.*]] = arith.select %[[VAR5]], %[[VAR17]], %[[VAR21]] : f32 -// CHECK: 
%[[VAR23:.*]] = arith.addf %[[VAR2]], %[[VAR22]] fastmath : f32 +// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex +// CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[VAR23:.*]] = arith.addf %[[VAR2]], %[[ABS]] fastmath : f32 // CHECK: %[[CST2:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[VAR24:.*]] = arith.mulf %[[VAR23]], %[[CST2]] fastmath : f32 // CHECK: %[[VAR25:.*]] = math.sqrt %[[VAR24]] fastmath : f32 @@ -1910,30 +1838,22 @@ func.func @complex_sign_with_fmf(%arg: complex) -> complex { // CHECK: %[[REAL_IS_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 // CHECK: %[[IMAG_IS_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 // CHECK: %[[IS_ZERO:.*]] = arith.andi %[[REAL_IS_ZERO]], %[[IMAG_IS_ZERO]] : i1 -// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 // CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG2:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL2]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG2]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG2]], %[[REAL2]] fastmath : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] fastmath : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] fastmath : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL2]] fastmath : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] fastmath : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL2]], %[[IMAG2]] fastmath : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] fastmath : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] fastmath : f32 -// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] fastmath : f32 -// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG2]] fastmath : f32 -// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] fastmath : f32 -// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL2]], %[[IMAG2]] : f32 -// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32 -// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32 -// CHECK: %[[NORM:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32 -// CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[NORM]] fastmath : f32 -// CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[NORM]] 
fastmath : f32 +// CHECK: %[[ABS_REAL:.*]] = math.absf %[[REAL2]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG2]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32 +// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32 +// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32 +// CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32 +// CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[ABS]] fastmath : f32 +// CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[ABS]] fastmath : f32 // CHECK: %[[SIGN:.*]] = complex.create %[[REAL_SIGN]], %[[IMAG_SIGN]] : complex // CHECK: %[[RESULT:.*]] = arith.select %[[IS_ZERO]], %[[ARG]], %[[SIGN]] : complex // CHECK: return %[[RESULT]] : complex diff --git a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir index 0f23e20167f49..2649d004a76ac 100644 --- a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir @@ -6,32 +6,22 @@ func.func @complex_abs(%arg: complex) -> f32 { %abs = complex.abs %arg: complex return %abs : f32 } -// CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[REAL:.*]] = llvm.extractvalue %[[ARG]][0] : ![[C_TY]] // CHECK: %[[IMAG:.*]] = llvm.extractvalue %[[ARG]][1] : ![[C_TY]] -// CHECK: %[[REAL_IS_ZERO:.*]] = llvm.fcmp "oeq" %[[REAL]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_IS_ZERO:.*]] = llvm.fcmp "oeq" %[[IMAG]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = llvm.fdiv %[[IMAG]], %[[REAL]] : f32 -// CHECK: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = llvm.fadd %[[IMAG_SQ]], %[[ONE]] : f32 -// CHECK: %[[IMAG_SQRT:.*]] = llvm.intr.sqrt(%[[IMAG_SQ_PLUS_ONE]]) : (f32) -> f32 -// CHECK: %[[REAL_ABS:.*]] = llvm.intr.fabs(%[[REAL]]) : (f32) -> f32 -// CHECK: %[[ABS_IMAG:.*]] = llvm.fmul %[[IMAG_SQRT]], %[[REAL_ABS]] : f32 - -// CHECK: %[[REAL_DIV_IMAG:.*]] = llvm.fdiv %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] : f32 -// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = llvm.fadd %[[REAL_SQ]], %[[ONE]] : f32 -// CHECK: %[[REAL_SQRT:.*]] = llvm.intr.sqrt(%[[REAL_SQ_PLUS_ONE]]) : (f32) -> f32 -// CHECK: %[[IMAG_ABS:.*]] = llvm.intr.fabs(%[[IMAG]]) : (f32) -> f32 -// CHECK: %[[ABS_REAL:.*]] = llvm.fmul %[[REAL_SQRT]], %[[IMAG_ABS]] : f32 - -// CHECK: %[[REAL_GT_IMAG:.*]] = llvm.fcmp "ogt" %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[ABS1:.*]] = llvm.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : i1, f32 -// CHECK: %[[ABS2:.*]] = llvm.select %[[IMAG_IS_ZERO]], %[[REAL_ABS]], %[[ABS1]] : i1, f32 -// CHECK: %[[NORM:.*]] = llvm.select %[[REAL_IS_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : i1, f32 -// CHECK: llvm.return %[[NORM]] : f32 +// CHECK: %[[ABS_REAL:.*]] = llvm.intr.fabs(%[[REAL]]) : (f32) -> f32 +// CHECK: %[[ABS_IMAG:.*]] = llvm.intr.fabs(%[[IMAG]]) : 
(f32) -> f32 +// CHECK: %[[MAX:.*]] = llvm.intr.maximum(%[[ABS_REAL]], %[[ABS_IMAG]]) : (f32, f32) -> f32 +// CHECK: %[[MIN:.*]] = llvm.intr.minimum(%[[ABS_REAL]], %[[ABS_IMAG]]) : (f32, f32) -> f32 +// CHECK: %[[RATIO:.*]] = llvm.fdiv %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = llvm.fmul %[[RATIO]], %[[RATIO]] : f32 +// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = llvm.fadd %[[RATIO_SQ]], %[[ONE]] : f32 +// CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[RATIO_SQ_PLUS_ONE]]) : (f32) -> f32 +// CHECK: %[[RESULT:.*]] = llvm.fmul %[[MAX]], %[[SQRT]] : f32 +// CHECK: %[[IS_NAN:.*]] = llvm.fcmp "uno" %[[RESULT]], %11 : f32 +// CHECK: %[[RET:.*]] = llvm.select %[[IS_NAN]], %[[MIN]], %[[RESULT]] : i1, f32 +// CHECK: llvm.return %[[RET]] : f32 // CHECK-LABEL: llvm.func @complex_eq // CHECK-SAME: %[[LHS:.*]]: ![[C_TY:.*]], %[[RHS:.*]]: ![[C_TY:.*]]) From b1094776152b68efa05f69b7b833f9cbc0727efc Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 11 Apr 2024 19:10:53 +0800 Subject: [PATCH 136/886] [InstCombine] Infer nsw/nuw for trunc (#87910) This patch adds support for inferring trunc's nsw/nuw flags. --- clang/test/CodeGen/ms-intrinsics-other.c | 4 +- clang/test/CodeGen/ms-intrinsics.c | 4 +- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- clang/test/Headers/__clang_hip_math.hip | 2 +- .../InstCombine/InstCombineCasts.cpp | 15 +++- .../RISCV/riscv-vsetvli-knownbits.ll | 4 +- .../RISCV/riscv-vsetvlimax-knownbits.ll | 4 +- llvm/test/Transforms/InstCombine/add.ll | 2 +- .../Transforms/InstCombine/binop-itofp.ll | 12 ++-- .../test/Transforms/InstCombine/bswap-fold.ll | 8 +-- llvm/test/Transforms/InstCombine/bswap.ll | 4 +- llvm/test/Transforms/InstCombine/cast.ll | 30 ++++---- .../Transforms/InstCombine/cmp-intrinsic.ll | 4 +- .../Transforms/InstCombine/compare-signs.ll | 4 +- llvm/test/Transforms/InstCombine/ctpop.ll | 2 +- .../extractelement-inseltpoison.ll | 10 +-- .../Transforms/InstCombine/extractelement.ll | 28 ++++---- llvm/test/Transforms/InstCombine/ffs-1.ll | 2 +- llvm/test/Transforms/InstCombine/fls.ll | 2 +- .../InstCombine/fold-log2-ceil-idiom.ll | 2 +- .../high-bit-signmask-with-trunc.ll | 20 +++--- .../Transforms/InstCombine/icmp-mul-zext.ll | 2 +- .../InstCombine/icmp-of-trunc-ext.ll | 20 +++--- .../InstCombine/icmp-topbitssame.ll | 8 +-- .../Transforms/InstCombine/insert-trunc.ll | 14 ++-- .../Transforms/InstCombine/insertelt-trunc.ll | 34 ++++----- .../test/Transforms/InstCombine/known-bits.ll | 2 +- .../Transforms/InstCombine/known-non-zero.ll | 4 +- .../InstCombine/known-phi-recurse.ll | 18 ++--- .../logical-select-inseltpoison.ll | 2 +- .../Transforms/InstCombine/logical-select.ll | 2 +- .../lshr-trunc-sext-to-ashr-sext.ll | 12 ++-- llvm/test/Transforms/InstCombine/lshr.ll | 22 +++--- .../merging-multiple-stores-into-successor.ll | 2 +- llvm/test/Transforms/InstCombine/narrow.ll | 2 +- .../Transforms/InstCombine/negated-bitmask.ll | 4 +- llvm/test/Transforms/InstCombine/pr34349.ll | 2 +- .../InstCombine/reduction-add-sext-zext-i1.ll | 4 +- llvm/test/Transforms/InstCombine/sadd_sat.ll | 38 +++++----- .../InstCombine/select-cmp-cttz-ctlz.ll | 28 ++++---- .../InstCombine/select-imm-canon.ll | 6 +- llvm/test/Transforms/InstCombine/select.ll | 2 +- .../InstCombine/sext-of-trunc-nsw.ll | 6 +- llvm/test/Transforms/InstCombine/sext.ll | 2 +- llvm/test/Transforms/InstCombine/shift-add.ll | 2 +- ...ciation-in-bittest-with-truncation-lshr.ll | 4 +- ...ount-reassociation-with-truncation-ashr.ll | 8 +-- ...ount-reassociation-with-truncation-lshr.ll | 8 +-- 
.../Transforms/InstCombine/shift-shift.ll | 6 +- llvm/test/Transforms/InstCombine/shift.ll | 6 +- .../test/Transforms/InstCombine/shl-demand.ll | 2 +- ...-test-via-right-shifting-all-other-bits.ll | 4 +- .../Transforms/InstCombine/trunc-demand.ll | 8 +-- .../InstCombine/trunc-inseltpoison.ll | 6 +- .../InstCombine/trunc-shift-trunc.ll | 4 +- llvm/test/Transforms/InstCombine/trunc.ll | 6 +- .../InstCombine/truncating-saturate.ll | 20 +++--- .../Transforms/InstCombine/vector-trunc.ll | 4 +- llvm/test/Transforms/InstCombine/xor-ashr.ll | 4 +- .../zext-ctlz-trunc-to-ctlz-add.ll | 4 +- .../AArch64/deterministic-type-shrinkage.ll | 16 ++--- .../LoopVectorize/AArch64/intrinsiccost.ll | 6 +- .../LoopVectorize/X86/intrinsiccost.ll | 8 +-- .../Transforms/LoopVectorize/reduction.ll | 4 +- .../PhaseOrdering/AArch64/quant_4x4.ll | 72 +++++++++---------- .../Transforms/SLPVectorizer/X86/pr46983.ll | 2 +- 66 files changed, 309 insertions(+), 296 deletions(-) diff --git a/clang/test/CodeGen/ms-intrinsics-other.c b/clang/test/CodeGen/ms-intrinsics-other.c index 36c40dddcbb4f..0e9dfe34b84cc 100644 --- a/clang/test/CodeGen/ms-intrinsics-other.c +++ b/clang/test/CodeGen/ms-intrinsics-other.c @@ -87,7 +87,7 @@ unsigned char test_BitScanForward64(unsigned LONG *Index, unsigned __int64 Mask) // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: // CHECK: [[INDEX:%[0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %Mask, i1 true) -// CHECK: [[TRUNC_INDEX:%[0-9]+]] = trunc i64 [[INDEX]] to i32 +// CHECK: [[TRUNC_INDEX:%[0-9]+]] = trunc nuw nsw i64 [[INDEX]] to i32 // CHECK: store i32 [[TRUNC_INDEX]], ptr %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -102,7 +102,7 @@ unsigned char test_BitScanReverse64(unsigned LONG *Index, unsigned __int64 Mask) // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: // CHECK: [[REVINDEX:%[0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) -// CHECK: [[TRUNC_REVINDEX:%[0-9]+]] = trunc i64 [[REVINDEX]] to i32 +// CHECK: [[TRUNC_REVINDEX:%[0-9]+]] = trunc nuw nsw i64 [[REVINDEX]] to i32 // CHECK: [[INDEX:%[0-9]+]] = xor i32 [[TRUNC_REVINDEX]], 63 // CHECK: store i32 [[INDEX]], ptr %Index, align 4 // CHECK: br label %[[END_LABEL]] diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c index 5bb003d1f91fc..6eabd725e2f7c 100644 --- a/clang/test/CodeGen/ms-intrinsics.c +++ b/clang/test/CodeGen/ms-intrinsics.c @@ -189,7 +189,7 @@ unsigned char test_BitScanForward64(unsigned long *Index, unsigned __int64 Mask) // CHECK-ARM-X64: ret i8 [[RESULT]] // CHECK-ARM-X64: [[ISNOTZERO_LABEL]]: // CHECK-ARM-X64: [[INDEX:%[0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %Mask, i1 true) -// CHECK-ARM-X64: [[TRUNC_INDEX:%[0-9]+]] = trunc i64 [[INDEX]] to i32 +// CHECK-ARM-X64: [[TRUNC_INDEX:%[0-9]+]] = trunc nuw nsw i64 [[INDEX]] to i32 // CHECK-ARM-X64: store i32 [[TRUNC_INDEX]], ptr %Index, align 4 // CHECK-ARM-X64: br label %[[END_LABEL]] @@ -204,7 +204,7 @@ unsigned char test_BitScanReverse64(unsigned long *Index, unsigned __int64 Mask) // CHECK-ARM-X64: ret i8 [[RESULT]] // CHECK-ARM-X64: [[ISNOTZERO_LABEL]]: // CHECK-ARM-X64: [[REVINDEX:%[0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) -// CHECK-ARM-X64: [[TRUNC_REVINDEX:%[0-9]+]] = trunc i64 [[REVINDEX]] to i32 +// CHECK-ARM-X64: [[TRUNC_REVINDEX:%[0-9]+]] = trunc nuw nsw i64 [[REVINDEX]] to i32 // CHECK-ARM-X64: [[INDEX:%[0-9]+]] = xor i32 [[TRUNC_REVINDEX]], 63 // CHECK-ARM-X64: store i32 [[INDEX]], ptr %Index, align 4 // CHECK-ARM-X64: br label %[[END_LABEL]] diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8a4533633706b..bdca97c887867 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -528,7 +528,7 @@ void test_read_exec_lo(global uint* out) {
 // CHECK-LABEL: @test_read_exec_hi(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
 // CHECK: lshr i64 [[A:%.*]], 32
-// CHECK: trunc i64 [[B:%.*]] to i32
+// CHECK: trunc nuw i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 37099de74fb8e..2e5f521a5feae 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3703,7 +3703,7 @@ extern "C" __device__ BOOL_TYPE test___signbitf(float x) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64
 // CHECK-NEXT:    [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63
-// CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[DOTLOBIT]] to i32
+// CHECK-NEXT:    [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32
 // CHECK-NEXT:    ret i32 [[CONV]]
 //
 extern "C" __device__ BOOL_TYPE test___signbit(double x) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 0652a8ba80b3f..437e9b92c7032 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -897,7 +897,20 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
     }
   }
 
-  return nullptr;
+  bool Changed = false;
+  if (!Trunc.hasNoSignedWrap() &&
+      ComputeMaxSignificantBits(Src, /*Depth=*/0, &Trunc) <= DestWidth) {
+    Trunc.setHasNoSignedWrap(true);
+    Changed = true;
+  }
+  if (!Trunc.hasNoUnsignedWrap() &&
+      MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
+                        /*Depth=*/0, &Trunc)) {
+    Trunc.setHasNoUnsignedWrap(true);
+    Changed = true;
+  }
+
+  return Changed ? &Trunc : nullptr;
 }
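// Editor's note (illustrative, not part of the upstream patch): the two
// queries above justify the new flags on the narrowed value. For a
// 32 -> 16 bit truncation,
//   %t = trunc i32 %x to i16
// becomes `trunc nuw` when the top 16 bits of %x are known zero
// (MaskedValueIsZero), and `trunc nsw` when %x has at most 16 significant
// bits (ComputeMaxSignificantBits), i.e. sign-extending %t gives back %x.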
 
 Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvli-knownbits.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvli-knownbits.ll
index 1afae6565fe26..6e0acfd685116 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvli-knownbits.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvli-knownbits.ll
@@ -45,7 +45,7 @@ entry:
 define signext i32 @vsetvl_sext() nounwind #0 {
 ; CHECK-LABEL: @vsetvl_sext(
 ; CHECK-NEXT:    [[A:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 1, i64 1, i64 1)
-; CHECK-NEXT:    [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[B:%.*]] = trunc nuw nsw i64 [[A]] to i32
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %a = call i64 @llvm.riscv.vsetvli(i64 1, i64 1, i64 1)
@@ -56,7 +56,7 @@ define signext i32 @vsetvl_sext() nounwind #0 {
 define zeroext i32 @vsetvl_zext() nounwind #0 {
 ; CHECK-LABEL: @vsetvl_zext(
 ; CHECK-NEXT:    [[A:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 1, i64 1, i64 1)
-; CHECK-NEXT:    [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[B:%.*]] = trunc nuw nsw i64 [[A]] to i32
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %a = call i64 @llvm.riscv.vsetvli(i64 1, i64 1, i64 1)
diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvlimax-knownbits.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvlimax-knownbits.ll
index 093ba75e87b5a..811a29c7e5624 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvlimax-knownbits.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vsetvlimax-knownbits.ll
@@ -45,7 +45,7 @@ entry:
 define signext i32 @vsetvlmax_sext() nounwind #0 {
 ; CHECK-LABEL: @vsetvlmax_sext(
 ; CHECK-NEXT:    [[A:%.*]] = call i64 @llvm.riscv.vsetvlimax.i64(i64 1, i64 1)
-; CHECK-NEXT:    [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[B:%.*]] = trunc nuw nsw i64 [[A]] to i32
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %a = call i64 @llvm.riscv.vsetvlimax(i64 1, i64 1)
@@ -56,7 +56,7 @@ define signext i32 @vsetvlmax_sext() nounwind #0 {
 define zeroext i32 @vsetvlmax_zext() nounwind #0 {
 ; CHECK-LABEL: @vsetvlmax_zext(
 ; CHECK-NEXT:    [[A:%.*]] = call i64 @llvm.riscv.vsetvlimax.i64(i64 1, i64 1)
-; CHECK-NEXT:    [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[B:%.*]] = trunc nuw nsw i64 [[A]] to i32
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %a = call i64 @llvm.riscv.vsetvlimax(i64 1, i64 1)
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index ec3aca26514ca..23eee8547597e 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -2375,7 +2375,7 @@ define { i64, i64 } @PR57576(i64 noundef %x, i64 noundef %y, i64 noundef %z, i64
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i128 [[XY]], [[ZZ]]
 ; CHECK-NEXT:    [[T:%.*]] = trunc i128 [[SUB]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[SUB]], 64
-; CHECK-NEXT:    [[DOTTR:%.*]] = trunc i128 [[TMP1]] to i64
+; CHECK-NEXT:    [[DOTTR:%.*]] = trunc nuw i128 [[TMP1]] to i64
 ; CHECK-NEXT:    [[DOTNARROW:%.*]] = sub i64 [[DOTTR]], [[W:%.*]]
 ; CHECK-NEXT:    [[R1:%.*]] = insertvalue { i64, i64 } poison, i64 [[T]], 0
 ; CHECK-NEXT:    [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[DOTNARROW]], 1
diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll
index cd9ec1e59203f..d72a54e8babc9 100644
--- a/llvm/test/Transforms/InstCombine/binop-itofp.ll
+++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -1010,7 +1010,7 @@ define float @test_ui_add_with_signed_constant(i32 %shr.i) {
 define float 
@missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @missed_nonzero_check_on_constant_for_si_fmul( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float ; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[CONV1_I]]) ; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4 @@ -1027,7 +1027,7 @@ define float @missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @missed_nonzero_check_on_constant_for_si_fmul_vec( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0 ; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float> @@ -1048,7 +1048,7 @@ define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 % define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @negzero_check_on_constant_for_si_fmul( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float ; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[CONV1_I]] ; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) @@ -1066,7 +1066,7 @@ define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345) define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_vec_w_undef( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0 ; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float> @@ -1087,7 +1087,7 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1 define <2 x float> @nonzero_check_on_constant_for_si_fmul_nz_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_nz_vec_w_undef( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0 ; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float> @@ -1108,7 +1108,7 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_nz_vec_w_undef(i1 %c, define <2 x float> 
@nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(i1 %c, i1 %.b, ptr %g_2345) { ; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264 -; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: [[CONV_I_S:%.*]] = trunc nuw i32 [[SEL]] to i16 ; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0 ; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index 05933d37057cc..19522168beaf5 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -211,7 +211,7 @@ define i64 @variable_shl_not_masked_enough_i64(i64 %x, i64 %n) { define i16 @test7(i32 %A) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 16 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[D:%.*]] = trunc nuw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[D]] ; %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind @@ -223,7 +223,7 @@ define i16 @test7(i32 %A) { define <2 x i16> @test7_vector(<2 x i32> %A) { ; CHECK-LABEL: @test7_vector( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %A) nounwind @@ -235,7 +235,7 @@ define <2 x i16> @test7_vector(<2 x i32> %A) { define i16 @test8(i64 %A) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 48 -; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i16 +; CHECK-NEXT: [[D:%.*]] = trunc nuw i64 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[D]] ; %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind @@ -247,7 +247,7 @@ define i16 @test8(i64 %A) { define <2 x i16> @test8_vector(<2 x i64> %A) { ; CHECK-LABEL: @test8_vector( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i64> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %A) nounwind diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 21eb170b8c58d..d42583bb5699b 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -43,7 +43,7 @@ define i16 @test1_trunc(i32 %i) { ; CHECK-NEXT: [[T3:%.*]] = lshr i32 [[I]], 8 ; CHECK-NEXT: [[T4:%.*]] = and i32 [[T3]], 65280 ; CHECK-NEXT: [[T5:%.*]] = or disjoint i32 [[T1]], [[T4]] -; CHECK-NEXT: [[T13:%.*]] = trunc i32 [[T5]] to i16 +; CHECK-NEXT: [[T13:%.*]] = trunc nuw i32 [[T5]] to i16 ; CHECK-NEXT: ret i16 [[T13]] ; %t1 = lshr i32 %i, 24 @@ -61,7 +61,7 @@ define i16 @test1_trunc_extra_use(i32 %i) { ; CHECK-NEXT: [[T4:%.*]] = and i32 [[T3]], 65280 ; CHECK-NEXT: [[T5:%.*]] = or disjoint i32 [[T1]], [[T4]] ; CHECK-NEXT: call void @extra_use(i32 [[T5]]) -; CHECK-NEXT: [[T13:%.*]] = trunc i32 [[T5]] to i16 +; CHECK-NEXT: [[T13:%.*]] = trunc nuw i32 [[T5]] to i16 ; CHECK-NEXT: ret i16 [[T13]] ; %t1 = lshr i32 %i, 24 diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 
97554e9462043..d9c93ba277295 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1471,7 +1471,7 @@ define i64 @test91(i64 %A) { ; ALL-LABEL: @test91( ; ALL-NEXT: [[B:%.*]] = sext i64 [[A:%.*]] to i96 ; ALL-NEXT: [[C:%.*]] = lshr i96 [[B]], 48 -; ALL-NEXT: [[D:%.*]] = trunc i96 [[C]] to i64 +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw i96 [[C]] to i64 ; ALL-NEXT: ret i64 [[D]] ; %B = sext i64 %A to i96 @@ -1676,7 +1676,7 @@ define i8 @trunc_lshr_overshift_sext_uses3(i8 %A) { define i8 @trunc_lshr_sext_wide_input(i16 %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input( ; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[A:%.*]], 9 -; ALL-NEXT: [[D:%.*]] = trunc i16 [[TMP1]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nsw i16 [[TMP1]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i16 %A to i32 @@ -1688,7 +1688,7 @@ define i8 @trunc_lshr_sext_wide_input(i16 %A) { define i8 @trunc_lshr_sext_wide_input_exact(i16 %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_exact( ; ALL-NEXT: [[TMP1:%.*]] = ashr exact i16 [[A:%.*]], 9 -; ALL-NEXT: [[D:%.*]] = trunc i16 [[TMP1]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nsw i16 [[TMP1]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i16 %A to i32 @@ -1702,7 +1702,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses1(<2 x i16> %A) { ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) ; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i16> [[TMP1]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i16> %A to <2 x i32> @@ -1747,7 +1747,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) { define <2 x i8> @trunc_lshr_overshift_wide_input_sext(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_overshift_wide_input_sext( ; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i16> [[TMP1]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i16> %A to <2 x i32> @@ -1761,7 +1761,7 @@ define i8 @trunc_lshr_overshift_sext_wide_input_uses1(i16 %A) { ; ALL-NEXT: [[B:%.*]] = sext i16 [[A:%.*]] to i32 ; ALL-NEXT: call void @use_i32(i32 [[B]]) ; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[A]], 15 -; ALL-NEXT: [[D:%.*]] = trunc i16 [[TMP1]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nsw i16 [[TMP1]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i16 %A to i32 @@ -1776,7 +1776,7 @@ define <2 x i8> @trunc_lshr_overshift_sext_wide_input_uses2(<2 x i16> %A) { ; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], ; ALL-NEXT: [[C:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = trunc <2 x i16> [[TMP1]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i16> %A to <2 x i32> @@ -1925,7 +1925,7 @@ define <2 x i8> @trunc_lshr_overshift2_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift2_sext( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1939,7 +1939,7 @@ define i8 @trunc_lshr_overshift2_sext_uses1(i8 %A) { ; ALL-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i32 ; ALL-NEXT: call void @use_i32(i32 [[B]]) ; ALL-NEXT: 
[[C:%.*]] = lshr i32 [[B]], 25 -; ALL-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw i32 [[C]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i8 %A to i32 @@ -1954,7 +1954,7 @@ define <2 x i8> @trunc_lshr_overshift2_sext_uses2(<2 x i8> %A) { ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1970,7 +1970,7 @@ define i8 @trunc_lshr_overshift2_sext_uses3(i8 %A) { ; ALL-NEXT: call void @use_i32(i32 [[B]]) ; ALL-NEXT: [[C:%.*]] = lshr i32 [[B]], 25 ; ALL-NEXT: call void @use_i32(i32 [[C]]) -; ALL-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw i32 [[C]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i8 %A to i32 @@ -2018,7 +2018,7 @@ define <2 x i8> @trunc_lshr_zext_uniform_undef(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_uniform_undef( ; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nuw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = zext <2 x i8> %A to <2 x i32> @@ -2042,7 +2042,7 @@ define <3 x i8> @trunc_lshr_zext_nonuniform_undef(<3 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_nonuniform_undef( ; ALL-NEXT: [[B:%.*]] = zext <3 x i8> [[A:%.*]] to <3 x i32> ; ALL-NEXT: [[C:%.*]] = lshr <3 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <3 x i32> [[C]] to <3 x i8> +; ALL-NEXT: [[D:%.*]] = trunc nuw <3 x i32> [[C]] to <3 x i8> ; ALL-NEXT: ret <3 x i8> [[D]] ; %B = zext <3 x i8> %A to <3 x i32> @@ -2095,7 +2095,7 @@ define i4 @pr33078_3(i8 %A) { ; ALL-LABEL: @pr33078_3( ; ALL-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i16 ; ALL-NEXT: [[C:%.*]] = lshr i16 [[B]], 12 -; ALL-NEXT: [[D:%.*]] = trunc i16 [[C]] to i4 +; ALL-NEXT: [[D:%.*]] = trunc nuw i16 [[C]] to i4 ; ALL-NEXT: ret i4 [[D]] ; %B = sext i8 %A to i16 @@ -2109,7 +2109,7 @@ define i8 @pr33078_4(i3 %x) { ; ALL-LABEL: @pr33078_4( ; ALL-NEXT: [[B:%.*]] = sext i3 [[X:%.*]] to i16 ; ALL-NEXT: [[C:%.*]] = lshr i16 [[B]], 13 -; ALL-NEXT: [[D:%.*]] = trunc i16 [[C]] to i8 +; ALL-NEXT: [[D:%.*]] = trunc nuw nsw i16 [[C]] to i8 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i3 %x to i16 diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll index 5955650167c21..66cbb2636cbc2 100644 --- a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -618,7 +618,7 @@ define i1 @trunc_cttz_false_ult_other_i32_i6(i32 %x) { define i1 @trunc_cttz_false_ult_other_i32_i6_extra_use(i32 %x) { ; CHECK-LABEL: @trunc_cttz_false_ult_other_i32_i6_extra_use( ; CHECK-NEXT: [[TZ:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[TZ]] to i6 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[TZ]] to i6 ; CHECK-NEXT: call void @use6(i6 [[TRUNC]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i6 [[TRUNC]], 7 ; CHECK-NEXT: ret i1 [[CMP]] @@ -720,7 +720,7 @@ define i1 @trunc_ctlz_false_ugt_other_i32_i6(i32 %x) { define i1 @trunc_ctlz_false_ugt_other_i32_i6_extra_use(i32 %x) { ; CHECK-LABEL: @trunc_ctlz_false_ugt_other_i32_i6_extra_use( ; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 
false), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[LZ]] to i6 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[LZ]] to i6 ; CHECK-NEXT: call void @use6(i6 [[TRUNC]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i6 [[TRUNC]], 4 ; CHECK-NEXT: ret i1 [[CMP]] diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll index d7aa710e1ef03..3730d46d5f0f4 100644 --- a/llvm/test/Transforms/InstCombine/compare-signs.ll +++ b/llvm/test/Transforms/InstCombine/compare-signs.ll @@ -223,7 +223,7 @@ define <2 x i1> @shift_trunc_signbit_test_vec_uses(<2 x i17> %x, ptr %p1, ptr %p ; CHECK-LABEL: @shift_trunc_signbit_test_vec_uses( ; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i17> [[X:%.*]], ; CHECK-NEXT: store <2 x i17> [[SH]], ptr [[P1:%.*]], align 8 -; CHECK-NEXT: [[TR:%.*]] = trunc <2 x i17> [[SH]] to <2 x i13> +; CHECK-NEXT: [[TR:%.*]] = trunc nuw <2 x i17> [[SH]] to <2 x i13> ; CHECK-NEXT: store <2 x i13> [[TR]], ptr [[P2:%.*]], align 4 ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i17> [[X]], ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -255,7 +255,7 @@ define i1 @shift_trunc_wrong_shift(i32 %x) { define i1 @shift_trunc_wrong_cmp(i32 %x) { ; CHECK-LABEL: @shift_trunc_wrong_cmp( ; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 24 -; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i8 +; CHECK-NEXT: [[TR:%.*]] = trunc nuw i32 [[SH]] to i8 ; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[TR]], 1 ; CHECK-NEXT: ret i1 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index dcea5fa87479e..27194724b7d83 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -397,7 +397,7 @@ define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) { ; CHECK-LABEL: @parity_xor_trunc( ; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]] -; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[I4:%.*]] = trunc nuw nsw i64 [[TMP2]] to i32 ; CHECK-NEXT: [[I5:%.*]] = and i32 [[I4]], 1 ; CHECK-NEXT: ret i32 [[I5]] ; diff --git a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll index 877aa2e523a31..57e81d2da8989 100644 --- a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll @@ -48,7 +48,7 @@ define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) { ; ; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt( ; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 +; BE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; BE-NEXT: ret i32 [[R]] ; %i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 @@ -64,7 +64,7 @@ define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) { ; ; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt( ; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48 -; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16 +; BE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP1]] to i16 ; BE-NEXT: ret i16 [[R]] ; %i = insertelement <2 x i64> poison, i64 %x, i32 1 @@ -76,7 +76,7 @@ define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) { define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) { ; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( ; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 +; LE-NEXT: [[R:%.*]] = trunc nuw i64 
[[TMP1]] to i32 ; LE-NEXT: ret i32 [[R]] ; ; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( @@ -166,7 +166,7 @@ define i8 @bitcasted_inselt_wide_source_uses(i32 %x) { define float @bitcasted_inselt_to_FP(i64 %x) { ; LE-LABEL: @bitcasted_inselt_to_FP( ; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; LE-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float ; LE-NEXT: ret float [[R]] ; @@ -218,7 +218,7 @@ define i32 @bitcasted_inselt_from_FP(double %x) { ; LE-LABEL: @bitcasted_inselt_from_FP( ; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64 ; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32 -; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32 +; LE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP2]] to i32 ; LE-NEXT: ret i32 [[R]] ; ; BE-LABEL: @bitcasted_inselt_from_FP( diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll index bc5dd060a540a..28a4702559c46 100644 --- a/llvm/test/Transforms/InstCombine/extractelement.ll +++ b/llvm/test/Transforms/InstCombine/extractelement.ll @@ -50,7 +50,7 @@ define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) { ; ; ANYBE-LABEL: @bitcasted_inselt_wide_source_zero_elt( ; ANYBE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; ANYBE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 +; ANYBE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; ANYBE-NEXT: ret i32 [[R]] ; %i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 @@ -66,7 +66,7 @@ define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) { ; ; ANYBE-LABEL: @bitcasted_inselt_wide_source_modulo_elt( ; ANYBE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48 -; ANYBE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16 +; ANYBE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP1]] to i16 ; ANYBE-NEXT: ret i16 [[R]] ; %i = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -78,7 +78,7 @@ define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) { define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) { ; ANYLE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( ; ANYLE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; ANYLE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32 +; ANYLE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; ANYLE-NEXT: ret i32 [[R]] ; ; ANYBE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt( @@ -168,7 +168,7 @@ define i8 @bitcasted_inselt_wide_source_uses(i32 %x) { define float @bitcasted_inselt_to_FP(i64 %x) { ; ANYLE-LABEL: @bitcasted_inselt_to_FP( ; ANYLE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32 -; ANYLE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; ANYLE-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; ANYLE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float ; ANYLE-NEXT: ret float [[R]] ; @@ -220,7 +220,7 @@ define i32 @bitcasted_inselt_from_FP(double %x) { ; ANYLE-LABEL: @bitcasted_inselt_from_FP( ; ANYLE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64 ; ANYLE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32 -; ANYLE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32 +; ANYLE-NEXT: [[R:%.*]] = trunc nuw i64 [[TMP2]] to i32 ; ANYLE-NEXT: ret i32 [[R]] ; ; ANYBE-LABEL: @bitcasted_inselt_from_FP( @@ -341,7 +341,7 @@ define i8 @bitcast_scalar_supported_type_index0(i32 %x) { ; ; ANYBE-LABEL: @bitcast_scalar_supported_type_index0( ; ANYBE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 24 -; ANYBE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8 +; ANYBE-NEXT: [[R:%.*]] = trunc nuw i32 [[EXTELT_OFFSET]] to i8 ; ANYBE-NEXT: ret i8 
[[R]] ; %v = bitcast i32 %x to <4 x i8> @@ -443,7 +443,7 @@ define half @bitcast_fp16vec_index0(i32 %x) { ; ; ANYBE-LABEL: @bitcast_fp16vec_index0( ; ANYBE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16 -; ANYBE-NEXT: [[TMP1:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i16 +; ANYBE-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[EXTELT_OFFSET]] to i16 ; ANYBE-NEXT: [[R:%.*]] = bitcast i16 [[TMP1]] to half ; ANYBE-NEXT: ret half [[R]] ; @@ -455,7 +455,7 @@ define half @bitcast_fp16vec_index0(i32 %x) { define half @bitcast_fp16vec_index1(i32 %x) { ; ANYLE-LABEL: @bitcast_fp16vec_index1( ; ANYLE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16 -; ANYLE-NEXT: [[TMP1:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i16 +; ANYLE-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[EXTELT_OFFSET]] to i16 ; ANYLE-NEXT: [[R:%.*]] = bitcast i16 [[TMP1]] to half ; ANYLE-NEXT: ret half [[R]] ; @@ -477,7 +477,7 @@ define bfloat @bitcast_bfp16vec_index0(i32 %x) { ; ; ANYBE-LABEL: @bitcast_bfp16vec_index0( ; ANYBE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16 -; ANYBE-NEXT: [[TMP1:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i16 +; ANYBE-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[EXTELT_OFFSET]] to i16 ; ANYBE-NEXT: [[R:%.*]] = bitcast i16 [[TMP1]] to bfloat ; ANYBE-NEXT: ret bfloat [[R]] ; @@ -489,7 +489,7 @@ define bfloat @bitcast_bfp16vec_index0(i32 %x) { define bfloat @bitcast_bfp16vec_index1(i32 %x) { ; ANYLE-LABEL: @bitcast_bfp16vec_index1( ; ANYLE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16 -; ANYLE-NEXT: [[TMP1:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i16 +; ANYLE-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[EXTELT_OFFSET]] to i16 ; ANYLE-NEXT: [[R:%.*]] = bitcast i16 [[TMP1]] to bfloat ; ANYLE-NEXT: ret bfloat [[R]] ; @@ -511,7 +511,7 @@ define float @bitcast_fp32vec_index0(i64 %x) { ; ; BE64-LABEL: @bitcast_fp32vec_index0( ; BE64-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 32 -; BE64-NEXT: [[TMP1:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i32 +; BE64-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[EXTELT_OFFSET]] to i32 ; BE64-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float ; BE64-NEXT: ret float [[R]] ; @@ -528,7 +528,7 @@ define float @bitcast_fp32vec_index0(i64 %x) { define float @bitcast_fp32vec_index1(i64 %x) { ; LE64-LABEL: @bitcast_fp32vec_index1( ; LE64-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 32 -; LE64-NEXT: [[TMP1:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i32 +; LE64-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[EXTELT_OFFSET]] to i32 ; LE64-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float ; LE64-NEXT: ret float [[R]] ; @@ -570,7 +570,7 @@ define double @bitcast_fp64vec_index0(i128 %x) { ; ; BE128-LABEL: @bitcast_fp64vec_index0( ; BE128-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i128 [[X:%.*]], 64 -; BE128-NEXT: [[TMP1:%.*]] = trunc i128 [[EXTELT_OFFSET]] to i64 +; BE128-NEXT: [[TMP1:%.*]] = trunc nuw i128 [[EXTELT_OFFSET]] to i64 ; BE128-NEXT: [[R:%.*]] = bitcast i64 [[TMP1]] to double ; BE128-NEXT: ret double [[R]] ; @@ -587,7 +587,7 @@ define double @bitcast_fp64vec_index1(i128 %x) { ; ; LE128-LABEL: @bitcast_fp64vec_index1( ; LE128-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i128 [[X:%.*]], 64 -; LE128-NEXT: [[TMP1:%.*]] = trunc i128 [[EXTELT_OFFSET]] to i64 +; LE128-NEXT: [[TMP1:%.*]] = trunc nuw i128 [[EXTELT_OFFSET]] to i64 ; LE128-NEXT: [[R:%.*]] = bitcast i64 [[TMP1]] to double ; LE128-NEXT: ret double [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/ffs-1.ll b/llvm/test/Transforms/InstCombine/ffs-1.ll index a610376da8b59..7cf080765bb1b 100644 --- a/llvm/test/Transforms/InstCombine/ffs-1.ll +++ b/llvm/test/Transforms/InstCombine/ffs-1.ll 
@@ -181,7 +181,7 @@ define i32 @test_simplify15(i64 %x) { ; ; TARGET-LABEL: @test_simplify15( ; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true), !range !1 -; TARGET-NEXT: [[TMP1:%.*]] = trunc i64 [[CTTZ]] to i32 +; TARGET-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[CTTZ]] to i32 ; TARGET-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 ; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0 ; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/fls.ll b/llvm/test/Transforms/InstCombine/fls.ll index 8b25a313b6b82..7710093e195a1 100644 --- a/llvm/test/Transforms/InstCombine/fls.ll +++ b/llvm/test/Transforms/InstCombine/fls.ll @@ -32,7 +32,7 @@ define i32 @myflsll() { define i32 @flsnotconst(i64 %z) { ; CHECK-LABEL: @flsnotconst( ; CHECK-NEXT: [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[Z:%.*]], i1 false), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[CTLZ]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[CTLZ]] to i32 ; CHECK-NEXT: [[GOO:%.*]] = sub nsw i32 64, [[TMP1]] ; CHECK-NEXT: ret i32 [[GOO]] ; diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll index 434d98449f99c..a631aacd97ff9 100644 --- a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll +++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll @@ -282,7 +282,7 @@ define i5 @log2_ceil_idiom_trunc_multiuse4(i32 %x) { ; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc_multiuse4( ; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i32 [[CTLZ]] to i5 ; CHECK-NEXT: call void @use5(i5 [[TRUNC]]) ; CHECK-NEXT: [[XOR:%.*]] = xor i5 [[TRUNC]], -1 ; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]] diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll index e87d90909e84a..3ebab115f6543 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll @@ -4,7 +4,7 @@ define i32 @t0(i64 %x) { ; CHECK-LABEL: @t0( ; CHECK-NEXT: [[T0_NEG:%.*]] = ashr i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw i64 [[T0_NEG]] to i32 ; CHECK-NEXT: ret i32 [[T1_NEG]] ; %t0 = lshr i64 %x, 63 @@ -15,7 +15,7 @@ define i32 @t0(i64 %x) { define i32 @t1_exact(i64 %x) { ; CHECK-LABEL: @t1_exact( ; CHECK-NEXT: [[T0_NEG:%.*]] = ashr exact i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw i64 [[T0_NEG]] to i32 ; CHECK-NEXT: ret i32 [[T1_NEG]] ; %t0 = lshr exact i64 %x, 63 @@ -26,7 +26,7 @@ define i32 @t1_exact(i64 %x) { define i32 @t2(i64 %x) { ; CHECK-LABEL: @t2( ; CHECK-NEXT: [[T0_NEG:%.*]] = lshr i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nuw nsw i64 [[T0_NEG]] to i32 ; CHECK-NEXT: ret i32 [[T1_NEG]] ; %t0 = ashr i64 %x, 63 @@ -37,7 +37,7 @@ define i32 @t2(i64 %x) { define i32 @t3_exact(i64 %x) { ; CHECK-LABEL: @t3_exact( ; CHECK-NEXT: [[T0_NEG:%.*]] = lshr exact i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nuw nsw i64 [[T0_NEG]] 
to i32 ; CHECK-NEXT: ret i32 [[T1_NEG]] ; %t0 = ashr exact i64 %x, 63 @@ -49,7 +49,7 @@ define i32 @t3_exact(i64 %x) { define <2 x i32> @t4(<2 x i64> %x) { ; CHECK-LABEL: @t4( ; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc <2 x i64> [[T0_NEG]] to <2 x i32> +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw <2 x i64> [[T0_NEG]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T1_NEG]] ; %t0 = lshr <2 x i64> %x, @@ -79,7 +79,7 @@ define i32 @t6(i64 %x) { ; CHECK-NEXT: [[T0_NEG:%.*]] = ashr i64 [[X:%.*]], 63 ; CHECK-NEXT: [[T0:%.*]] = lshr i64 [[X]], 63 ; CHECK-NEXT: call void @use64(i64 [[T0]]) -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw i64 [[T0_NEG]] to i32 ; CHECK-NEXT: ret i32 [[T1_NEG]] ; %t0 = lshr i64 %x, 63 @@ -92,7 +92,7 @@ define i32 @t6(i64 %x) { define i32 @n7(i64 %x) { ; CHECK-LABEL: @n7( ; CHECK-NEXT: [[T0:%.*]] = lshr i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1:%.*]] = trunc i64 [[T0]] to i32 +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw i64 [[T0]] to i32 ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: [[R:%.*]] = sub nsw i32 0, [[T1]] ; CHECK-NEXT: ret i32 [[R]] @@ -108,7 +108,7 @@ define i32 @n8(i64 %x) { ; CHECK-LABEL: @n8( ; CHECK-NEXT: [[T0:%.*]] = lshr i64 [[X:%.*]], 63 ; CHECK-NEXT: call void @use64(i64 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = trunc i64 [[T0]] to i32 +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw i64 [[T0]] to i32 ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: [[R:%.*]] = sub nsw i32 0, [[T1]] ; CHECK-NEXT: ret i32 [[R]] @@ -124,7 +124,7 @@ define i32 @n8(i64 %x) { define i32 @n9(i64 %x) { ; CHECK-LABEL: @n9( ; CHECK-NEXT: [[T0:%.*]] = lshr i64 [[X:%.*]], 62 -; CHECK-NEXT: [[T1:%.*]] = trunc i64 [[T0]] to i32 +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw i64 [[T0]] to i32 ; CHECK-NEXT: [[R:%.*]] = sub nsw i32 0, [[T1]] ; CHECK-NEXT: ret i32 [[R]] ; @@ -137,7 +137,7 @@ define i32 @n9(i64 %x) { define i32 @n10(i64 %x) { ; CHECK-LABEL: @n10( ; CHECK-NEXT: [[T0_NEG:%.*]] = ashr i64 [[X:%.*]], 63 -; CHECK-NEXT: [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32 +; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw i64 [[T0_NEG]] to i32 ; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[T1_NEG]], 1 ; CHECK-NEXT: ret i32 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll index d858c91becb57..aa23a6d27f69b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll @@ -60,7 +60,7 @@ define void @PR33765(i8 %beth) { ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]] ; CHECK-NEXT: [[TINKY:%.*]] = load i16, ptr @glob, align 2 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[MUL]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[MUL]] to i16 ; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]] ; CHECK-NEXT: store i16 [[CONV14]], ptr @glob, align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/icmp-of-trunc-ext.ll b/llvm/test/Transforms/InstCombine/icmp-of-trunc-ext.ll index 85f67bfa335bb..7f616bbb2a837 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-trunc-ext.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-trunc-ext.ll @@ -28,8 +28,8 @@ define i1 @icmp_trunc_x_trunc_y_fail_from_illegal1(i256 %x, i256 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i256 [[Y:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) 
-; CHECK-NEXT: [[X16:%.*]] = trunc i256 [[X]] to i16 -; CHECK-NEXT: [[Y16:%.*]] = trunc i256 [[Y]] to i16 +; CHECK-NEXT: [[X16:%.*]] = trunc nuw i256 [[X]] to i16 +; CHECK-NEXT: [[Y16:%.*]] = trunc nuw i256 [[Y]] to i16 ; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[X16]], [[Y16]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -49,7 +49,7 @@ define i1 @icmp_trunc_x_trunc_y_illegal_trunc_to_legal_anyways(i123 %x, i32 %y) ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i32 [[Y:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) -; CHECK-NEXT: [[TMP1:%.*]] = trunc i123 [[X]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i123 [[X]] to i32 ; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -89,7 +89,7 @@ define i1 @icmp_trunc_x_trunc_y_3(i64 %x, i32 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i32 [[Y:%.*]], 256 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[X]] to i32 ; CHECK-NEXT: [[R:%.*]] = icmp uge i32 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -109,7 +109,7 @@ define i1 @icmp_trunc_x_trunc_y_fail_maybe_dirty_upper(i32 %x, i32 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i32 [[Y:%.*]], 65537 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) -; CHECK-NEXT: [[X16:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: [[X16:%.*]] = trunc nuw i32 [[X]] to i16 ; CHECK-NEXT: [[Y16:%.*]] = trunc i32 [[Y]] to i16 ; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X16]], [[Y16]] ; CHECK-NEXT: ret i1 [[R]] @@ -131,7 +131,7 @@ define i1 @icmp_trunc_x_trunc_y_fail_maybe_dirty_upper_2(i32 %x, i32 %y) { ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) ; CHECK-NEXT: [[X16:%.*]] = trunc i32 [[X]] to i16 -; CHECK-NEXT: [[Y16:%.*]] = trunc i32 [[Y]] to i16 +; CHECK-NEXT: [[Y16:%.*]] = trunc nuw i32 [[Y]] to i16 ; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X16]], [[Y16]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -151,7 +151,7 @@ define i1 @icmp_trunc_x_trunc_y_swap0(i33 %x, i32 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i32 [[Y:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) -; CHECK-NEXT: [[TMP1:%.*]] = trunc i33 [[X]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i33 [[X]] to i32 ; CHECK-NEXT: [[R:%.*]] = icmp ule i32 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -171,7 +171,7 @@ define i1 @icmp_trunc_x_trunc_y_swap1(i33 %x, i32 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i32 [[Y:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) -; CHECK-NEXT: [[TMP1:%.*]] = trunc i33 [[X]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i33 [[X]] to i32 ; CHECK-NEXT: [[R:%.*]] = icmp uge i32 [[TMP1]], [[Y]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -238,7 +238,7 @@ define i1 @icmp_trunc_x_zext_y_3_fail_illegal(i6 %x, i45 %y) { ; CHECK-NEXT: [[Y_LB_ONLY:%.*]] = icmp ult i45 [[Y:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[Y_LB_ONLY]]) ; CHECK-NEXT: [[X16:%.*]] = zext i6 [[X:%.*]] to i16 -; CHECK-NEXT: [[Y16:%.*]] = trunc i45 [[Y]] to i16 +; CHECK-NEXT: [[Y16:%.*]] = trunc nuw i45 [[Y]] to i16 ; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[Y16]], [[X16]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -254,7 +254,7 @@ define i1 
@icmp_trunc_x_zext_y_fail_multiuse(i32 %x, i8 %y) { ; CHECK-LABEL: @icmp_trunc_x_zext_y_fail_multiuse( ; CHECK-NEXT: [[X_LB_ONLY:%.*]] = icmp ult i32 [[X:%.*]], 65536 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_LB_ONLY]]) -; CHECK-NEXT: [[X16:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: [[X16:%.*]] = trunc nuw i32 [[X]] to i16 ; CHECK-NEXT: [[Y16:%.*]] = zext i8 [[Y:%.*]] to i16 ; CHECK-NEXT: call void @use(i16 [[Y16]]) ; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[X16]], [[Y16]] diff --git a/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll b/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll index 284dc036d11d5..4e11ecbcb8897 100644 --- a/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll +++ b/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll @@ -128,7 +128,7 @@ define i1 @wrongimm1(i16 %add) { define i1 @wrongimm2(i16 %add) { ; CHECK-LABEL: @wrongimm2( ; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i16 [[SH]] to i8 ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 ; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 6 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] @@ -145,7 +145,7 @@ define i1 @wrongimm2(i16 %add) { define i1 @slt(i64 %add) { ; CHECK-LABEL: @slt( ; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i64 [[SH]] to i32 ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 ; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp slt i32 [[SHR2_I]], [[CONV_I]] @@ -182,7 +182,7 @@ define i1 @extrause_a(i16 %add) { define i1 @extrause_l(i16 %add) { ; CHECK-LABEL: @extrause_l( ; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i16 [[SH]] to i8 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], 256 ; CHECK-NEXT: call void @use(i8 [[CONV_I]]) @@ -200,7 +200,7 @@ define i1 @extrause_l(i16 %add) { define i1 @extrause_la(i16 %add) { ; CHECK-LABEL: @extrause_la( ; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i16 [[SH]] to i8 ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 ; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] diff --git a/llvm/test/Transforms/InstCombine/insert-trunc.ll b/llvm/test/Transforms/InstCombine/insert-trunc.ll index 3ae128e55b43b..3a160513ccb19 100644 --- a/llvm/test/Transforms/InstCombine/insert-trunc.ll +++ b/llvm/test/Transforms/InstCombine/insert-trunc.ll @@ -146,7 +146,7 @@ define <4 x i16> @lshr_same_length_poison_basevec_be(i64 %x) { define <4 x i16> @lshr_same_length_poison_basevec_both_endian(i64 %x) { ; ALL-LABEL: @lshr_same_length_poison_basevec_both_endian( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0 ; ALL-NEXT: ret <4 x i16> [[R]] ; @@ -159,7 +159,7 @@ define <4 x i16> @lshr_same_length_poison_basevec_both_endian(i64 %x) { define <4 x i16> @lshr_wrong_index_same_length_poison_basevec(i64 %x) { ; ALL-LABEL: @lshr_wrong_index_same_length_poison_basevec( ; ALL-NEXT: [[S:%.*]] = lshr 
i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 1 ; ALL-NEXT: ret <4 x i16> [[R]] ; @@ -172,7 +172,7 @@ define <4 x i16> @lshr_wrong_index_same_length_poison_basevec(i64 %x) { define <8 x i16> @lshr_longer_length_poison_basevec_le(i64 %x) { ; ALL-LABEL: @lshr_longer_length_poison_basevec_le( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3 ; ALL-NEXT: ret <8 x i16> [[R]] ; @@ -250,7 +250,7 @@ define <4 x i8> @lshr_wrong_index_shorter_length_poison_basevec(i64 %x) { define <4 x i8> @lshr_wrong_shift_shorter_length_poison_basevec(i64 %x) { ; ALL-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8 +; ALL-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[S]] to i8 ; ALL-NEXT: [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0 ; ALL-NEXT: ret <4 x i8> [[R]] ; @@ -392,7 +392,7 @@ define <4 x i16> @lshr_same_length_basevec_be(i64 %x, <4 x i16> %v) { define <4 x i16> @lshr_same_length_basevec_both_endian(i64 %x, <4 x i16> %v) { ; ALL-LABEL: @lshr_same_length_basevec_both_endian( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 3 ; ALL-NEXT: ret <4 x i16> [[R]] ; @@ -405,7 +405,7 @@ define <4 x i16> @lshr_same_length_basevec_both_endian(i64 %x, <4 x i16> %v) { define <4 x i16> @lshr_wrong_index_same_length_basevec(i64 %x, <4 x i16> %v) { ; ALL-LABEL: @lshr_wrong_index_same_length_basevec( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 1 ; ALL-NEXT: ret <4 x i16> [[R]] ; @@ -418,7 +418,7 @@ define <4 x i16> @lshr_wrong_index_same_length_basevec(i64 %x, <4 x i16> %v) { define <8 x i16> @lshr_longer_length_basevec_le(i64 %x, <8 x i16> %v) { ; ALL-LABEL: @lshr_longer_length_basevec_le( ; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; ALL-NEXT: [[T:%.*]] = trunc nuw i64 [[S]] to i16 ; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 3 ; ALL-NEXT: ret <8 x i16> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/insertelt-trunc.ll b/llvm/test/Transforms/InstCombine/insertelt-trunc.ll index a2721bf13e743..f5f1051ea2014 100644 --- a/llvm/test/Transforms/InstCombine/insertelt-trunc.ll +++ b/llvm/test/Transforms/InstCombine/insertelt-trunc.ll @@ -9,7 +9,7 @@ declare void @use_vec(<8 x i16>) define <4 x i16> @insert_01_poison_v4i16(i32 %x) { ; BE-LABEL: @insert_01_poison_v4i16( ; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0 ; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1 @@ -36,7 +36,7 @@ define <8 x i16> @insert_10_poison_v8i16(i32 %x) { ; ; LE-LABEL: @insert_10_poison_v8i16( ; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; 
LE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; LE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; LE-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[HI16]], i64 0 ; LE-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1 @@ -55,7 +55,7 @@ define <8 x i16> @insert_10_poison_v8i16(i32 %x) { define <4 x i32> @insert_12_poison_v4i32(i64 %x) { ; ALL-LABEL: @insert_12_poison_v4i32( ; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32 +; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32 ; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32 ; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 1 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2 @@ -74,7 +74,7 @@ define <4 x i32> @insert_12_poison_v4i32(i64 %x) { define <4 x i16> @insert_21_poison_v4i16(i32 %x) { ; ALL-LABEL: @insert_21_poison_v4i16( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 1 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2 @@ -91,7 +91,7 @@ define <4 x i16> @insert_21_poison_v4i16(i32 %x) { define <4 x i32> @insert_23_poison_v4i32(i64 %x) { ; BE-LABEL: @insert_23_poison_v4i32( ; BE-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32 -; BE-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32 +; BE-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32 ; BE-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32 ; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 2 ; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3 @@ -118,7 +118,7 @@ define <4 x i16> @insert_32_poison_v4i16(i32 %x) { ; ; LE-LABEL: @insert_32_poison_v4i16( ; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; LE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; LE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; LE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 2 ; LE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3 @@ -140,7 +140,7 @@ define <4 x i16> @insert_32_poison_v4i16(i32 %x) { define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) { ; BE-LABEL: @insert_01_v2i16( ; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; BE-NEXT: [[INS0:%.*]] = insertelement <2 x i16> poison, i16 [[LO16]], i64 0 ; BE-NEXT: [[INS1:%.*]] = insertelement <2 x i16> [[INS0]], i16 [[HI16]], i64 1 @@ -163,7 +163,7 @@ define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) { define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) { ; ALL-LABEL: @insert_10_v8i16( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 0 ; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1 @@ -182,7 +182,7 @@ define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) { define <4 x i32> 
@insert_12_v4i32(i64 %x, <4 x i32> %v) { ; ALL-LABEL: @insert_12_v4i32( ; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32 +; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32 ; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32 ; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 1 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2 @@ -201,7 +201,7 @@ define <4 x i32> @insert_12_v4i32(i64 %x, <4 x i32> %v) { define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) { ; ALL-LABEL: @insert_21_v4i16( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 1 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2 @@ -220,7 +220,7 @@ define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) { define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) { ; ALL-LABEL: @insert_23_v4i32( ; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32 +; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32 ; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32 ; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 2 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3 @@ -239,7 +239,7 @@ define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) { define <4 x i16> @insert_32_v4i16(i32 %x, <4 x i16> %v) { ; ALL-LABEL: @insert_32_v4i16( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 2 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3 @@ -277,7 +277,7 @@ define <4 x i16> @insert_01_v4i16_wrong_shift1(i32 %x) { define <4 x i16> @insert_01_v4i16_wrong_op(i32 %x, i32 %y) { ; ALL-LABEL: @insert_01_v4i16_wrong_op( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[Y:%.*]] to i16 ; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0 ; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1 @@ -296,7 +296,7 @@ define <4 x i16> @insert_01_v4i16_wrong_op(i32 %x, i32 %y) { define <8 x i16> @insert_67_v4i16_uses1(i32 %x, <8 x i16> %v) { ; ALL-LABEL: @insert_67_v4i16_uses1( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: call void @use(i16 [[HI16]]) ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6 @@ -318,7 +318,7 @@ define <8 x i16> @insert_67_v4i16_uses1(i32 %x, <8 x i16> %v) { define <8 x i16> @insert_76_v4i16_uses2(i32 %x, <8 x i16> %v) { ; ALL-LABEL: @insert_76_v4i16_uses2( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: 
call void @use(i16 [[LO16]]) ; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 6 @@ -339,7 +339,7 @@ define <8 x i16> @insert_76_v4i16_uses2(i32 %x, <8 x i16> %v) { define <8 x i16> @insert_67_v4i16_uses3(i32 %x, <8 x i16> %v) { ; ALL-LABEL: @insert_67_v4i16_uses3( ; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6 ; ALL-NEXT: call void @use_vec(<8 x i16> [[INS0]]) @@ -360,7 +360,7 @@ define <8 x i16> @insert_67_v4i16_uses3(i32 %x, <8 x i16> %v) { define <4 x i16> @insert_01_poison_v4i16_high_first(i32 %x) { ; BE-LABEL: @insert_01_poison_v4i16_high_first( ; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16 -; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16 +; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16 ; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16 ; BE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0 ; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[HI16]], i64 1 diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index e27e4a3eddfbb..d210b19bb7faf 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -455,7 +455,7 @@ define i64 @test_icmp_trunc5(i64 %n) { ; CHECK-LABEL: @test_icmp_trunc5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SHR:%.*]] = ashr i64 [[N:%.*]], 47 -; CHECK-NEXT: [[CONV1:%.*]] = trunc i64 [[SHR]] to i32 +; CHECK-NEXT: [[CONV1:%.*]] = trunc nsw i64 [[SHR]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CONV1]], -13 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: diff --git a/llvm/test/Transforms/InstCombine/known-non-zero.ll b/llvm/test/Transforms/InstCombine/known-non-zero.ll index 7965b47911c41..f1c757cafefb0 100644 --- a/llvm/test/Transforms/InstCombine/known-non-zero.ll +++ b/llvm/test/Transforms/InstCombine/known-non-zero.ll @@ -14,7 +14,7 @@ define i32 @test0(i64 %x) { ; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: ; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.cttz.i64(i64 [[X]], i1 true), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[CTZ32:%.*]] = trunc i64 [[CTZ]] to i32 +; CHECK-NEXT: [[CTZ32:%.*]] = trunc nuw nsw i64 [[CTZ]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[CTZ32]], [[NON_ZERO]] ], [ 0, [[START:%.*]] ] @@ -41,7 +41,7 @@ define i32 @test1(i64 %x) { ; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: ; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X]], i1 true), !range [[RNG0]] -; CHECK-NEXT: [[CTZ32:%.*]] = trunc i64 [[CTZ]] to i32 +; CHECK-NEXT: [[CTZ32:%.*]] = trunc nuw nsw i64 [[CTZ]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[CTZ32]], [[NON_ZERO]] ], [ 0, [[START:%.*]] ] diff --git a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll index 78654155c36cd..d33e08ffaf9b7 100644 --- a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll +++ b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll @@ -17,7 +17,7 @@ define i32 @single_entry_phi(i64 %x, i1 %c) { ; CHECK-NEXT: br i1 [[C:%.*]], label [[END:%.*]], label [[BODY]] ; CHECK: end: ; 
CHECK-NEXT: [[Y:%.*]] = call i64 @llvm.ctpop.i64(i64 [[X:%.*]]), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[Y]] to i32 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw nsw i64 [[Y]] to i32 ; CHECK-NEXT: ret i32 [[TRUNC]] ; entry: @@ -37,7 +37,7 @@ define i32 @two_entry_phi_with_constant(i64 %x, i1 %c) { ; CHECK-LABEL: @two_entry_phi_with_constant( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = call i64 @llvm.ctpop.i64(i64 [[X:%.*]]), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[Y]] to i32 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw nsw i64 [[Y]] to i32 ; CHECK-NEXT: br i1 [[C:%.*]], label [[END:%.*]], label [[BODY:%.*]] ; CHECK: body: ; CHECK-NEXT: br label [[END]] @@ -62,11 +62,11 @@ define i32 @two_entry_phi_non_constant(i64 %x, i64 %x2, i1 %c) { ; CHECK-LABEL: @two_entry_phi_non_constant( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = call i64 @llvm.ctpop.i64(i64 [[X:%.*]]), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[Y]] to i32 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw nsw i64 [[Y]] to i32 ; CHECK-NEXT: br i1 [[C:%.*]], label [[END:%.*]], label [[BODY:%.*]] ; CHECK: body: ; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[X2:%.*]]), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC2:%.*]] = trunc i64 [[Y2]] to i32 +; CHECK-NEXT: [[TRUNC2:%.*]] = trunc nuw nsw i64 [[Y2]] to i32 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TRUNC]], [[ENTRY:%.*]] ], [ [[TRUNC2]], [[BODY]] ] @@ -91,12 +91,12 @@ define i32 @neg_many_branches(i64 %x) { ; CHECK-LABEL: @neg_many_branches( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = call i64 @llvm.ctpop.i64(i64 [[X:%.*]]), !range [[RNG0]] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[Y]] to i32 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw nsw i64 [[Y]] to i32 ; CHECK-NEXT: switch i32 [[TRUNC]], label [[END:%.*]] [ -; CHECK-NEXT: i32 1, label [[ONE:%.*]] -; CHECK-NEXT: i32 2, label [[TWO:%.*]] -; CHECK-NEXT: i32 3, label [[THREE:%.*]] -; CHECK-NEXT: i32 4, label [[FOUR:%.*]] +; CHECK-NEXT: i32 1, label [[ONE:%.*]] +; CHECK-NEXT: i32 2, label [[TWO:%.*]] +; CHECK-NEXT: i32 3, label [[THREE:%.*]] +; CHECK-NEXT: i32 4, label [[FOUR:%.*]] ; CHECK-NEXT: ] ; CHECK: one: ; CHECK-NEXT: [[A:%.*]] = add nuw nsw i32 [[TRUNC]], 1 diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index b3d147621b59e..20d60206ebcdf 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -647,7 +647,7 @@ define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> % ; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]] -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[SHUF_OR2]] to <4 x i1> +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1> ; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]] ; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SEL]] diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index c850b87bb2dd4..6e2ed6bf796d0 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll 
@@ -683,7 +683,7 @@ define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> % ; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]] -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[SHUF_OR2]] to <4 x i1> +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1> ; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]] ; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SEL]] diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll index 8d82213d8022f..8e7491ee40370 100644 --- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll +++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll @@ -91,7 +91,7 @@ define <2 x i16> @t5_vec_undef(<2 x i8> %x) { define i16 @t6_extrause0(i8 %x) { ; CHECK-LABEL: @t6_extrause0( ; CHECK-NEXT: [[A:%.*]] = lshr i8 [[X:%.*]], 4 -; CHECK-NEXT: [[B:%.*]] = trunc i8 [[A]] to i4 +; CHECK-NEXT: [[B:%.*]] = trunc nuw i8 [[A]] to i4 ; CHECK-NEXT: call void @use4(i4 [[B]]) ; CHECK-NEXT: [[C:%.*]] = sext i4 [[B]] to i16 ; CHECK-NEXT: ret i16 [[C]] @@ -157,7 +157,7 @@ define i16 @t10_extrause2(i8 %x) { ; CHECK-LABEL: @t10_extrause2( ; CHECK-NEXT: [[A:%.*]] = lshr i8 [[X:%.*]], 4 ; CHECK-NEXT: call void @use8(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = trunc i8 [[A]] to i4 +; CHECK-NEXT: [[B:%.*]] = trunc nuw i8 [[A]] to i4 ; CHECK-NEXT: call void @use4(i4 [[B]]) ; CHECK-NEXT: [[C:%.*]] = sext i4 [[B]] to i16 ; CHECK-NEXT: ret i16 [[C]] @@ -189,7 +189,7 @@ define <2 x i16> @t11_extrause2_vec_undef(<2 x i8> %x) { define <2 x i10> @wide_source_shifted_signbit(<2 x i32> %x) { ; CHECK-LABEL: @wide_source_shifted_signbit( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i10> +; CHECK-NEXT: [[C:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i10> ; CHECK-NEXT: ret <2 x i10> [[C]] ; %a = lshr <2 x i32> %x, @@ -203,7 +203,7 @@ define i10 @wide_source_shifted_signbit_use1(i32 %x) { ; CHECK-NEXT: [[A:%.*]] = lshr i32 [[X:%.*]], 24 ; CHECK-NEXT: call void @use32(i32 [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X]], 24 -; CHECK-NEXT: [[C:%.*]] = trunc i32 [[TMP1]] to i10 +; CHECK-NEXT: [[C:%.*]] = trunc nsw i32 [[TMP1]] to i10 ; CHECK-NEXT: ret i10 [[C]] ; %a = lshr i32 %x, 24 @@ -216,7 +216,7 @@ define i10 @wide_source_shifted_signbit_use1(i32 %x) { define i10 @wide_source_shifted_signbit_use2(i32 %x) { ; CHECK-LABEL: @wide_source_shifted_signbit_use2( ; CHECK-NEXT: [[A:%.*]] = lshr i32 [[X:%.*]], 24 -; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i8 +; CHECK-NEXT: [[B:%.*]] = trunc nuw i32 [[A]] to i8 ; CHECK-NEXT: call void @use8(i8 [[B]]) ; CHECK-NEXT: [[C:%.*]] = sext i8 [[B]] to i10 ; CHECK-NEXT: ret i10 [[C]] @@ -256,7 +256,7 @@ define i32 @same_source_shifted_signbit_use1(i32 %x) { define i32 @same_source_shifted_signbit_use2(i32 %x) { ; CHECK-LABEL: @same_source_shifted_signbit_use2( ; CHECK-NEXT: [[A:%.*]] = lshr i32 [[X:%.*]], 24 -; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i8 +; CHECK-NEXT: [[B:%.*]] = trunc nuw i32 [[A]] to i8 ; CHECK-NEXT: call void @use8(i8 [[B]]) ; CHECK-NEXT: [[C:%.*]] = sext i8 [[B]] to i32 ; CHECK-NEXT: ret i32 [[C]] diff --git 
a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 02c2bbc2819b8..7d611ba188d6b 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -476,7 +476,7 @@ define i32 @srem2_lshr30(i32 %x) { define i12 @trunc_sandwich(i32 %x) { ; CHECK-LABEL: @trunc_sandwich( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X:%.*]], 30 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 28 @@ -488,7 +488,7 @@ define i12 @trunc_sandwich(i32 %x) { define <2 x i12> @trunc_sandwich_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @trunc_sandwich_splat_vec( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R1:%.*]] = trunc <2 x i32> [[SUM_SHIFT]] to <2 x i12> +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <2 x i32> [[SUM_SHIFT]] to <2 x i12> ; CHECK-NEXT: ret <2 x i12> [[R1]] ; %sh = lshr <2 x i32> %x, @@ -500,7 +500,7 @@ define <2 x i12> @trunc_sandwich_splat_vec(<2 x i32> %x) { define i12 @trunc_sandwich_min_shift1(i32 %x) { ; CHECK-LABEL: @trunc_sandwich_min_shift1( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X:%.*]], 21 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 20 @@ -512,7 +512,7 @@ define i12 @trunc_sandwich_min_shift1(i32 %x) { define i12 @trunc_sandwich_small_shift1(i32 %x) { ; CHECK-LABEL: @trunc_sandwich_small_shift1( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X:%.*]], 20 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: [[R:%.*]] = and i12 [[R1]], 2047 ; CHECK-NEXT: ret i12 [[R]] ; @@ -525,7 +525,7 @@ define i12 @trunc_sandwich_small_shift1(i32 %x) { define i12 @trunc_sandwich_max_sum_shift(i32 %x) { ; CHECK-LABEL: @trunc_sandwich_max_sum_shift( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 20 @@ -537,7 +537,7 @@ define i12 @trunc_sandwich_max_sum_shift(i32 %x) { define i12 @trunc_sandwich_max_sum_shift2(i32 %x) { ; CHECK-LABEL: @trunc_sandwich_max_sum_shift2( ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 30 @@ -571,7 +571,7 @@ define i12 @trunc_sandwich_use1(i32 %x) { ; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 28 ; CHECK-NEXT: call void @use(i32 [[SH]]) ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 30 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 28 @@ -586,7 +586,7 @@ define <3 x i9> @trunc_sandwich_splat_vec_use1(<3 x i14> %x) { ; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], ; CHECK-NEXT: call void @usevec(<3 x i14> [[SH]]) ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], -; CHECK-NEXT: [[R1:%.*]] = trunc <3 x i14> [[SUM_SHIFT]] to <3 x i9> +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <3 x i14> [[SUM_SHIFT]] to <3 x i9> ; CHECK-NEXT: ret <3 x i9> [[R1]] ; %sh = lshr <3 x i14> %x, @@ -601,7 +601,7 @@ define i12 @trunc_sandwich_min_shift1_use1(i32 %x) { 
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20 ; CHECK-NEXT: call void @use(i32 [[SH]]) ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 21 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 20 @@ -633,7 +633,7 @@ define i12 @trunc_sandwich_max_sum_shift_use1(i32 %x) { ; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20 ; CHECK-NEXT: call void @use(i32 [[SH]]) ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 20 @@ -648,7 +648,7 @@ define i12 @trunc_sandwich_max_sum_shift2_use1(i32 %x) { ; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 30 ; CHECK-NEXT: call void @use(i32 [[SH]]) ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31 -; CHECK-NEXT: [[R1:%.*]] = trunc i32 [[SUM_SHIFT]] to i12 +; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw i32 [[SUM_SHIFT]] to i12 ; CHECK-NEXT: ret i12 [[R1]] ; %sh = lshr i32 %x, 30 diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll index 58174f21f767f..866381ff2887f 100644 --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -320,7 +320,7 @@ define void @pr46688(i1 %cond, i32 %x, i16 %d, ptr %p1, ptr %p2) { ; CHECK-NEXT: [[THR1_PN:%.*]] = lshr i32 [[THR_PN]], [[X]] ; CHECK-NEXT: [[THR2_PN:%.*]] = lshr i32 [[THR1_PN]], [[X]] ; CHECK-NEXT: [[STOREMERGE:%.*]] = lshr i32 [[THR2_PN]], [[X]] -; CHECK-NEXT: [[STOREMERGE1:%.*]] = trunc i32 [[STOREMERGE]] to i16 +; CHECK-NEXT: [[STOREMERGE1:%.*]] = trunc nuw i32 [[STOREMERGE]] to i16 ; CHECK-NEXT: store i16 [[STOREMERGE1]], ptr [[P1:%.*]], align 2 ; CHECK-NEXT: store i32 [[STOREMERGE]], ptr [[P2:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/narrow.ll b/llvm/test/Transforms/InstCombine/narrow.ll index 781974d33bf11..40229f8511f76 100644 --- a/llvm/test/Transforms/InstCombine/narrow.ll +++ b/llvm/test/Transforms/InstCombine/narrow.ll @@ -76,7 +76,7 @@ define <2 x i8> @shrink_or_vec(<2 x i16> %a) { define i31 @shrink_and(i64 %a) { ; CHECK-LABEL: @shrink_and( ; CHECK-NEXT: [[AND:%.*]] = and i64 [[A:%.*]], 42 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[AND]] to i31 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw nsw i64 [[AND]] to i31 ; CHECK-NEXT: ret i31 [[TRUNC]] ; %and = and i64 %a, 42 diff --git a/llvm/test/Transforms/InstCombine/negated-bitmask.ll b/llvm/test/Transforms/InstCombine/negated-bitmask.ll index fe2386bd65c31..9188678186347 100644 --- a/llvm/test/Transforms/InstCombine/negated-bitmask.ll +++ b/llvm/test/Transforms/InstCombine/negated-bitmask.ll @@ -70,7 +70,7 @@ define i8 @sub_mask1_trunc_lshr(i64 %a0) { ; CHECK-LABEL: @sub_mask1_trunc_lshr( ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[A0:%.*]], 48 ; CHECK-NEXT: [[TMP2:%.*]] = ashr i64 [[TMP1]], 63 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i8 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw i64 [[TMP2]] to i8 ; CHECK-NEXT: [[NEG:%.*]] = add nsw i8 [[TMP3]], 10 ; CHECK-NEXT: ret i8 [[NEG]] ; @@ -85,7 +85,7 @@ define i32 @sub_sext_mask1_trunc_lshr(i64 %a0) { ; CHECK-LABEL: @sub_sext_mask1_trunc_lshr( ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[A0:%.*]], 48 ; CHECK-NEXT: [[TMP2:%.*]] = ashr i64 [[TMP1]], 63 -; CHECK-NEXT: 
[[TMP3:%.*]] = trunc i64 [[TMP2]] to i8 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw i64 [[TMP2]] to i8 ; CHECK-NEXT: [[NARROW:%.*]] = add nsw i8 [[TMP3]], 10 ; CHECK-NEXT: [[NEG:%.*]] = zext i8 [[NARROW]] to i32 ; CHECK-NEXT: ret i32 [[NEG]] diff --git a/llvm/test/Transforms/InstCombine/pr34349.ll b/llvm/test/Transforms/InstCombine/pr34349.ll index ea4afaa245c66..89947650b26b4 100644 --- a/llvm/test/Transforms/InstCombine/pr34349.ll +++ b/llvm/test/Transforms/InstCombine/pr34349.ll @@ -7,7 +7,7 @@ define i8 @fast_div_201(i8 %p) { ; CHECK-NEXT: [[V3:%.*]] = zext i8 [[P:%.*]] to i16 ; CHECK-NEXT: [[V4:%.*]] = mul nuw nsw i16 [[V3]], 71 ; CHECK-NEXT: [[V5:%.*]] = lshr i16 [[V4]], 8 -; CHECK-NEXT: [[V6:%.*]] = trunc i16 [[V5]] to i8 +; CHECK-NEXT: [[V6:%.*]] = trunc nuw nsw i16 [[V5]] to i8 ; CHECK-NEXT: [[V7:%.*]] = sub i8 [[P]], [[V6]] ; CHECK-NEXT: [[V8:%.*]] = lshr i8 [[V7]], 1 ; CHECK-NEXT: [[V13:%.*]] = add nuw i8 [[V8]], [[V6]] diff --git a/llvm/test/Transforms/InstCombine/reduction-add-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-add-sext-zext-i1.ll index ad55b506a108b..b94be990199bf 100644 --- a/llvm/test/Transforms/InstCombine/reduction-add-sext-zext-i1.ll +++ b/llvm/test/Transforms/InstCombine/reduction-add-sext-zext-i1.ll @@ -53,7 +53,7 @@ define i8 @reduce_add_zext_long(<128 x i1> %x) { ; CHECK-LABEL: @reduce_add_zext_long( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <128 x i1> [[X:%.*]] to i128 ; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP1]]), !range [[RNG3:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i8 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw i128 [[TMP2]] to i8 ; CHECK-NEXT: [[RES:%.*]] = sub i8 0, [[TMP3]] ; CHECK-NEXT: ret i8 [[RES]] ; @@ -67,7 +67,7 @@ define i8 @reduce_add_zext_long_external_use(<128 x i1> %x) { ; CHECK-LABEL: @reduce_add_zext_long_external_use( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <128 x i1> [[X:%.*]] to i128 ; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP1]]), !range [[RNG3]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i8 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw i128 [[TMP2]] to i8 ; CHECK-NEXT: [[RES:%.*]] = sub i8 0, [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <128 x i1> [[X]], i64 0 ; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[TMP4]] to i8 diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index 5ccb6f92b6c72..1cce297122f8a 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -79,7 +79,7 @@ define i32 @smul_sat32(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = mul nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -102,7 +102,7 @@ define i32 @smul_sat32_mm(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = mul nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: ret i32 
[[CONV7]] ; entry: @@ -295,7 +295,7 @@ define signext i4 @sadd_sat4(i4 signext %a, i4 signext %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 7) ; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -8) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i4 +; CHECK-NEXT: [[CONV9:%.*]] = trunc nsw i32 [[SPEC_STORE_SELECT10]] to i4 ; CHECK-NEXT: ret i4 [[CONV9]] ; entry: @@ -318,7 +318,7 @@ define signext i4 @ssub_sat4(i4 signext %a, i4 signext %b) { ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 7) ; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -8) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i4 +; CHECK-NEXT: [[CONV9:%.*]] = trunc nsw i32 [[SPEC_STORE_SELECT10]] to i4 ; CHECK-NEXT: ret i4 [[CONV9]] ; entry: @@ -465,7 +465,7 @@ define i32 @sadd_sat32_extrause_2(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: call void @use64(i64 [[SPEC_STORE_SELECT]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; @@ -490,7 +490,7 @@ define i32 @sadd_sat32_extrause_2_mm(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: call void @use64(i64 [[SPEC_STORE_SELECT]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; @@ -513,7 +513,7 @@ define i32 @sadd_sat32_extrause_3(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: call void @use64(i64 [[ADD]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; @@ -538,7 +538,7 @@ define i32 @sadd_sat32_extrause_3_mm(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: call void @use64(i64 [[ADD]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; @@ -561,7 +561,7 @@ define i32 @sadd_sat32_trunc(i32 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 32767) ; 
CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -32768) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -603,7 +603,7 @@ define i8 @sadd_sat8_ext8(i8 %a, i16 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 127) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -128) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_STORE_SELECT8]] to i8 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i32 [[SPEC_STORE_SELECT8]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; entry: @@ -625,7 +625,7 @@ define i32 @sadd_sat32_zext(i32 %a, i32 %b) { ; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 2147483647) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nuw nsw i64 [[SPEC_STORE_SELECT]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -680,7 +680,7 @@ define i32 @ashrA(i64 %a, i32 %b) { ; CHECK-LABEL: @ashrA( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[B:%.*]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -698,7 +698,7 @@ define i32 @ashrB(i32 %a, i64 %b) { ; CHECK-LABEL: @ashrB( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[B:%.*]], 32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[A:%.*]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -719,8 +719,8 @@ define i32 @ashrAB(i64 %a, i64 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[B:%.*]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw i64 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP2]], i32 [[TMP3]]) ; CHECK-NEXT: ret i32 [[TMP4]] ; @@ -744,7 +744,7 @@ define i32 @ashrA31(i64 %a, i32 %b) { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smin.i64(i64 [[SPEC_STORE_SELECT]], i64 2147483647) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -763,7 +763,7 @@ define i32 @ashrA33(i64 %a, i32 %b) { ; CHECK-LABEL: @ashrA33( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 33 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[CONV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nsw i64 [[CONV]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[B:%.*]]) ; CHECK-NEXT: ret i32 [[TMP1]] ; @@ -787,7 +787,7 @@ define <2 x i8> @ashrv2i8(<2 x i16> %a, <2 x i8> %b) { ; 
CHECK-NEXT: [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]] ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 -128, i16 -128>) ; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> <i16 127, i16 127>) -; CHECK-NEXT: [[CONV7:%.*]] = trunc <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[CONV7]] ; entry: @@ -806,7 +806,7 @@ define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) { ; CHECK-LABEL: @ashrv2i8_s( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8> -; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[TMP0]] to <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <2 x i16> [[TMP0]] to <2 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll index 03dd6188ac039..69896f855f5f1 100644 --- a/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll +++ b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll @@ -220,7 +220,7 @@ define i64 @test6c(i32 %x) { define i16 @test1d(i64 %x) { ; CHECK-LABEL: @test1d( ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[CT]] to i16 +; CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[CT]] to i16 ; CHECK-NEXT: ret i16 [[CONV]] ; %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) @@ -233,7 +233,7 @@ define i16 @test1d(i64 %x) { define i32 @test2d(i64 %x) { ; CHECK-LABEL: @test2d( ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) @@ -246,7 +246,7 @@ define i32 @test2d(i64 %x) { define i16 @test3d(i32 %x) { ; CHECK-LABEL: @test3d( ; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i32 [[CT]] to i16 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i32 [[CT]] to i16 ; CHECK-NEXT: ret i16 [[CAST]] ; %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) @@ -259,7 +259,7 @@ define i16 @test3d(i32 %x) { define i16 @test4d(i64 %x) { ; CHECK-LABEL: @test4d( ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i16 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i16 ; CHECK-NEXT: ret i16 [[CAST]] ; %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) @@ -272,7 +272,7 @@ define i16 @test4d(i64 %x) { define i32 @test5d(i64 %x) { ; CHECK-LABEL: @test5d( ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) @@ -288,7 +288,7 @@ define i32 @not_op_ctlz(i64 %x) { ; CHECK-LABEL: @not_op_ctlz( ; CHECK-NEXT: [[N:%.*]] = xor i64 [[X:%.*]], -1 ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[N]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: 
[[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; %n = xor i64 %x, -1 @@ -303,7 +303,7 @@ define i32 @not_op_cttz(i64 %x) { ; CHECK-LABEL: @not_op_cttz( ; CHECK-NEXT: [[N:%.*]] = xor i64 [[X:%.*]], -1 ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[N]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; %n = xor i64 %x, -1 @@ -320,7 +320,7 @@ define i32 @not_op_ctlz_wrong_xor_op1(i64 %x) { ; CHECK-LABEL: @not_op_ctlz_wrong_xor_op1( ; CHECK-NEXT: [[N:%.*]] = xor i64 [[X:%.*]], -2 ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[N]], i1 true), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X]], -1 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] ; CHECK-NEXT: ret i32 [[R]] @@ -339,7 +339,7 @@ define i32 @not_op_ctlz_wrong_xor_op0(i64 %x, i64 %y) { ; CHECK-LABEL: @not_op_ctlz_wrong_xor_op0( ; CHECK-NEXT: [[N:%.*]] = xor i64 [[Y:%.*]], -1 ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[N]], i1 true), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], -1 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] ; CHECK-NEXT: ret i32 [[R]] @@ -358,7 +358,7 @@ define i32 @not_op_cttz_wrong_cmp(i64 %x) { ; CHECK-LABEL: @not_op_cttz_wrong_cmp( ; CHECK-NEXT: [[N:%.*]] = xor i64 [[X:%.*]], -1 ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[N]], i1 true), !range [[RNG2]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[CT]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i64 [[CT]] to i32 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X]], 0 ; CHECK-NEXT: [[R:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] ; CHECK-NEXT: ret i32 [[R]] @@ -374,7 +374,7 @@ define i32 @not_op_cttz_wrong_cmp(i64 %x) { define i16 @test6d(i32 %x) { ; CHECK-LABEL: @test6d( ; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]] -; CHECK-NEXT: [[CAST:%.*]] = trunc i32 [[CT]] to i16 +; CHECK-NEXT: [[CAST:%.*]] = trunc nuw nsw i32 [[CT]] to i16 ; CHECK-NEXT: ret i16 [[CAST]] ; %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) @@ -400,7 +400,7 @@ define i64 @select_bug1(i32 %x) { define i16 @select_bug2(i32 %x) { ; CHECK-LABEL: @select_bug2( ; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[CT]] to i16 +; CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i32 [[CT]] to i16 ; CHECK-NEXT: ret i16 [[CONV]] ; %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) @@ -595,7 +595,7 @@ define i64 @test_multiuse_zext_undef(i32 %x, ptr %p) { define i16 @test_multiuse_trunc_def(i64 %x, ptr %p) { ; CHECK-LABEL: @test_multiuse_trunc_def( ; CHECK-NEXT: [[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[CT]] to i16 +; CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[CT]] to i16 ; CHECK-NEXT: store i16 [[CONV]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: ret i16 [[CONV]] ; @@ -610,7 +610,7 @@ define i16 @test_multiuse_trunc_def(i64 %x, ptr %p) { define i16 @test_multiuse_trunc_undef(i64 %x, ptr %p) { ; CHECK-LABEL: @test_multiuse_trunc_undef( ; CHECK-NEXT: 
[[CT:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range [[RNG2]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[CT]] to i16 +; CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[CT]] to i16 ; CHECK-NEXT: store i16 [[CONV]], ptr [[P:%.*]], align 2 ; CHECK-NEXT: ret i16 [[CONV]] ; diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll index cde6329fd1b27..6d57af9d939d9 100644 --- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll +++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll @@ -20,7 +20,7 @@ define i8 @double(i32 %A) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) ; CHECK-NEXT: [[CONV71:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 127) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[CONV71]] to i8 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i32 [[CONV71]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; entry: @@ -51,7 +51,7 @@ define i8 @original(i32 %A, i32 %B) { ; CHECK-LABEL: @original( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) ; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i32 [[SPEC_SELECT_I]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; %cmp4.i = icmp slt i32 127, %A @@ -68,7 +68,7 @@ define i8 @original_logical(i32 %A, i32 %B) { ; CHECK-LABEL: @original_logical( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) ; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 +; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i32 [[SPEC_SELECT_I]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; %cmp4.i = icmp slt i32 127, %A diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 05fcf66235295..bd8145ab2a35b 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -452,7 +452,7 @@ define i64 @test21(i32 %x) { define i16 @test22(i32 %x) { ; CHECK-LABEL: @test22( ; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[RETVAL:%.*]] = trunc i32 [[X_LOBIT]] to i16 +; CHECK-NEXT: [[RETVAL:%.*]] = trunc nsw i32 [[X_LOBIT]] to i16 ; CHECK-NEXT: ret i16 [[RETVAL]] ; %t = icmp slt i32 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll index 5b9334ab93cb4..b992460d0be69 100644 --- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll +++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll @@ -86,7 +86,7 @@ define i16 @t5_extrause(i8 %x) { ; CHECK-LABEL: @t5_extrause( ; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 5 ; CHECK-NEXT: call void @use8(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = trunc i8 [[A]] to i4 +; CHECK-NEXT: [[B:%.*]] = trunc nsw i8 [[A]] to i4 ; CHECK-NEXT: call void @use4(i4 [[B]]) ; CHECK-NEXT: [[C:%.*]] = sext i8 [[A]] to i16 ; CHECK-NEXT: ret i16 [[C]] @@ -134,7 +134,7 @@ define i24 @wide_source_matching_signbits(i32 %x) { ; CHECK-LABEL: @wide_source_matching_signbits( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 7 ; CHECK-NEXT: [[A:%.*]] = shl nsw i32 -1, [[M]] -; CHECK-NEXT: [[C:%.*]] = trunc i32 [[A]] to i24 +; CHECK-NEXT: [[C:%.*]] = trunc nsw i32 [[A]] to i24 ; CHECK-NEXT: ret i24 [[C]] ; %m = and i32 %x, 7 @@ -194,7 +194,7 @@ define i32 @same_source_matching_signbits_extra_use(i32 %x) { 
; CHECK-LABEL: @same_source_matching_signbits_extra_use( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 7 ; CHECK-NEXT: [[A:%.*]] = shl nsw i32 -1, [[M]] -; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i8 +; CHECK-NEXT: [[B:%.*]] = trunc nsw i32 [[A]] to i8 ; CHECK-NEXT: call void @use8(i8 [[B]]) ; CHECK-NEXT: ret i32 [[A]] ; diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll index 186745362a448..e3b6058ce7f80 100644 --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -385,7 +385,7 @@ define i16 @smear_set_bit_different_dest_type(i32 %x) { ; CHECK-LABEL: @smear_set_bit_different_dest_type( ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 24 ; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[TMP1]], 31 -; CHECK-NEXT: [[S:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: [[S:%.*]] = trunc nsw i32 [[TMP2]] to i16 ; CHECK-NEXT: ret i16 [[S]] ; %t = trunc i32 %x to i8 diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll index aa3a238e0949c..7f948848844c5 100644 --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -742,7 +742,7 @@ define <3 x i32> @add3_i96(<3 x i32> %0, <3 x i32> %1) { ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <3 x i32> [[TMP1]], i64 2 ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP11]], 32 -; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = trunc nuw nsw i64 [[TMP15]] to i32 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP14]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x i32> poison, i32 [[ADD_NARROWED]], i64 0 ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP11]] to i32 diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll index 60a7dce2a8753..a0a3c8edfb4b5 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll @@ -139,7 +139,7 @@ define i1 @n4(i32 %x, i32 %len) { ; CHECK-NEXT: [[T2:%.*]] = add i32 [[LEN]], -16 ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg i32 [[T2]] to i64 ; CHECK-NEXT: [[T3:%.*]] = lshr i64 262143, [[T2_WIDE]] -; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32 +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc nuw nsw i64 [[T3]] to i32 ; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]] ; CHECK-NEXT: [[T5:%.*]] = icmp ne i32 [[T4]], 0 ; CHECK-NEXT: ret i1 [[T5]] @@ -229,7 +229,7 @@ define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) { ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16> ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> <i64 262143, i64 262143>, [[T2_WIDE]] -; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> +; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc nuw nsw <2 x i64> [[T3]] to <2 x i32> ; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]] ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T5]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index 6773cbac1d1e8..84dd4c57ebc61 100644 ---
a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -13,7 +13,7 @@ define i16 @t0(i32 %x, i16 %y) { ; CHECK-LABEL: @t0( ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -30,7 +30,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31> -; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[T5:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> <i16 32, i16 32>, %y @@ -100,7 +100,7 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { ; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 ; CHECK-NEXT: call void @use16(i16 [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -118,7 +118,7 @@ define i16 @t7_extrause1(i32 %x, i16 %y) { ; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1 ; CHECK-NEXT: call void @use16(i16 [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index 63099a8af81f6..214ec88d2e551 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -13,7 +13,7 @@ define i16 @t0(i32 %x, i16 %y) { ; CHECK-LABEL: @t0( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -30,7 +30,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31> -; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> <i16 32, i16 32>, %y @@ -100,7 +100,7 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { ; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 ; CHECK-NEXT: call void @use16(i16 [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -118,7 +118,7 @@ define i16 @t7_extrause1(i32 %x, i16 %y) { ; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1 ; CHECK-NEXT: call void @use16(i16 [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 8a40863300d45..7c35718601ba7 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -166,7 +166,7 @@ define i8 @shl_trunc_smaller_lshr(i32 %x) { define i24 @shl_trunc_bigger_ashr(i32 %x) { ; CHECK-LABEL: @shl_trunc_bigger_ashr( ; CHECK-NEXT: [[SH_DIFF:%.*]] = ashr i32 [[X:%.*]], 9 -; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i24 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc nsw i32 [[SH_DIFF]] to i24 ; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8 ; CHECK-NEXT: ret i24 [[LT]] ; @@ -502,7 +502,7 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_undef_left( ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> +; CHECK-NEXT: [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; %shl = shl <2 x i8> , %x ; 0b1001_0100 @@ -561,7 +561,7 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both( ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> +; CHECK-NEXT: [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; %shl = shl <2 x i8> , %x ; 0b1001_1000, 0b1001_0100 diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index bef7fc81a7d1f..bb8661919c89f 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -423,7 +423,7 @@ define i32 @test29(i64 %d18) { ; CHECK-LABEL: @test29( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i64 [[D18:%.*]], 63 -; CHECK-NEXT: [[I101:%.*]] = trunc i64 [[SUM_SHIFT]] to i32 +; CHECK-NEXT: [[I101:%.*]] = trunc nuw nsw i64 [[SUM_SHIFT]] to i32 ; CHECK-NEXT: ret i32 [[I101]] ; entry: @@ -437,7 +437,7 @@ define <2 x i32> @test29_uniform(<2 x i64> %d18) { ; CHECK-LABEL: @test29_uniform( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], -; CHECK-NEXT: [[I101:%.*]] = trunc <2 x i64> [[SUM_SHIFT]] to <2 x i32> +; CHECK-NEXT: [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[I101]] ; entry: @@ -466,7 +466,7 @@ define <2 x i32> @test29_poison(<2 x i64> %d18) { ; CHECK-LABEL: @test29_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I916:%.*]] = lshr <2 x i64> [[D18:%.*]], -; CHECK-NEXT: [[I917:%.*]] = trunc <2 x i64> [[I916]] to <2 x i32> +; CHECK-NEXT: [[I917:%.*]] = trunc nuw <2 x i64> [[I916]] to <2 x i32> ; CHECK-NEXT: [[I10:%.*]] = lshr <2 x i32> [[I917]], ; CHECK-NEXT: ret <2 x i32> [[I10]] ; diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll index 26175ebbe1535..08e6e74581848 100644 --- a/llvm/test/Transforms/InstCombine/shl-demand.ll +++ b/llvm/test/Transforms/InstCombine/shl-demand.ll @@ -222,7 +222,7 @@ define i8 @must_drop_poison(i32 %x, i32 %y) { define i32 @f_t15_t01_t09(i40 %t2) { ; CHECK-LABEL: @f_t15_t01_t09( ; CHECK-NEXT: [[SH_DIFF:%.*]] = ashr i40 [[T2:%.*]], 15 -; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i40 [[SH_DIFF]] to i32 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc nsw i40 [[SH_DIFF]] to i32 ; CHECK-NEXT: [[SHL1:%.*]] = and i32 [[TR_SH_DIFF]], -65536 ; CHECK-NEXT: ret i32 [[SHL1]] ; diff --git 
a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll index d3ac6cfa9c601..30fad66bf5218 100644 --- a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll +++ b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll @@ -322,7 +322,7 @@ define i1 @unsigned_sign_bit_extract_with_trunc_extrause(i64 %x) { ; CHECK-LABEL: @unsigned_sign_bit_extract_with_trunc_extrause( ; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i64 [[X:%.*]], 63 ; CHECK-NEXT: call void @use64(i64 [[SIGNBIT]]) -; CHECK-NEXT: [[SIGNBIT_NARROW:%.*]] = trunc i64 [[SIGNBIT]] to i32 +; CHECK-NEXT: [[SIGNBIT_NARROW:%.*]] = trunc nuw nsw i64 [[SIGNBIT]] to i32 ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT_NARROW]]) ; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] @@ -348,7 +348,7 @@ define i1 @signed_sign_bit_extract_trunc_extrause(i64 %x) { ; CHECK-LABEL: @signed_sign_bit_extract_trunc_extrause( ; CHECK-NEXT: [[SIGNSMEAR:%.*]] = ashr i64 [[X:%.*]], 63 ; CHECK-NEXT: call void @use64(i64 [[SIGNSMEAR]]) -; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc i64 [[SIGNSMEAR]] to i32 +; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc nsw i64 [[SIGNSMEAR]] to i32 ; CHECK-NEXT: call void @use32(i32 [[SIGNSMEAR_NARROW]]) ; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] diff --git a/llvm/test/Transforms/InstCombine/trunc-demand.ll b/llvm/test/Transforms/InstCombine/trunc-demand.ll index 4f6e79285eaa8..9d7bf589268e2 100644 --- a/llvm/test/Transforms/InstCombine/trunc-demand.ll +++ b/llvm/test/Transforms/InstCombine/trunc-demand.ll @@ -36,7 +36,7 @@ define i6 @trunc_lshr_exact_mask(i8 %x) { define i6 @trunc_lshr_big_mask(i8 %x) { ; CHECK-LABEL: @trunc_lshr_big_mask( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2 -; CHECK-NEXT: [[T:%.*]] = trunc i8 [[S]] to i6 +; CHECK-NEXT: [[T:%.*]] = trunc nuw i8 [[S]] to i6 ; CHECK-NEXT: [[R:%.*]] = and i6 [[T]], 31 ; CHECK-NEXT: ret i6 [[R]] ; @@ -52,7 +52,7 @@ define i6 @trunc_lshr_use1(i8 %x) { ; CHECK-LABEL: @trunc_lshr_use1( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2 ; CHECK-NEXT: call void @use8(i8 [[S]]) -; CHECK-NEXT: [[T:%.*]] = trunc i8 [[S]] to i6 +; CHECK-NEXT: [[T:%.*]] = trunc nuw i8 [[S]] to i6 ; CHECK-NEXT: [[R:%.*]] = and i6 [[T]], 15 ; CHECK-NEXT: ret i6 [[R]] ; @@ -68,7 +68,7 @@ define i6 @trunc_lshr_use1(i8 %x) { define i6 @trunc_lshr_use2(i8 %x) { ; CHECK-LABEL: @trunc_lshr_use2( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2 -; CHECK-NEXT: [[T:%.*]] = trunc i8 [[S]] to i6 +; CHECK-NEXT: [[T:%.*]] = trunc nuw i8 [[S]] to i6 ; CHECK-NEXT: call void @use6(i6 [[T]]) ; CHECK-NEXT: [[R:%.*]] = and i6 [[T]], 15 ; CHECK-NEXT: ret i6 [[R]] @@ -157,7 +157,7 @@ define i6 @or_trunc_lshr_more(i8 %x) { define i6 @or_trunc_lshr_small_mask(i8 %x) { ; CHECK-LABEL: @or_trunc_lshr_small_mask( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 4 -; CHECK-NEXT: [[T:%.*]] = trunc i8 [[S]] to i6 +; CHECK-NEXT: [[T:%.*]] = trunc nuw nsw i8 [[S]] to i6 ; CHECK-NEXT: [[R:%.*]] = or i6 [[T]], -8 ; CHECK-NEXT: ret i6 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index 87c90bb91f39e..4c857125365a9 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -171,7 +171,7 @@ define i32 @test5(i32 %A) { define i32 @test6(i64 %A) { ; 
CHECK-LABEL: @test6( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 32 -; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[D:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[D]] ; %B = zext i64 %A to i128 @@ -459,7 +459,7 @@ define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> ; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = trunc nuw nsw <2 x i128> [[F]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> @@ -524,7 +524,7 @@ define <2 x i64> @test13_vec_undef(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> ; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = trunc nsw <2 x i128> [[F]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = sext <2 x i32> %A to <2 x i128> diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index e578b604c9d6a..2c5f428cf98de 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -72,7 +72,7 @@ define i8 @trunc_lshr_trunc_outofrange(i64 %a) { ; CHECK-LABEL: @trunc_lshr_trunc_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 ; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 25 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 +; CHECK-NEXT: [[D:%.*]] = trunc nuw nsw i32 [[C]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 @@ -158,7 +158,7 @@ define i8 @trunc_ashr_trunc_outofrange(i64 %a) { ; CHECK-LABEL: @trunc_ashr_trunc_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 ; CHECK-NEXT: [[C:%.*]] = ashr i32 [[B]], 25 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 +; CHECK-NEXT: [[D:%.*]] = trunc nsw i32 [[C]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index 760825d6b1da0..c77d7269f2cf7 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -171,7 +171,7 @@ define i32 @test5(i32 %A) { define i32 @test6(i64 %A) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 32 -; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[D:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[D]] ; %B = zext i64 %A to i128 @@ -459,7 +459,7 @@ define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> ; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = trunc nuw nsw <2 x i128> [[F]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> @@ -524,7 +524,7 @@ define <2 x i64> @test13_vec_undef(<2 x i32> %A, <2 x i32> %B) { ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> ; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = trunc nsw <2 x i128> 
[[F]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = sext <2 x i32> %A to <2 x i128> diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll index e4df94afd1741..c0111528e2a4d 100644 --- a/llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -10,7 +10,7 @@ define i8 @testi16i8(i16 %add) { ; CHECK-LABEL: @testi16i8( ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) -; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw i16 [[TMP2]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -29,7 +29,7 @@ define i32 @testi64i32(i64 %add) { ; CHECK-LABEL: @testi64i32( ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD:%.*]], i64 -2147483648) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP1]], i64 2147483647) -; CHECK-NEXT: [[COND_I:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw i64 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[COND_I]] ; %sh = lshr i64 %add, 32 @@ -48,7 +48,7 @@ define i16 @testi32i16i8(i32 %add) { ; CHECK-LABEL: @testi32i16i8( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD:%.*]], i32 -128) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: [[R:%.*]] = trunc nsw i32 [[TMP2]] to i16 ; CHECK-NEXT: ret i16 [[R]] ; %a = add i32 %add, 128 @@ -64,7 +64,7 @@ define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { ; CHECK-LABEL: @testv4i32i16i8( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> ) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) -; CHECK-NEXT: [[R:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[R:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[R]] ; %a = add <4 x i32> %add, @@ -149,7 +149,7 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> ) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) -; CHECK-NEXT: [[COND_I:%.*]] = trunc <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw <4 x i16> [[TMP2]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[COND_I]] ; %sh = lshr <4 x i16> %add, @@ -188,7 +188,7 @@ define i8 @testi16i8_revcmp(i16 %add) { ; CHECK-LABEL: @testi16i8_revcmp( ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) -; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw i16 [[TMP2]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -207,7 +207,7 @@ define i8 @testi16i8_revselect(i16 %add) { ; CHECK-LABEL: @testi16i8_revselect( ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) -; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw i16 [[TMP2]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -268,7 +268,7 @@ define i16 @differentconsts(i32 %x, i16 %replacement_low, i16 
%replacement_high) define i8 @badimm1(i16 %add) { ; CHECK-LABEL: @badimm1( ; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 9 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw nsw i16 [[SH]] to i8 ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 ; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] @@ -292,7 +292,7 @@ define i8 @badimm1(i16 %add) { define i8 @badimm2(i16 %add) { ; CHECK-LABEL: @badimm2( ; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw i16 [[SH]] to i8 ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 ; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 6 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] @@ -319,7 +319,7 @@ define i8 @badimm3(i16 %add) { ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 ; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], 256 ; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 14 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc nsw i16 [[SHR4_I]] to i8 ; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 ; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] ; CHECK-NEXT: ret i8 [[COND_I]] diff --git a/llvm/test/Transforms/InstCombine/vector-trunc.ll b/llvm/test/Transforms/InstCombine/vector-trunc.ll index eeb5a3fdb7398..bccb12e66eba1 100644 --- a/llvm/test/Transforms/InstCombine/vector-trunc.ll +++ b/llvm/test/Transforms/InstCombine/vector-trunc.ll @@ -4,7 +4,7 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; @@ -17,7 +17,7 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { define <4 x i16> @trunc_add_no_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_no_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; diff --git a/llvm/test/Transforms/InstCombine/xor-ashr.ll b/llvm/test/Transforms/InstCombine/xor-ashr.ll index 32ca8e338c2cc..097e04b3b9cb5 100644 --- a/llvm/test/Transforms/InstCombine/xor-ashr.ll +++ b/llvm/test/Transforms/InstCombine/xor-ashr.ll @@ -80,7 +80,7 @@ define <4 x i8> @testv4i16i8_undef(<4 x i16> %add) { define i8 @wrongimm(i16 %add) { ; CHECK-LABEL: @wrongimm( ; CHECK-NEXT: [[SH:%.*]] = ashr i16 [[ADD:%.*]], 14 -; CHECK-NEXT: [[T:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[T:%.*]] = trunc nsw i16 [[SH]] to i8 ; CHECK-NEXT: [[X:%.*]] = xor i8 [[T]], 27 ; CHECK-NEXT: ret i8 [[X]] ; @@ -140,7 +140,7 @@ define i16 @extrause_trunc1(i32 %add) { define i16 @extrause_trunc2(i32 %add) { ; CHECK-LABEL: @extrause_trunc2( ; CHECK-NEXT: [[SH:%.*]] = ashr i32 [[ADD:%.*]], 31 -; CHECK-NEXT: [[T:%.*]] = trunc i32 [[SH]] to i16 +; CHECK-NEXT: [[T:%.*]] = trunc nsw i32 [[SH]] to i16 ; CHECK-NEXT: call void @use16(i16 [[T]]) ; CHECK-NEXT: [[X:%.*]] = xor i16 [[T]], 127 ; CHECK-NEXT: ret i16 [[X]] diff --git 
a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll index f082873bf7839..c8eb513a8440b 100644 --- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll +++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll @@ -57,7 +57,7 @@ define <2 x i17> @trunc_ctlz_zext_v2i17_v2i32_multiple_uses(<2 x i17> %x) { ; CHECK-LABEL: @trunc_ctlz_zext_v2i17_v2i32_multiple_uses( ; CHECK-NEXT: [[Z:%.*]] = zext <2 x i17> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[P:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[Z]], i1 false), !range [[RNG2:![0-9]+]] -; CHECK-NEXT: [[ZZ:%.*]] = trunc <2 x i32> [[P]] to <2 x i17> +; CHECK-NEXT: [[ZZ:%.*]] = trunc nuw nsw <2 x i32> [[P]] to <2 x i17> ; CHECK-NEXT: call void @use(<2 x i32> [[P]]) ; CHECK-NEXT: ret <2 x i17> [[ZZ]] ; @@ -91,7 +91,7 @@ define i16 @trunc_ctlz_zext_i10_i32(i10 %x) { ; CHECK-LABEL: @trunc_ctlz_zext_i10_i32( ; CHECK-NEXT: [[Z:%.*]] = zext i10 [[X:%.*]] to i32 ; CHECK-NEXT: [[P:%.*]] = call i32 @llvm.ctlz.i32(i32 [[Z]], i1 false), !range [[RNG3:![0-9]+]] -; CHECK-NEXT: [[ZZ:%.*]] = trunc i32 [[P]] to i16 +; CHECK-NEXT: [[ZZ:%.*]] = trunc nuw nsw i32 [[P]] to i16 ; CHECK-NEXT: ret i16 [[ZZ]] ; %z = zext i10 %x to i32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 9a7b9f570cf77..ed8d8e15282d5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -34,14 +34,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw <16 x i16> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[TMP6]] to <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = trunc nuw <16 x i16> [[TMP6]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw <16 x i16> [[TMP9]], [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], -; CHECK-NEXT: [[TMP12:%.*]] = trunc <16 x i16> [[TMP11]] to <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = trunc nuw <16 x i16> [[TMP11]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -67,14 +67,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16> ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], -; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX7]] ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD10]] to 
<8 x i16> ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]] ; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], -; CHECK-NEXT: [[TMP25:%.*]] = trunc <8 x i16> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 8 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]] @@ -99,14 +99,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP28]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV3]], [[CONV]] ; CHECK-NEXT: [[SHR_26:%.*]] = lshr i32 [[MUL]], 8 -; CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[SHR_26]] to i8 +; CHECK-NEXT: [[CONV4:%.*]] = trunc nuw i32 [[SHR_26]] to i8 ; CHECK-NEXT: store i8 [[CONV4]], ptr [[ARRAYIDX2]], align 1 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 ; CHECK-NEXT: [[CONV9:%.*]] = zext i8 [[TMP29]] to i32 ; CHECK-NEXT: [[MUL10:%.*]] = mul nuw nsw i32 [[CONV9]], [[CONV]] ; CHECK-NEXT: [[SHR11_27:%.*]] = lshr i32 [[MUL10]], 8 -; CHECK-NEXT: [[CONV12:%.*]] = trunc i32 [[SHR11_27]] to i8 +; CHECK-NEXT: [[CONV12:%.*]] = trunc nuw i32 [[SHR11_27]] to i8 ; CHECK-NEXT: store i8 [[CONV12]], ptr [[ARRAYIDX8]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 @@ -172,8 +172,8 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], ; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP8]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = trunc nuw <16 x i16> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP10:%.*]] = trunc nuw <16 x i16> [[TMP8]] to <16 x i8> ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index b772a3814a64a..0f26092f510ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -23,7 +23,7 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967280 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP1]] @@ -138,7 +138,7 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END18:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PSRC]], 
i64 [[N_VEC]] -; CHECK-NEXT: [[DOTCAST11:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[DOTCAST11:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END12:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST11]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 @@ -146,7 +146,7 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP0]], 4294967288 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC9]] to i32 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC9]] to i32 ; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] ; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC9]] ; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC9]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index 23b653bbda380..b5effe73fa73d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -60,7 +60,7 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[IND_END24:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END21:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP15]] -; CHECK-NEXT: [[DOTCAST17:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[DOTCAST17:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END18:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST17]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 56 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 @@ -68,7 +68,7 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_VEC15:%.*]] = and i64 [[TMP0]], 4294967288 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC15]] to i32 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC15]] to i32 ; CHECK-NEXT: [[IND_END16:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] ; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[N_VEC15]], 1 ; CHECK-NEXT: [[IND_END20:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP16]] @@ -183,7 +183,7 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END24:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[IND_END21:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] -; CHECK-NEXT: [[DOTCAST17:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[DOTCAST17:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END18:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST17]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 112 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 @@ -191,7 +191,7 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: 
[[N_VEC15:%.*]] = and i64 [[TMP0]], 4294967280 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC15]] to i32 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC15]] to i32 ; CHECK-NEXT: [[IND_END16:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] ; CHECK-NEXT: [[IND_END20:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC15]] ; CHECK-NEXT: [[IND_END23:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC15]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index a47a38510eeeb..b66ce4047ad95 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -1205,7 +1205,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i16 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1283,7 +1283,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i16 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index d18b207e87076..9206893cb2341 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -45,8 +45,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i32> [[TMP12]], [[TMP10]] ; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], ; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[TMP15]] to <8 x i16> -; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[TMP16]] to <8 x i16> +; CHECK-NEXT: [[TMP17:%.*]] = trunc nuw <8 x i32> [[TMP15]] to <8 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <8 x i32> [[TMP16]] to <8 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = sub <8 x i16> zeroinitializer, [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = sub <8 x i16> zeroinitializer, [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw <8 x i32> [[TMP6]], [[TMP1]] @@ -55,8 +55,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[TMP22]], [[TMP10]] ; CHECK-NEXT: [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], ; CHECK-NEXT: [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], -; CHECK-NEXT: [[TMP27:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16> -; CHECK-NEXT: [[TMP28:%.*]] = trunc <8 x i32> [[TMP26]] to <8 x i16> +; CHECK-NEXT: [[TMP27:%.*]] = trunc nuw <8 x i32> [[TMP25]] to <8 x i16> +; CHECK-NEXT: [[TMP28:%.*]] = trunc nuw <8 x i32> [[TMP26]] to <8 x i16> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP27]], <8 x i16> [[TMP19]] ; CHECK-NEXT: [[PREDPHI34:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP28]], <8 x i16> [[TMP20]] ; CHECK-NEXT: 
store <8 x i16> [[PREDPHI]], ptr [[DCT]], align 2, !alias.scope [[META0]], !noalias [[META3]] @@ -83,13 +83,13 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV5]], [[CONV]] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ADD]], [[CONV11]] ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 16 -; CHECK-NEXT: [[CONV12:%.*]] = trunc i32 [[SHR]] to i16 +; CHECK-NEXT: [[CONV12:%.*]] = trunc nuw i32 [[SHR]] to i16 ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: ; CHECK-NEXT: [[ADD21:%.*]] = sub nsw i32 [[CONV5]], [[CONV]] ; CHECK-NEXT: [[MUL25:%.*]] = mul i32 [[ADD21]], [[CONV11]] ; CHECK-NEXT: [[SHR26:%.*]] = lshr i32 [[MUL25]], 16 -; CHECK-NEXT: [[TMP33:%.*]] = trunc i32 [[SHR26]] to i16 +; CHECK-NEXT: [[TMP33:%.*]] = trunc nuw i32 [[SHR26]] to i16 ; CHECK-NEXT: [[CONV28:%.*]] = sub i16 0, [[TMP33]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -110,14 +110,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_1:%.*]] = sub nsw i32 [[CONV5_1]], [[CONV_1]] ; CHECK-NEXT: [[MUL25_1:%.*]] = mul i32 [[ADD21_1]], [[CONV11_1]] ; CHECK-NEXT: [[SHR26_1:%.*]] = lshr i32 [[MUL25_1]], 16 -; CHECK-NEXT: [[TMP37:%.*]] = trunc i32 [[SHR26_1]] to i16 +; CHECK-NEXT: [[TMP37:%.*]] = trunc nuw i32 [[SHR26_1]] to i16 ; CHECK-NEXT: [[CONV28_1:%.*]] = sub i16 0, [[TMP37]] ; CHECK-NEXT: br label [[IF_END_1:%.*]] ; CHECK: if.then.1: ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[CONV5_1]], [[CONV_1]] ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[ADD_1]], [[CONV11_1]] ; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[MUL_1]], 16 -; CHECK-NEXT: [[CONV12_1:%.*]] = trunc i32 [[SHR_1]] to i16 +; CHECK-NEXT: [[CONV12_1:%.*]] = trunc nuw i32 [[SHR_1]] to i16 ; CHECK-NEXT: br label [[IF_END_1]] ; CHECK: if.end.1: ; CHECK-NEXT: [[STOREMERGE_1:%.*]] = phi i16 [ [[CONV28_1]], [[IF_ELSE_1]] ], [ [[CONV12_1]], [[IF_THEN_1]] ] @@ -138,14 +138,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_2:%.*]] = sub nsw i32 [[CONV5_2]], [[CONV_2]] ; CHECK-NEXT: [[MUL25_2:%.*]] = mul i32 [[ADD21_2]], [[CONV11_2]] ; CHECK-NEXT: [[SHR26_2:%.*]] = lshr i32 [[MUL25_2]], 16 -; CHECK-NEXT: [[TMP41:%.*]] = trunc i32 [[SHR26_2]] to i16 +; CHECK-NEXT: [[TMP41:%.*]] = trunc nuw i32 [[SHR26_2]] to i16 ; CHECK-NEXT: [[CONV28_2:%.*]] = sub i16 0, [[TMP41]] ; CHECK-NEXT: br label [[IF_END_2:%.*]] ; CHECK: if.then.2: ; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[CONV5_2]], [[CONV_2]] ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[ADD_2]], [[CONV11_2]] ; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[MUL_2]], 16 -; CHECK-NEXT: [[CONV12_2:%.*]] = trunc i32 [[SHR_2]] to i16 +; CHECK-NEXT: [[CONV12_2:%.*]] = trunc nuw i32 [[SHR_2]] to i16 ; CHECK-NEXT: br label [[IF_END_2]] ; CHECK: if.end.2: ; CHECK-NEXT: [[STOREMERGE_2:%.*]] = phi i16 [ [[CONV28_2]], [[IF_ELSE_2]] ], [ [[CONV12_2]], [[IF_THEN_2]] ] @@ -166,14 +166,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_3:%.*]] = sub nsw i32 [[CONV5_3]], [[CONV_3]] ; CHECK-NEXT: [[MUL25_3:%.*]] = mul i32 [[ADD21_3]], [[CONV11_3]] ; CHECK-NEXT: [[SHR26_3:%.*]] = lshr i32 [[MUL25_3]], 16 -; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[SHR26_3]] to i16 +; CHECK-NEXT: [[TMP45:%.*]] = trunc nuw i32 [[SHR26_3]] to i16 ; CHECK-NEXT: [[CONV28_3:%.*]] = sub i16 0, [[TMP45]] ; CHECK-NEXT: br label [[IF_END_3:%.*]] ; CHECK: if.then.3: ; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[CONV5_3]], [[CONV_3]] ; 
CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[ADD_3]], [[CONV11_3]] ; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[MUL_3]], 16 -; CHECK-NEXT: [[CONV12_3:%.*]] = trunc i32 [[SHR_3]] to i16 +; CHECK-NEXT: [[CONV12_3:%.*]] = trunc nuw i32 [[SHR_3]] to i16 ; CHECK-NEXT: br label [[IF_END_3]] ; CHECK: if.end.3: ; CHECK-NEXT: [[STOREMERGE_3:%.*]] = phi i16 [ [[CONV28_3]], [[IF_ELSE_3]] ], [ [[CONV12_3]], [[IF_THEN_3]] ] @@ -194,14 +194,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_4:%.*]] = sub nsw i32 [[CONV5_4]], [[CONV_4]] ; CHECK-NEXT: [[MUL25_4:%.*]] = mul i32 [[ADD21_4]], [[CONV11_4]] ; CHECK-NEXT: [[SHR26_4:%.*]] = lshr i32 [[MUL25_4]], 16 -; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[SHR26_4]] to i16 +; CHECK-NEXT: [[TMP49:%.*]] = trunc nuw i32 [[SHR26_4]] to i16 ; CHECK-NEXT: [[CONV28_4:%.*]] = sub i16 0, [[TMP49]] ; CHECK-NEXT: br label [[IF_END_4:%.*]] ; CHECK: if.then.4: ; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[CONV5_4]], [[CONV_4]] ; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[ADD_4]], [[CONV11_4]] ; CHECK-NEXT: [[SHR_4:%.*]] = lshr i32 [[MUL_4]], 16 -; CHECK-NEXT: [[CONV12_4:%.*]] = trunc i32 [[SHR_4]] to i16 +; CHECK-NEXT: [[CONV12_4:%.*]] = trunc nuw i32 [[SHR_4]] to i16 ; CHECK-NEXT: br label [[IF_END_4]] ; CHECK: if.end.4: ; CHECK-NEXT: [[STOREMERGE_4:%.*]] = phi i16 [ [[CONV28_4]], [[IF_ELSE_4]] ], [ [[CONV12_4]], [[IF_THEN_4]] ] @@ -222,14 +222,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_5:%.*]] = sub nsw i32 [[CONV5_5]], [[CONV_5]] ; CHECK-NEXT: [[MUL25_5:%.*]] = mul i32 [[ADD21_5]], [[CONV11_5]] ; CHECK-NEXT: [[SHR26_5:%.*]] = lshr i32 [[MUL25_5]], 16 -; CHECK-NEXT: [[TMP53:%.*]] = trunc i32 [[SHR26_5]] to i16 +; CHECK-NEXT: [[TMP53:%.*]] = trunc nuw i32 [[SHR26_5]] to i16 ; CHECK-NEXT: [[CONV28_5:%.*]] = sub i16 0, [[TMP53]] ; CHECK-NEXT: br label [[IF_END_5:%.*]] ; CHECK: if.then.5: ; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[CONV5_5]], [[CONV_5]] ; CHECK-NEXT: [[MUL_5:%.*]] = mul i32 [[ADD_5]], [[CONV11_5]] ; CHECK-NEXT: [[SHR_5:%.*]] = lshr i32 [[MUL_5]], 16 -; CHECK-NEXT: [[CONV12_5:%.*]] = trunc i32 [[SHR_5]] to i16 +; CHECK-NEXT: [[CONV12_5:%.*]] = trunc nuw i32 [[SHR_5]] to i16 ; CHECK-NEXT: br label [[IF_END_5]] ; CHECK: if.end.5: ; CHECK-NEXT: [[STOREMERGE_5:%.*]] = phi i16 [ [[CONV28_5]], [[IF_ELSE_5]] ], [ [[CONV12_5]], [[IF_THEN_5]] ] @@ -250,14 +250,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_6:%.*]] = sub nsw i32 [[CONV5_6]], [[CONV_6]] ; CHECK-NEXT: [[MUL25_6:%.*]] = mul i32 [[ADD21_6]], [[CONV11_6]] ; CHECK-NEXT: [[SHR26_6:%.*]] = lshr i32 [[MUL25_6]], 16 -; CHECK-NEXT: [[TMP57:%.*]] = trunc i32 [[SHR26_6]] to i16 +; CHECK-NEXT: [[TMP57:%.*]] = trunc nuw i32 [[SHR26_6]] to i16 ; CHECK-NEXT: [[CONV28_6:%.*]] = sub i16 0, [[TMP57]] ; CHECK-NEXT: br label [[IF_END_6:%.*]] ; CHECK: if.then.6: ; CHECK-NEXT: [[ADD_6:%.*]] = add nuw nsw i32 [[CONV5_6]], [[CONV_6]] ; CHECK-NEXT: [[MUL_6:%.*]] = mul i32 [[ADD_6]], [[CONV11_6]] ; CHECK-NEXT: [[SHR_6:%.*]] = lshr i32 [[MUL_6]], 16 -; CHECK-NEXT: [[CONV12_6:%.*]] = trunc i32 [[SHR_6]] to i16 +; CHECK-NEXT: [[CONV12_6:%.*]] = trunc nuw i32 [[SHR_6]] to i16 ; CHECK-NEXT: br label [[IF_END_6]] ; CHECK: if.end.6: ; CHECK-NEXT: [[STOREMERGE_6:%.*]] = phi i16 [ [[CONV28_6]], [[IF_ELSE_6]] ], [ [[CONV12_6]], [[IF_THEN_6]] ] @@ -278,14 +278,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_7:%.*]] = sub 
nsw i32 [[CONV5_7]], [[CONV_7]] ; CHECK-NEXT: [[MUL25_7:%.*]] = mul i32 [[ADD21_7]], [[CONV11_7]] ; CHECK-NEXT: [[SHR26_7:%.*]] = lshr i32 [[MUL25_7]], 16 -; CHECK-NEXT: [[TMP61:%.*]] = trunc i32 [[SHR26_7]] to i16 +; CHECK-NEXT: [[TMP61:%.*]] = trunc nuw i32 [[SHR26_7]] to i16 ; CHECK-NEXT: [[CONV28_7:%.*]] = sub i16 0, [[TMP61]] ; CHECK-NEXT: br label [[IF_END_7:%.*]] ; CHECK: if.then.7: ; CHECK-NEXT: [[ADD_7:%.*]] = add nuw nsw i32 [[CONV5_7]], [[CONV_7]] ; CHECK-NEXT: [[MUL_7:%.*]] = mul i32 [[ADD_7]], [[CONV11_7]] ; CHECK-NEXT: [[SHR_7:%.*]] = lshr i32 [[MUL_7]], 16 -; CHECK-NEXT: [[CONV12_7:%.*]] = trunc i32 [[SHR_7]] to i16 +; CHECK-NEXT: [[CONV12_7:%.*]] = trunc nuw i32 [[SHR_7]] to i16 ; CHECK-NEXT: br label [[IF_END_7]] ; CHECK: if.end.7: ; CHECK-NEXT: [[STOREMERGE_7:%.*]] = phi i16 [ [[CONV28_7]], [[IF_ELSE_7]] ], [ [[CONV12_7]], [[IF_THEN_7]] ] @@ -306,14 +306,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_8:%.*]] = sub nsw i32 [[CONV5_8]], [[CONV_8]] ; CHECK-NEXT: [[MUL25_8:%.*]] = mul i32 [[ADD21_8]], [[CONV11_8]] ; CHECK-NEXT: [[SHR26_8:%.*]] = lshr i32 [[MUL25_8]], 16 -; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[SHR26_8]] to i16 +; CHECK-NEXT: [[TMP65:%.*]] = trunc nuw i32 [[SHR26_8]] to i16 ; CHECK-NEXT: [[CONV28_8:%.*]] = sub i16 0, [[TMP65]] ; CHECK-NEXT: br label [[IF_END_8:%.*]] ; CHECK: if.then.8: ; CHECK-NEXT: [[ADD_8:%.*]] = add nuw nsw i32 [[CONV5_8]], [[CONV_8]] ; CHECK-NEXT: [[MUL_8:%.*]] = mul i32 [[ADD_8]], [[CONV11_8]] ; CHECK-NEXT: [[SHR_8:%.*]] = lshr i32 [[MUL_8]], 16 -; CHECK-NEXT: [[CONV12_8:%.*]] = trunc i32 [[SHR_8]] to i16 +; CHECK-NEXT: [[CONV12_8:%.*]] = trunc nuw i32 [[SHR_8]] to i16 ; CHECK-NEXT: br label [[IF_END_8]] ; CHECK: if.end.8: ; CHECK-NEXT: [[STOREMERGE_8:%.*]] = phi i16 [ [[CONV28_8]], [[IF_ELSE_8]] ], [ [[CONV12_8]], [[IF_THEN_8]] ] @@ -334,14 +334,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_9:%.*]] = sub nsw i32 [[CONV5_9]], [[CONV_9]] ; CHECK-NEXT: [[MUL25_9:%.*]] = mul i32 [[ADD21_9]], [[CONV11_9]] ; CHECK-NEXT: [[SHR26_9:%.*]] = lshr i32 [[MUL25_9]], 16 -; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[SHR26_9]] to i16 +; CHECK-NEXT: [[TMP69:%.*]] = trunc nuw i32 [[SHR26_9]] to i16 ; CHECK-NEXT: [[CONV28_9:%.*]] = sub i16 0, [[TMP69]] ; CHECK-NEXT: br label [[IF_END_9:%.*]] ; CHECK: if.then.9: ; CHECK-NEXT: [[ADD_9:%.*]] = add nuw nsw i32 [[CONV5_9]], [[CONV_9]] ; CHECK-NEXT: [[MUL_9:%.*]] = mul i32 [[ADD_9]], [[CONV11_9]] ; CHECK-NEXT: [[SHR_9:%.*]] = lshr i32 [[MUL_9]], 16 -; CHECK-NEXT: [[CONV12_9:%.*]] = trunc i32 [[SHR_9]] to i16 +; CHECK-NEXT: [[CONV12_9:%.*]] = trunc nuw i32 [[SHR_9]] to i16 ; CHECK-NEXT: br label [[IF_END_9]] ; CHECK: if.end.9: ; CHECK-NEXT: [[STOREMERGE_9:%.*]] = phi i16 [ [[CONV28_9]], [[IF_ELSE_9]] ], [ [[CONV12_9]], [[IF_THEN_9]] ] @@ -362,14 +362,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_10:%.*]] = sub nsw i32 [[CONV5_10]], [[CONV_10]] ; CHECK-NEXT: [[MUL25_10:%.*]] = mul i32 [[ADD21_10]], [[CONV11_10]] ; CHECK-NEXT: [[SHR26_10:%.*]] = lshr i32 [[MUL25_10]], 16 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i32 [[SHR26_10]] to i16 +; CHECK-NEXT: [[TMP73:%.*]] = trunc nuw i32 [[SHR26_10]] to i16 ; CHECK-NEXT: [[CONV28_10:%.*]] = sub i16 0, [[TMP73]] ; CHECK-NEXT: br label [[IF_END_10:%.*]] ; CHECK: if.then.10: ; CHECK-NEXT: [[ADD_10:%.*]] = add nuw nsw i32 [[CONV5_10]], [[CONV_10]] ; CHECK-NEXT: [[MUL_10:%.*]] = mul i32 [[ADD_10]], 
[[CONV11_10]] ; CHECK-NEXT: [[SHR_10:%.*]] = lshr i32 [[MUL_10]], 16 -; CHECK-NEXT: [[CONV12_10:%.*]] = trunc i32 [[SHR_10]] to i16 +; CHECK-NEXT: [[CONV12_10:%.*]] = trunc nuw i32 [[SHR_10]] to i16 ; CHECK-NEXT: br label [[IF_END_10]] ; CHECK: if.end.10: ; CHECK-NEXT: [[STOREMERGE_10:%.*]] = phi i16 [ [[CONV28_10]], [[IF_ELSE_10]] ], [ [[CONV12_10]], [[IF_THEN_10]] ] @@ -390,14 +390,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_11:%.*]] = sub nsw i32 [[CONV5_11]], [[CONV_11]] ; CHECK-NEXT: [[MUL25_11:%.*]] = mul i32 [[ADD21_11]], [[CONV11_11]] ; CHECK-NEXT: [[SHR26_11:%.*]] = lshr i32 [[MUL25_11]], 16 -; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[SHR26_11]] to i16 +; CHECK-NEXT: [[TMP77:%.*]] = trunc nuw i32 [[SHR26_11]] to i16 ; CHECK-NEXT: [[CONV28_11:%.*]] = sub i16 0, [[TMP77]] ; CHECK-NEXT: br label [[IF_END_11:%.*]] ; CHECK: if.then.11: ; CHECK-NEXT: [[ADD_11:%.*]] = add nuw nsw i32 [[CONV5_11]], [[CONV_11]] ; CHECK-NEXT: [[MUL_11:%.*]] = mul i32 [[ADD_11]], [[CONV11_11]] ; CHECK-NEXT: [[SHR_11:%.*]] = lshr i32 [[MUL_11]], 16 -; CHECK-NEXT: [[CONV12_11:%.*]] = trunc i32 [[SHR_11]] to i16 +; CHECK-NEXT: [[CONV12_11:%.*]] = trunc nuw i32 [[SHR_11]] to i16 ; CHECK-NEXT: br label [[IF_END_11]] ; CHECK: if.end.11: ; CHECK-NEXT: [[STOREMERGE_11:%.*]] = phi i16 [ [[CONV28_11]], [[IF_ELSE_11]] ], [ [[CONV12_11]], [[IF_THEN_11]] ] @@ -418,14 +418,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_12:%.*]] = sub nsw i32 [[CONV5_12]], [[CONV_12]] ; CHECK-NEXT: [[MUL25_12:%.*]] = mul i32 [[ADD21_12]], [[CONV11_12]] ; CHECK-NEXT: [[SHR26_12:%.*]] = lshr i32 [[MUL25_12]], 16 -; CHECK-NEXT: [[TMP81:%.*]] = trunc i32 [[SHR26_12]] to i16 +; CHECK-NEXT: [[TMP81:%.*]] = trunc nuw i32 [[SHR26_12]] to i16 ; CHECK-NEXT: [[CONV28_12:%.*]] = sub i16 0, [[TMP81]] ; CHECK-NEXT: br label [[IF_END_12:%.*]] ; CHECK: if.then.12: ; CHECK-NEXT: [[ADD_12:%.*]] = add nuw nsw i32 [[CONV5_12]], [[CONV_12]] ; CHECK-NEXT: [[MUL_12:%.*]] = mul i32 [[ADD_12]], [[CONV11_12]] ; CHECK-NEXT: [[SHR_12:%.*]] = lshr i32 [[MUL_12]], 16 -; CHECK-NEXT: [[CONV12_12:%.*]] = trunc i32 [[SHR_12]] to i16 +; CHECK-NEXT: [[CONV12_12:%.*]] = trunc nuw i32 [[SHR_12]] to i16 ; CHECK-NEXT: br label [[IF_END_12]] ; CHECK: if.end.12: ; CHECK-NEXT: [[STOREMERGE_12:%.*]] = phi i16 [ [[CONV28_12]], [[IF_ELSE_12]] ], [ [[CONV12_12]], [[IF_THEN_12]] ] @@ -446,14 +446,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_13:%.*]] = sub nsw i32 [[CONV5_13]], [[CONV_13]] ; CHECK-NEXT: [[MUL25_13:%.*]] = mul i32 [[ADD21_13]], [[CONV11_13]] ; CHECK-NEXT: [[SHR26_13:%.*]] = lshr i32 [[MUL25_13]], 16 -; CHECK-NEXT: [[TMP85:%.*]] = trunc i32 [[SHR26_13]] to i16 +; CHECK-NEXT: [[TMP85:%.*]] = trunc nuw i32 [[SHR26_13]] to i16 ; CHECK-NEXT: [[CONV28_13:%.*]] = sub i16 0, [[TMP85]] ; CHECK-NEXT: br label [[IF_END_13:%.*]] ; CHECK: if.then.13: ; CHECK-NEXT: [[ADD_13:%.*]] = add nuw nsw i32 [[CONV5_13]], [[CONV_13]] ; CHECK-NEXT: [[MUL_13:%.*]] = mul i32 [[ADD_13]], [[CONV11_13]] ; CHECK-NEXT: [[SHR_13:%.*]] = lshr i32 [[MUL_13]], 16 -; CHECK-NEXT: [[CONV12_13:%.*]] = trunc i32 [[SHR_13]] to i16 +; CHECK-NEXT: [[CONV12_13:%.*]] = trunc nuw i32 [[SHR_13]] to i16 ; CHECK-NEXT: br label [[IF_END_13]] ; CHECK: if.end.13: ; CHECK-NEXT: [[STOREMERGE_13:%.*]] = phi i16 [ [[CONV28_13]], [[IF_ELSE_13]] ], [ [[CONV12_13]], [[IF_THEN_13]] ] @@ -474,14 +474,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr 
noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_14:%.*]] = sub nsw i32 [[CONV5_14]], [[CONV_14]] ; CHECK-NEXT: [[MUL25_14:%.*]] = mul i32 [[ADD21_14]], [[CONV11_14]] ; CHECK-NEXT: [[SHR26_14:%.*]] = lshr i32 [[MUL25_14]], 16 -; CHECK-NEXT: [[TMP89:%.*]] = trunc i32 [[SHR26_14]] to i16 +; CHECK-NEXT: [[TMP89:%.*]] = trunc nuw i32 [[SHR26_14]] to i16 ; CHECK-NEXT: [[CONV28_14:%.*]] = sub i16 0, [[TMP89]] ; CHECK-NEXT: br label [[IF_END_14:%.*]] ; CHECK: if.then.14: ; CHECK-NEXT: [[ADD_14:%.*]] = add nuw nsw i32 [[CONV5_14]], [[CONV_14]] ; CHECK-NEXT: [[MUL_14:%.*]] = mul i32 [[ADD_14]], [[CONV11_14]] ; CHECK-NEXT: [[SHR_14:%.*]] = lshr i32 [[MUL_14]], 16 -; CHECK-NEXT: [[CONV12_14:%.*]] = trunc i32 [[SHR_14]] to i16 +; CHECK-NEXT: [[CONV12_14:%.*]] = trunc nuw i32 [[SHR_14]] to i16 ; CHECK-NEXT: br label [[IF_END_14]] ; CHECK: if.end.14: ; CHECK-NEXT: [[STOREMERGE_14:%.*]] = phi i16 [ [[CONV28_14]], [[IF_ELSE_14]] ], [ [[CONV12_14]], [[IF_THEN_14]] ] @@ -502,14 +502,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[ADD21_15:%.*]] = sub nsw i32 [[CONV5_15]], [[CONV_15]] ; CHECK-NEXT: [[MUL25_15:%.*]] = mul i32 [[ADD21_15]], [[CONV11_15]] ; CHECK-NEXT: [[SHR26_15:%.*]] = lshr i32 [[MUL25_15]], 16 -; CHECK-NEXT: [[TMP93:%.*]] = trunc i32 [[SHR26_15]] to i16 +; CHECK-NEXT: [[TMP93:%.*]] = trunc nuw i32 [[SHR26_15]] to i16 ; CHECK-NEXT: [[CONV28_15:%.*]] = sub i16 0, [[TMP93]] ; CHECK-NEXT: br label [[IF_END_15]] ; CHECK: if.then.15: ; CHECK-NEXT: [[ADD_15:%.*]] = add nuw nsw i32 [[CONV5_15]], [[CONV_15]] ; CHECK-NEXT: [[MUL_15:%.*]] = mul i32 [[ADD_15]], [[CONV11_15]] ; CHECK-NEXT: [[SHR_15:%.*]] = lshr i32 [[MUL_15]], 16 -; CHECK-NEXT: [[CONV12_15:%.*]] = trunc i32 [[SHR_15]] to i16 +; CHECK-NEXT: [[CONV12_15:%.*]] = trunc nuw i32 [[SHR_15]] to i16 ; CHECK-NEXT: br label [[IF_END_15]] ; CHECK: if.end.15: ; CHECK-NEXT: [[STOREMERGE_15:%.*]] = phi i16 [ [[CONV28_15]], [[IF_ELSE_15]] ], [ [[CONV12_15]], [[IF_THEN_15]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll index c38f2748a9763..75505f632a43f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll @@ -55,7 +55,7 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> ) -; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]] ; CHECK-NEXT: ret void ; From 496de32ee2c34880c7d3396bbd09e45d5d5c8a9e Mon Sep 17 00:00:00 2001 From: paperchalice Date: Thu, 11 Apr 2024 19:13:06 +0800 Subject: [PATCH 137/886] [NewPM] Remove `MachinePassInfoMixin` (#88243) Unify the inheritance paths of IR and machine function. 
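For downstream code the migration is mechanical; a minimal before/after sketch (the pass name here is hypothetical, and the property hooks named in the comments are the optional static methods whose handling moves into `detail::MachinePassModel` in the diff below):

```cpp
// Before this patch: machine function passes used a dedicated CRTP mix-in.
struct MyMachinePass : MachinePassInfoMixin<MyMachinePass> { /* ... */ };

// After: they derive from the same PassInfoMixin as IR passes. Optional
// static getRequiredProperties()/getSetProperties()/getClearedProperties()
// hooks keep working; they are now detected and applied by the pass model
// wrapper rather than by a runImpl() shim in the mix-in.
struct MyMachinePass : PassInfoMixin<MyMachinePass> {
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM);
};
```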
--- .../llvm/CodeGen/DeadMachineInstructionElim.h | 2 +- .../llvm/CodeGen/FreeMachineFunction.h | 3 +- llvm/include/llvm/CodeGen/MIRPrinter.h | 4 +- .../include/llvm/CodeGen/MachinePassManager.h | 108 +++++++----------- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 +- llvm/include/llvm/Passes/PassBuilder.h | 3 +- llvm/lib/Passes/PassBuilder.cpp | 2 +- .../MIR/PassBuilderCallbacksTest.cpp | 2 +- 8 files changed, 49 insertions(+), 79 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DeadMachineInstructionElim.h b/llvm/include/llvm/CodeGen/DeadMachineInstructionElim.h index b9fe7cfccf9a3..56cfa1e087181 100644 --- a/llvm/include/llvm/CodeGen/DeadMachineInstructionElim.h +++ b/llvm/include/llvm/CodeGen/DeadMachineInstructionElim.h @@ -14,7 +14,7 @@ namespace llvm { class DeadMachineInstructionElimPass - : public MachinePassInfoMixin { + : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); diff --git a/llvm/include/llvm/CodeGen/FreeMachineFunction.h b/llvm/include/llvm/CodeGen/FreeMachineFunction.h index 77b76c591201a..5f21c6720350b 100644 --- a/llvm/include/llvm/CodeGen/FreeMachineFunction.h +++ b/llvm/include/llvm/CodeGen/FreeMachineFunction.h @@ -13,8 +13,7 @@ namespace llvm { -class FreeMachineFunctionPass - : public MachinePassInfoMixin { +class FreeMachineFunctionPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); diff --git a/llvm/include/llvm/CodeGen/MIRPrinter.h b/llvm/include/llvm/CodeGen/MIRPrinter.h index daa0d7e2691f1..d0a11e1c4a2fd 100644 --- a/llvm/include/llvm/CodeGen/MIRPrinter.h +++ b/llvm/include/llvm/CodeGen/MIRPrinter.h @@ -24,7 +24,7 @@ class MachineFunction; class Module; template class SmallVectorImpl; -class PrintMIRPreparePass : public MachinePassInfoMixin { +class PrintMIRPreparePass : public PassInfoMixin { raw_ostream &OS; public: @@ -32,7 +32,7 @@ class PrintMIRPreparePass : public MachinePassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &MFAM); }; -class PrintMIRPass : public MachinePassInfoMixin { +class PrintMIRPass : public PassInfoMixin { raw_ostream &OS; public: diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h index 8689fd19030f9..4f0b6ba2b1e73 100644 --- a/llvm/include/llvm/CodeGen/MachinePassManager.h +++ b/llvm/include/llvm/CodeGen/MachinePassManager.h @@ -36,65 +36,6 @@ class MachineFunction; extern template class AnalysisManager; using MachineFunctionAnalysisManager = AnalysisManager; -/// A CRTP mix-in that provides informational APIs needed for machine passes. -/// -/// This provides some boilerplate for types that are machine passes. It -/// automatically mixes in \c PassInfoMixin. 
-template -struct MachinePassInfoMixin : public PassInfoMixin { -protected: - class PropertyChanger { - MachineFunction &MF; - - template - using has_get_required_properties_t = - decltype(std::declval().getRequiredProperties()); - - template - using has_get_set_properties_t = - decltype(std::declval().getSetProperties()); - - template - using has_get_cleared_properties_t = - decltype(std::declval().getClearedProperties()); - - public: - PropertyChanger(MachineFunction &MF) : MF(MF) { -#ifndef NDEBUG - if constexpr (is_detected::value) { - auto &MFProps = MF.getProperties(); - auto RequiredProperties = DerivedT::getRequiredProperties(); - if (!MFProps.verifyRequiredProperties(RequiredProperties)) { - errs() << "MachineFunctionProperties required by " << DerivedT::name() - << " pass are not met by function " << MF.getName() << ".\n" - << "Required properties: "; - RequiredProperties.print(errs()); - errs() << "\nCurrent properties: "; - MFProps.print(errs()); - errs() << '\n'; - report_fatal_error("MachineFunctionProperties check failed"); - } - } -#endif - } - - ~PropertyChanger() { - if constexpr (is_detected::value) - MF.getProperties().set(DerivedT::getSetProperties()); - if constexpr (is_detected::value) - MF.getProperties().reset(DerivedT::getClearedProperties()); - } - }; - -public: - PreservedAnalyses runImpl(MachineFunction &MF, - MachineFunctionAnalysisManager &MFAM) { - PropertyChanger PC(MF); - return static_cast(this)->run(MF, MFAM); - } -}; - namespace detail { template @@ -117,8 +58,44 @@ struct MachinePassModel MachinePassModel &operator=(const MachinePassModel &) = delete; PreservedAnalyses run(MachineFunction &IR, MachineFunctionAnalysisManager &AM) override { - return this->Pass.runImpl(IR, AM); +#ifndef NDEBUG + if constexpr (is_detected::value) { + auto &MFProps = IR.getProperties(); + auto RequiredProperties = PassT::getRequiredProperties(); + if (!MFProps.verifyRequiredProperties(RequiredProperties)) { + errs() << "MachineFunctionProperties required by " << PassT::name() + << " pass are not met by function " << IR.getName() << ".\n" + << "Required properties: "; + RequiredProperties.print(errs()); + errs() << "\nCurrent properties: "; + MFProps.print(errs()); + errs() << '\n'; + report_fatal_error("MachineFunctionProperties check failed"); + } + } +#endif + + auto PA = this->Pass.run(IR, AM); + + if constexpr (is_detected::value) + IR.getProperties().set(PassT::getSetProperties()); + if constexpr (is_detected::value) + IR.getProperties().reset(PassT::getClearedProperties()); + return PA; } + +private: + template + using has_get_required_properties_t = + decltype(std::declval().getRequiredProperties()); + + template + using has_get_set_properties_t = + decltype(std::declval().getSetProperties()); + + template + using has_get_cleared_properties_t = + decltype(std::declval().getClearedProperties()); }; } // namespace detail @@ -246,20 +223,15 @@ createModuleToMachineFunctionPassAdaptor(MachineFunctionPassT &&Pass) { template <> template void PassManager::addPass(PassT &&Pass) { - using PassModelT = - detail::PassModel; using MachinePassModelT = detail::MachinePassModel; // Do not use make_unique or emplace_back, they cause too many template // instantiations, causing terrible compile times. 
- if constexpr (std::is_base_of_v, PassT>) { - Passes.push_back(std::unique_ptr( - new MachinePassModelT(std::forward(Pass)))); - } else if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { for (auto &P : Pass.Passes) Passes.push_back(std::move(P)); } else { - Passes.push_back(std::unique_ptr( - new PassModelT(std::forward(Pass)))); + Passes.push_back(std::unique_ptr( + new MachinePassModelT(std::forward(Pass)))); } } diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index ab231dbbb6a68..ed5d8affa7938 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -87,14 +87,14 @@ namespace llvm { } \ }; #define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME) \ - struct PASS_NAME : public MachinePassInfoMixin { \ + struct PASS_NAME : public PassInfoMixin { \ template PASS_NAME(Ts &&...) {} \ PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \ return PreservedAnalyses::all(); \ } \ }; #define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME) \ - struct PASS_NAME : public MachinePassInfoMixin { \ + struct PASS_NAME : public PassInfoMixin { \ template PASS_NAME(Ts &&...) {} \ PreservedAnalyses run(MachineFunction &, \ MachineFunctionAnalysisManager &) { \ diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index d1232124d5d81..c8f643452bb15 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -909,8 +909,7 @@ struct NoOpLoopPass : PassInfoMixin { }; /// No-op machine function pass which does nothing. -struct NoOpMachineFunctionPass - : public MachinePassInfoMixin { +struct NoOpMachineFunctionPass : public PassInfoMixin { PreservedAnalyses run(MachineFunction &, MachineFunctionAnalysisManager &) { return PreservedAnalyses::all(); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 832ee352205a0..8d408ca2363a9 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -368,7 +368,7 @@ class TriggerVerifierErrorPass // A pass requires all MachineFunctionProperties. // DO NOT USE THIS EXCEPT FOR TESTING! class RequireAllMachineFunctionPropertiesPass - : public MachinePassInfoMixin { + : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &, MachineFunctionAnalysisManager &) { return PreservedAnalyses::none(); diff --git a/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp b/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp index 8e3738dc91920..6fd4e54a929f4 100644 --- a/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp +++ b/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp @@ -233,7 +233,7 @@ template class MockAnalysisHandleBase { template class MockPassHandleBase { public: - class Pass : public MachinePassInfoMixin { + class Pass : public PassInfoMixin { friend MockPassHandleBase; DerivedT *Handle; From a6db20f2c39ecb5939890317068d5398c760746d Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Thu, 11 Apr 2024 14:15:16 +0200 Subject: [PATCH 138/886] [libcxx] Use generic builtins for popcount, clz and ctz (#86563) Fixes #86556 Use `__builtin_popcountg` instead of `__builtin_popcount{l|ll}`. Use `__builtin_clzg` instead of `__builtin_clz{l|ll}`. Use `__builtin_ctzg` instead of `__builtin_ctz{l|ll}`. The generic variants of these builtins can be used to simplify some logic on Clang >= 19 and GCC >= 14, where they are available. For backwards-compatibility reasons, we can't completely remove the old logic. 
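A rough sketch of the shape of the change, using the count-trailing-zeros case (illustrative only, not the exact libc++ code):

```cpp
// Old pattern: explicit zero check, then dispatch on the argument width.
if (__t == 0)
  return numeric_limits<_Tp>::digits;
if (sizeof(_Tp) <= sizeof(unsigned int))
  return __builtin_ctz(static_cast<unsigned int>(__t));
// ... unsigned long / unsigned long long / >64-bit chunked loop ...

// New pattern: one call covers every unsigned integer type (including
// __uint128_t); the second argument is returned when __t == 0, so the
// explicit zero check disappears as well.
#if __has_builtin(__builtin_ctzg)
return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
#endif
```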
Therefore, I left ToDo comments to address this, as soon as support for pre Clang 19 as well as pre GCC 14 is dropped. --------- Co-authored-by: Nick Desaulniers --- libcxx/include/__bit/countl.h | 11 +++++++++++ libcxx/include/__bit/countr.h | 8 +++++++- libcxx/include/__bit/popcount.h | 7 +++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 396cfc2c3f406..13df8d4e66c40 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_clzg. + #ifndef _LIBCPP___BIT_COUNTL_H #define _LIBCPP___BIT_COUNTL_H @@ -38,6 +41,9 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_cl #ifndef _LIBCPP_HAS_NO_INT128 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT { +# if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +# else // The function is written in this form due to C++ constexpr limitations. // The algorithm: // - Test whether any bit in the high 64-bits is set @@ -49,12 +55,16 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) // zeros in the high 64-bits. return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast(__x))) : __builtin_clzll(static_cast(__x >> 64)); +# endif } #endif // _LIBCPP_HAS_NO_INT128 template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__t, numeric_limits<_Tp>::digits); +#else // __has_builtin(__builtin_clzg) if (__t == 0) return numeric_limits<_Tp>::digits; @@ -79,6 +89,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ } return __ret + __iter; } +#endif // __has_builtin(__builtin_clzg) } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index b6b3ac52ca4e4..724a0bc23801c 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_ctzg. 
+ #ifndef _LIBCPP___BIT_COUNTR_H #define _LIBCPP___BIT_COUNTR_H @@ -37,9 +40,11 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct template _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); +#else // __has_builtin(__builtin_ctzg) if (__t == 0) return numeric_limits<_Tp>::digits; - if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_ctz(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -55,6 +60,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun } return __ret + std::__libcpp_ctz(static_cast(__t)); } +#endif // __has_builtin(__builtin_ctzg) } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index b0319cef25189..37b3a3e1f3f2b 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_popcountg. + #ifndef _LIBCPP___BIT_POPCOUNT_H #define _LIBCPP___BIT_POPCOUNT_H @@ -39,6 +42,9 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo template <__libcpp_unsigned_integer _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { +# if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__t); +# else // __has_builtin(__builtin_popcountg) if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_popcount(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -53,6 +59,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex } return __ret; } +# endif // __has_builtin(__builtin_popcountg) } #endif // _LIBCPP_STD_VER >= 20 From 402f15ea92061d94412807887c8115374974967e Mon Sep 17 00:00:00 2001 From: martinboehme Date: Thu, 11 Apr 2024 14:38:18 +0200 Subject: [PATCH 139/886] [clang][dataflow] Remove deprecated alias `ControlFlowContext`. 
(#88358) --- clang/docs/tools/clang-formatted-files.txt | 1 - .../FlowSensitive/ControlFlowContext.h | 27 ------------------- 2 files changed, 28 deletions(-) delete mode 100644 clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 8fd4fed25a32a..3089438c23d94 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -123,7 +123,6 @@ clang/include/clang/Analysis/Analyses/CalledOnceCheck.h clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h -clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h deleted file mode 100644 index 3972962d0b2da..0000000000000 --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ /dev/null @@ -1,27 +0,0 @@ -//===-- ControlFlowContext.h ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a deprecated alias for AdornedCFG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H -#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H - -#include "clang/Analysis/FlowSensitive/AdornedCFG.h" - -namespace clang { -namespace dataflow { - -// This is a deprecated alias. Use `AdornedCFG` instead. 
-using ControlFlowContext = AdornedCFG; - -} // namespace dataflow -} // namespace clang - -#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H From 6fd2fdccf2f28fc155f614eec41f785492aad618 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 14:02:56 +0100 Subject: [PATCH 140/886] [VectorCombine] foldShuffleOfCastops - extend shuffle(bitcast(x),bitcast(y)) -> bitcast(shuffle(x,y)) support. Handle shuffle mask scaling for cases where the bitcast src/dst element counts are different. --- clang/test/CodeGen/X86/avx-shuffle-builtins.c | 3 +- .../Transforms/Vectorize/VectorCombine.cpp | 40 ++++++++++++++----- .../Transforms/PhaseOrdering/X86/pr67803.ll | 5 +-- .../VectorCombine/X86/shuffle-of-casts.ll | 14 +++---- .../Transforms/VectorCombine/X86/shuffle.ll | 5 ++- 5 files changed, 43 insertions(+), 24 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c index 49a56e73230d7..d184d28f3e07a 100644 --- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c @@ -61,8 +61,7 @@ __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_permute2f128_si256 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} + // CHECK: shufflevector{{.*}} return _mm256_permute2f128_si256(a, b, 0x20); } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index b74fdf27d213a..658e8e74fe5b8 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1448,9 +1448,9 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) { /// into "castop (shuffle)". bool VectorCombine::foldShuffleOfCastops(Instruction &I) { Value *V0, *V1; - ArrayRef Mask; + ArrayRef OldMask; if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)), - m_Mask(Mask)))) + m_Mask(OldMask)))) return false; auto *C0 = dyn_cast(V0); @@ -1473,12 +1473,32 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { auto *ShuffleDstTy = dyn_cast(I.getType()); auto *CastDstTy = dyn_cast(C0->getDestTy()); auto *CastSrcTy = dyn_cast(C0->getSrcTy()); - if (!ShuffleDstTy || !CastDstTy || !CastSrcTy || - CastDstTy->getElementCount() != CastSrcTy->getElementCount()) + if (!ShuffleDstTy || !CastDstTy || !CastSrcTy) return false; + unsigned NumSrcElts = CastSrcTy->getNumElements(); + unsigned NumDstElts = CastDstTy->getNumElements(); + assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) && "Only bitcasts expected to alter src/dst element counts"); + + SmallVector NewMask; + if (NumSrcElts >= NumDstElts) { + // The bitcast is from wide to narrow/equal elements. The shuffle mask can + // always be expanded to the equivalent form choosing narrower elements. + assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask"); + unsigned ScaleFactor = NumSrcElts / NumDstElts; + narrowShuffleMaskElts(ScaleFactor, OldMask, NewMask); + } else { + // The bitcast is from narrow elements to wide elements. The shuffle mask + // must choose consecutive elements to allow casting first. 
+ assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask"); + unsigned ScaleFactor = NumDstElts / NumSrcElts; + if (!widenShuffleMaskElts(ScaleFactor, OldMask, NewMask)) + return false; + } + auto *NewShuffleDstTy = - FixedVectorType::get(CastSrcTy->getScalarType(), Mask.size()); + FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size()); // Try to replace a castop with a shuffle if the shuffle is not costly. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; @@ -1489,11 +1509,11 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); OldCost += - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, CastDstTy, Mask, - CostKind, 0, nullptr, std::nullopt, &I); + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, CastDstTy, + OldMask, CostKind, 0, nullptr, std::nullopt, &I); InstructionCost NewCost = TTI.getShuffleCost( - TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind); + TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, NewMask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); @@ -1503,8 +1523,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (NewCost > OldCost) return false; - Value *Shuf = - Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), Mask); + Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0), + C1->getOperand(0), NewMask); Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy); // Intersect flags from the old casts. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll index 45e411d733169..36535264bd0f2 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll @@ -17,7 +17,6 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i8> [[TMP9]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP6]], <16 x i8> [[TMP8]], <16 x i8> [[TMP10]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[A]] to <32 x i8> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i64> [[B]] to <32 x i8> @@ -25,8 +24,8 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP14]], <16 x i8> [[TMP16]], <16 x i8> [[TMP18]]) -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP19]] to <2 x i64> -; CHECK-NEXT: [[SHUFFLE_I23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP20]], <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> [[TMP19]], <32 x i32> +; CHECK-NEXT: [[SHUFFLE_I23:%.*]] = bitcast <32 x i8> [[TMP20]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I23]] ; entry: diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 
97fceacd82758..4f8cfea146ead 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -179,13 +179,12 @@ define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { ret <8 x float> %r } -; TODO - bitcasts (lower element count) +; bitcasts (lower element count) define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @concat_bitcast_v8i16_v4f64( -; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i16> [[A0:%.*]] to <2 x double> -; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i16> [[A1:%.*]] to <2 x double> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[X0]], <2 x double> [[X1]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = bitcast <16 x i16> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[R]] ; %x0 = bitcast <8 x i16> %a0 to <2 x double> @@ -194,13 +193,12 @@ define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { ret <4 x double> %r } -; TODO - bitcasts (higher element count) +; bitcasts (higher element count) define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @concat_bitcast_v4i32_v16i16( -; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <8 x i16> -; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <8 x i16> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x i16> ; CHECK-NEXT: ret <16 x i16> [[R]] ; %x0 = bitcast <4 x i32> %a0 to <8 x i16> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 3d47f373ab77c..8337bb37bc549 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -122,11 +122,14 @@ define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { } ; shuffle of 2 operands removes bitcasts +; TODO - can we remove the empty bitcast(bitcast()) ? define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: @bitcast_shuf_remove_bitcasts( ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[R]] +; CHECK-NEXT: [[SHUF:%.*]] = bitcast <4 x i64> [[R]] to <8 x i32> +; CHECK-NEXT: [[R1:%.*]] = bitcast <8 x i32> [[SHUF]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[R1]] ; %bc0 = bitcast <2 x i64> %a0 to <4 x i32> %bc1 = bitcast <2 x i64> %a1 to <4 x i32> From 77dd43570bf7a4bad688de8d8326c34590a0fa94 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Thu, 11 Apr 2024 15:08:54 +0200 Subject: [PATCH 141/886] Fix complex power for large inputs. (#88387) For example, 1e30^1.2 currently overflows. Also forward fastmath flags. This ports XLA's logic and was verified with its test suite. Note that rsqrt and sqrt are still broken. 
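The overflow comes from the old lowering visible in the diff below, which computed the magnitude as (a*a+b*b)^(0.5c): for lhs = 1e30+0i in f32, a*a is 1e60, far beyond FLT_MAX (~3.4e38), even though |lhs|^c = 1e36 is representable. The new lowering goes through complex.abs instead. A scalar sketch of the difference (illustrative numbers only, not the generated code):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  float a = 1e30f, b = 0.0f, c = 1.2f;
  // Old: square the magnitude first -- a*a = 1e60 overflows float.
  float oldMag = std::pow(a * a + b * b, 0.5f * c); // inf
  // New: raise |lhs| to c directly; the magnitude stays representable.
  float newMag = std::pow(std::hypot(a, b), c);     // 1e30^1.2 = 1e36
  std::printf("%g %g\n", oldMag, newMag);           // inf 1e+36
}
```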
--- .../ComplexToStandard/ComplexToStandard.cpp | 149 +++++++++++------- .../convert-to-standard.mlir | 21 ++- 2 files changed, 115 insertions(+), 55 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 462036e51a1f1..9c82e8105f06e 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -989,65 +989,107 @@ struct ConjOpConversion : public OpConversionPattern { } }; -/// Coverts x^y = (a+bi)^(c+di) to +/// Converts lhs^y = (a+bi)^(c+di) to /// (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)), /// where q = c*atan2(b,a)+0.5d*ln(a*a+b*b) static Value powOpConversionImpl(mlir::ImplicitLocOpBuilder &builder, - ComplexType type, Value a, Value b, Value c, - Value d) { + ComplexType type, Value lhs, Value c, Value d, + arith::FastMathFlags fmf) { auto elementType = cast(type.getElementType()); - // Compute (a*a+b*b)^(0.5c). - Value aaPbb = builder.create( - builder.create(a, a), builder.create(b, b)); - Value half = builder.create( - elementType, builder.getFloatAttr(elementType, 0.5)); - Value halfC = builder.create(half, c); - Value aaPbbTohalfC = builder.create(aaPbb, halfC); - - // Compute exp(-d*atan2(b,a)). - Value negD = builder.create(d); - Value argX = builder.create(b, a); - Value negDArgX = builder.create(negD, argX); - Value eToNegDArgX = builder.create(negDArgX); - - // Compute (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)). - Value coeff = builder.create(aaPbbTohalfC, eToNegDArgX); - - // Compute c*atan2(b,a)+0.5d*ln(a*a+b*b). - Value lnAaPbb = builder.create(aaPbb); - Value halfD = builder.create(half, d); - Value q = builder.create( - builder.create(c, argX), - builder.create(halfD, lnAaPbb)); - - Value cosQ = builder.create(q); - Value sinQ = builder.create(q); + Value a = builder.create(lhs); + Value b = builder.create(lhs); + + Value abs = builder.create(lhs, fmf); + Value absToC = builder.create(abs, c, fmf); + + Value negD = builder.create(d, fmf); + Value argLhs = builder.create(b, a, fmf); + Value negDArgLhs = builder.create(negD, argLhs, fmf); + Value expNegDArgLhs = builder.create(negDArgLhs, fmf); + + Value coeff = builder.create(absToC, expNegDArgLhs, fmf); + Value lnAbs = builder.create(abs, fmf); + Value cArgLhs = builder.create(c, argLhs, fmf); + Value dLnAbs = builder.create(d, lnAbs, fmf); + Value q = builder.create(cArgLhs, dLnAbs, fmf); + Value cosQ = builder.create(q, fmf); + Value sinQ = builder.create(q, fmf); + + Value inf = builder.create( + elementType, + builder.getFloatAttr(elementType, + APFloat::getInf(elementType.getFloatSemantics()))); Value zero = builder.create( - elementType, builder.getFloatAttr(elementType, 0)); + elementType, builder.getFloatAttr(elementType, 0.0)); Value one = builder.create( - elementType, builder.getFloatAttr(elementType, 1)); - - Value xEqZero = - builder.create(arith::CmpFPredicate::OEQ, aaPbb, zero); - Value yGeZero = builder.create( - builder.create(arith::CmpFPredicate::OGE, c, zero), - builder.create(arith::CmpFPredicate::OEQ, d, zero)); - Value cEqZero = - builder.create(arith::CmpFPredicate::OEQ, c, zero); - Value complexZero = builder.create(type, zero, zero); + elementType, builder.getFloatAttr(elementType, 1.0)); Value complexOne = builder.create(type, one, zero); - Value complexOther = builder.create( - type, builder.create(coeff, cosQ), - builder.create(coeff, sinQ)); + Value complexZero = builder.create(type, zero, zero); + 
Value complexInf = builder.create(type, inf, zero); - // x^y is 0 if x is 0 and y > 0. 0^0 is defined to be 1.0, see + // Case 0: + // d^c is 0 if d is 0 and c > 0. 0^0 is defined to be 1.0, see + // Branch Cuts for Complex Elementary Functions or Much Ado About + // Nothing's Sign Bit, W. Kahan, Section 10. + Value absEqZero = + builder.create(arith::CmpFPredicate::OEQ, abs, zero, fmf); + Value dEqZero = + builder.create(arith::CmpFPredicate::OEQ, d, zero, fmf); + Value cEqZero = + builder.create(arith::CmpFPredicate::OEQ, c, zero, fmf); + Value bEqZero = + builder.create(arith::CmpFPredicate::OEQ, b, zero, fmf); + + Value zeroLeC = + builder.create(arith::CmpFPredicate::OLE, zero, c, fmf); + Value coeffCosQ = builder.create(coeff, cosQ, fmf); + Value coeffSinQ = builder.create(coeff, sinQ, fmf); + Value complexOneOrZero = + builder.create(cEqZero, complexOne, complexZero); + Value coeffCosSin = + builder.create(type, coeffCosQ, coeffSinQ); + Value cutoff0 = builder.create( + builder.create( + builder.create(absEqZero, dEqZero), zeroLeC), + complexOneOrZero, coeffCosSin); + + // Case 1: + // x^0 is defined to be 1 for any x, see // Branch Cuts for Complex Elementary Functions or Much Ado About // Nothing's Sign Bit, W. Kahan, Section 10. - return builder.create( - builder.create(xEqZero, yGeZero), - builder.create(cEqZero, complexOne, complexZero), - complexOther); + Value rhsEqZero = builder.create(cEqZero, dEqZero); + Value cutoff1 = + builder.create(rhsEqZero, complexOne, cutoff0); + + // Case 2: + // 1^(c + d*i) = 1 + 0*i + Value lhsEqOne = builder.create( + builder.create(arith::CmpFPredicate::OEQ, a, one), + bEqZero); + Value cutoff2 = + builder.create(lhsEqOne, complexOne, cutoff1); + + // Case 3: + // inf^(c + 0*i) = inf + 0*i, c > 0 + Value lhsEqInf = builder.create( + builder.create(arith::CmpFPredicate::OEQ, a, inf), + bEqZero); + Value rhsGt0 = builder.create( + dEqZero, + builder.create(arith::CmpFPredicate::OGT, c, zero)); + Value cutoff3 = builder.create( + builder.create(lhsEqInf, rhsGt0), complexInf, cutoff2); + + // Case 4: + // inf^(c + 0*i) = 0 + 0*i, c < 0 + Value rhsLt0 = builder.create( + dEqZero, + builder.create(arith::CmpFPredicate::OLT, c, zero)); + Value cutoff4 = builder.create( + builder.create(lhsEqInf, rhsLt0), complexZero, cutoff3); + + return cutoff4; } struct PowOpConversion : public OpConversionPattern { @@ -1060,12 +1102,11 @@ struct PowOpConversion : public OpConversionPattern { auto type = cast(adaptor.getLhs().getType()); auto elementType = cast(type.getElementType()); - Value a = builder.create(elementType, adaptor.getLhs()); - Value b = builder.create(elementType, adaptor.getLhs()); Value c = builder.create(elementType, adaptor.getRhs()); Value d = builder.create(elementType, adaptor.getRhs()); - rewriter.replaceOp(op, {powOpConversionImpl(builder, type, a, b, c, d)}); + rewriter.replaceOp(op, {powOpConversionImpl(builder, type, adaptor.getLhs(), + c, d, op.getFastmath())}); return success(); } }; @@ -1080,14 +1121,14 @@ struct RsqrtOpConversion : public OpConversionPattern { auto type = cast(adaptor.getComplex().getType()); auto elementType = cast(type.getElementType()); - Value a = builder.create(elementType, adaptor.getComplex()); - Value b = builder.create(elementType, adaptor.getComplex()); Value c = builder.create( elementType, builder.getFloatAttr(elementType, -0.5)); Value d = builder.create( elementType, builder.getFloatAttr(elementType, 0)); - rewriter.replaceOp(op, {powOpConversionImpl(builder, type, a, b, c, d)}); + 
rewriter.replaceOp(op, + {powOpConversionImpl(builder, type, adaptor.getComplex(), + c, d, op.getFastmath())}); return success(); } }; diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index a1de61d10bb22..8d2fb09daa87b 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -753,13 +753,32 @@ func.func @complex_conj(%arg: complex) -> complex { // ----- -// CHECK-LABEL: func.func @complex_pow +// CHECK-LABEL: func.func @complex_pow +// CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex func.func @complex_pow(%lhs: complex, %rhs: complex) -> complex { %pow = complex.pow %lhs, %rhs : complex return %pow : complex } +// CHECK: %[[A:.*]] = complex.re %[[LHS]] +// CHECK: %[[B:.*]] = complex.im %[[LHS]] +// CHECK: math.atan2 %[[B]], %[[A]] : f32 + +// ----- + +// CHECK-LABEL: func.func @complex_pow_with_fmf +// CHECK-SAME: %[[LHS:.*]]: complex, %[[RHS:.*]]: complex +func.func @complex_pow_with_fmf(%lhs: complex, + %rhs: complex) -> complex { + %pow = complex.pow %lhs, %rhs fastmath : complex + return %pow : complex +} + +// CHECK: %[[A:.*]] = complex.re %[[LHS]] +// CHECK: %[[B:.*]] = complex.im %[[LHS]] +// CHECK: math.atan2 %[[B]], %[[A]] fastmath : f32 + // ----- // CHECK-LABEL: func.func @complex_rsqrt From 599adf30afe5802fab80419ec5bb896036a1c8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kadir=20=C3=A7etinkaya?= Date: Thu, 11 Apr 2024 15:33:35 +0200 Subject: [PATCH 142/886] [include-cleaner] Dont apply name-match for non-owning headers (#82625) --- .../include-cleaner/lib/FindHeaders.cpp | 6 ++++++ .../unittests/FindHeadersTest.cpp | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp index fd2de6a17ad4a..7b28d1c252d71 100644 --- a/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp +++ b/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp @@ -275,6 +275,12 @@ llvm::SmallVector
headersForSymbol(const Symbol &S, // are already ranked in the stdlib mapping. if (H.kind() == Header::Standard) continue; + // Don't apply name match hints to exporting headers. As they usually have + // names similar to the original header, e.g. foo_wrapper/foo.h vs + // foo/foo.h, but shouldn't be preferred (unless marked as the public + // interface). + if ((H.Hint & Hints::OriginHeader) == Hints::None) + continue; if (nameMatch(SymbolName, H)) H.Hint |= Hints::PreferredHeader; } diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp index 5a2a41b2d99bd..07302142a13e3 100644 --- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp @@ -628,5 +628,24 @@ TEST_F(HeadersForSymbolTest, StandardHeaders) { tooling::stdlib::Header::named(""))); } +TEST_F(HeadersForSymbolTest, ExporterNoNameMatch) { + Inputs.Code = R"cpp( + #include "exporter/foo.h" + #include "foo_public.h" + )cpp"; + Inputs.ExtraArgs.emplace_back("-I."); + // Deliberately named as foo_public to make sure it doesn't get name-match + // boost and also gets lexicographically bigger order than "exporter/foo.h". + Inputs.ExtraFiles["foo_public.h"] = guard(R"cpp( + struct foo {}; + )cpp"); + Inputs.ExtraFiles["exporter/foo.h"] = guard(R"cpp( + #include "foo_public.h" // IWYU pragma: export + )cpp"); + buildAST(); + EXPECT_THAT(headersForFoo(), ElementsAre(physicalHeader("foo_public.h"), + physicalHeader("exporter/foo.h"))); +} + } // namespace } // namespace clang::include_cleaner From 61ea1bc23aa941714be3ec818c922e4ee5a279a3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 15:23:50 +0100 Subject: [PATCH 143/886] [VectorCombine][X86] Add test coverage for #67803 We are still missing a fold for shuffle(bitcast(sext(x)),bitcast(sext(y))) -> bitcast(sext(shuffle(x,y))) due to foldShuffleOfCastops failing to add new instructions back onto the worklist --- .../Transforms/VectorCombine/X86/pr67803.ll | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/X86/pr67803.ll diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll new file mode 100644 index 0000000000000..da94bf7f0c907 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx | FileCheck %s +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s + +define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: @PR67803( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[SEXT_LO:%.*]] = sext <4 x i1> [[CMP_LO]] to <4 x i32> +; CHECK-NEXT: [[SEXT_HI:%.*]] = sext <4 x i1> [[CMP_HI]] to <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SEXT_LO]], <4 x i32> [[SEXT_HI]], <8 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float> +; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> 
@llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]]) +; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x float> [[SEL]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[RES]] +; +entry: + %cmp = icmp sgt <8 x i32> %x, %y + %cmp.lo = shufflevector <8 x i1> %cmp, <8 x i1> poison, <4 x i32> + %cmp.hi = shufflevector <8 x i1> %cmp, <8 x i1> poison, <4 x i32> + %sext.lo = sext <4 x i1> %cmp.lo to <4 x i32> + %sext.hi = sext <4 x i1> %cmp.hi to <4 x i32> + %bitcast.lo = bitcast <4 x i32> %sext.lo to <2 x i64> + %bitcast.hi = bitcast <4 x i32> %sext.hi to <2 x i64> + %concat = shufflevector <2 x i64> %bitcast.lo, <2 x i64> %bitcast.hi, <4 x i32> + %mask = bitcast <4 x i64> %concat to <8 x float> + %sel = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %mask) + %res = bitcast <8 x float> %sel to <4 x i64> + ret <4 x i64> %res +} From ff74236f342c7bc185f56a07bab7bd0cf356c7c6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 15:47:09 +0100 Subject: [PATCH 144/886] [VectorCombine] foldShuffleOfCastops - ensure we add all new instructions onto the worklist When creating cast(shuffle(x,y)) we were only adding the cast() to the worklist, not the new shuffle, preventing recursive combines. foldShuffleOfBinops is also failing to do this, but I still need to add test coverage for this. --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 +++ llvm/test/Transforms/VectorCombine/X86/pr67803.ll | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 658e8e74fe5b8..44cba60013afa 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1440,6 +1440,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) { NewInst->copyIRFlags(B0); NewInst->andIRFlags(B1); } + + // TODO: Add Shuf0/Shuf1 to WorkList? 
replaceValue(I, *NewBO); return true; } @@ -1533,6 +1535,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { NewInst->andIRFlags(C1); } + Worklist.pushValue(Shuf); replaceValue(I, *Cast); return true; } diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll index da94bf7f0c907..69fd6f6a10e2a 100644 --- a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll +++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll @@ -8,9 +8,8 @@ define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[SEXT_LO:%.*]] = sext <4 x i1> [[CMP_LO]] to <4 x i32> -; CHECK-NEXT: [[SEXT_HI:%.*]] = sext <4 x i1> [[CMP_HI]] to <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SEXT_LO]], <4 x i32> [[SEXT_HI]], <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i1> [[CMP_LO]], <4 x i1> [[CMP_HI]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i32> ; CHECK-NEXT: [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float> ; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]]) From 44718311dee486f1823876e8af9100afcc50041b Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Thu, 11 Apr 2024 11:07:17 -0400 Subject: [PATCH 145/886] [mlir][amdgpu] Remove shared memory optimization pass (#88225) This implementation has a number of issues and ultimately does not work on gfx9. * It does not reduce bank conflicts with wide memory accesses. * It does not correctly account for when LDS bank conflicts occur on amdgpu. * The implementation is too fragile to be used on real-world code. For example, the code bails out on any `memref.subview` in the root op, even when the subview is not a user of any of the `memref.alloc` ops. I do not see how these can be easily fixed, therefore I think it's better to delete this code. 
--- .../mlir/Dialect/AMDGPU/CMakeLists.txt | 1 - mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 17 -- .../AMDGPU/TransformOps/AMDGPUTransformOps.h | 48 ---- .../AMDGPU/TransformOps/AMDGPUTransformOps.td | 47 ---- .../AMDGPU/TransformOps/CMakeLists.txt | 4 - .../mlir/Dialect/AMDGPU/Transforms/Passes.h | 2 +- .../mlir/Dialect/AMDGPU/Transforms/Passes.td | 20 -- .../Dialect/AMDGPU/Transforms/Transforms.h | 61 ---- .../mlir/Dialect/AMDGPU/Transforms/Utils.h | 24 -- mlir/include/mlir/InitAllExtensions.h | 2 - mlir/lib/Dialect/AMDGPU/CMakeLists.txt | 1 - mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 15 - .../TransformOps/AMDGPUTransformOps.cpp | 67 ----- .../AMDGPU/TransformOps/CMakeLists.txt | 25 -- .../Dialect/AMDGPU/Transforms/CMakeLists.txt | 3 - .../Transforms/OptimizeSharedMemory.cpp | 261 ------------------ mlir/lib/Dialect/AMDGPU/Transforms/Utils.cpp | 39 --- .../AMDGPU/optimize_shmem_reads_writes.mlir | 50 ---- ...transform_optimize_shmem_reads_writes.mlir | 54 ---- .../llvm-project-overlay/mlir/BUILD.bazel | 54 ---- 20 files changed, 1 insertion(+), 794 deletions(-) delete mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h delete mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td delete mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt delete mode 100644 mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h delete mode 100644 mlir/include/mlir/Dialect/AMDGPU/Transforms/Utils.h delete mode 100644 mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp delete mode 100644 mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt delete mode 100644 mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp delete mode 100644 mlir/lib/Dialect/AMDGPU/Transforms/Utils.cpp delete mode 100644 mlir/test/Dialect/AMDGPU/optimize_shmem_reads_writes.mlir delete mode 100644 mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir diff --git a/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt index 660deb21479d2..9f57627c321fb 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt @@ -1,3 +1,2 @@ add_subdirectory(IR) -add_subdirectory(TransformOps) add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 21942b179a001..3f27e1541cf38 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -29,23 +29,6 @@ def AMDGPU_Dialect : Dialect { "gpu::GPUDialect" ]; let useDefaultAttributePrinterParser = 1; - - let extraClassDeclaration = [{ - /// Return true if the given MemRefType has an integer address - /// space that matches the ROCDL shared memory address space or - /// is a gpu::AddressSpaceAttr attribute with value 'workgroup`. - static bool hasSharedMemoryAddressSpace(MemRefType type); - - /// Return true if the given Attribute has an integer address - /// space that matches the ROCDL shared memory address space or - /// is a gpu::AddressSpaceAttr attribute with value 'workgroup`. - static bool isSharedMemoryAddressSpace(Attribute type); - - /// Defines the MemRef memory space attribute numeric value that indicates - /// a memref is located in shared memory. This should correspond to the - /// value used in ROCDL. 
- static constexpr unsigned kSharedMemoryAddressSpace = 3; - }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h deleted file mode 100644 index dcf934c71dd1f..0000000000000 --- a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h +++ /dev/null @@ -1,48 +0,0 @@ -//===- AMDGPUTransformOps.h - AMDGPU transform ops ---------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H -#define MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H - -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Transform/IR/TransformAttrs.h" -#include "mlir/Dialect/Transform/IR/TransformDialect.h" -#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/RegionKindInterface.h" - -namespace mlir { -namespace transform { -class TransformHandleTypeInterface; -} // namespace transform -} // namespace mlir - -namespace mlir { -class DialectRegistry; - -namespace linalg { -class LinalgOp; -} // namespace linalg - -namespace scf { -class ForOp; -} // namespace scf - -namespace amdgpu { -void registerTransformDialectExtension(DialectRegistry ®istry); -} // namespace amdgpu -} // namespace mlir - -//===----------------------------------------------------------------------===// -// AMDGPU Transform Operations -//===----------------------------------------------------------------------===// - -#define GET_OP_CLASSES -#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h.inc" - -#endif // MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td deleted file mode 100644 index 8aaa87511a2be..0000000000000 --- a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td +++ /dev/null @@ -1,47 +0,0 @@ -//===- AMDGPUTransformOps.td - AMDGPU transform ops --------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPU_TRANSFORM_OPS -#define AMDGPU_TRANSFORM_OPS - -include "mlir/Dialect/Transform/IR/TransformAttrs.td" -include "mlir/Dialect/Transform/IR/TransformDialect.td" -include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" -include "mlir/Dialect/Transform/IR/TransformTypes.td" - -include "mlir/Interfaces/SideEffectInterfaces.td" -//===----------------------------------------------------------------------===// -// ApplyOptimizeSharedMemoryReadsAndWritesOp -//===----------------------------------------------------------------------===// - -def ApplyOptimizeSharedMemoryReadsAndWritesOp : - Op, - TransformOpInterface, TransformEachOpTrait]> { - let summary = "Reduce shared memory bank conflicts"; - let description = [{ This op attempts to optimize GPU Shared memory - reads/writes with the goal of avoiding bank conflicts. - }]; - - let arguments = (ins TransformHandleTypeInterface:$target, - DefaultValuedOptionalAttr:$sharedMemoryLineSizeBytes, - DefaultValuedOptionalAttr:$defaultVectorSizeBits); - let results = (outs); - - let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)"; - - let extraClassDeclaration = [{ - ::mlir::DiagnosedSilenceableFailure applyToOne( - ::mlir::transform::TransformRewriter &rewriter, - ::mlir::func::FuncOp funcOp, - ::mlir::transform::ApplyToEachResultList &results, - ::mlir::transform::TransformState &state); - }]; -} - -#endif // AMDGPU_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt deleted file mode 100644 index 07bfebc9f96d2..0000000000000 --- a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS AMDGPUTransformOps.td) -mlir_tablegen(AMDGPUTransformOps.h.inc -gen-op-decls) -mlir_tablegen(AMDGPUTransformOps.cpp.inc -gen-op-defs) -add_public_tablegen_target(MLIRAMDGPUTransformOpsIncGen) diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h index ab695756d2a78..8dd5ff1a4b198 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h @@ -20,7 +20,7 @@ namespace mlir { class ConversionTarget; namespace amdgpu { -#define GEN_PASS_DECL +#define GEN_PASS_DECL_AMDGPUEMULATEATOMICSPASS #define GEN_PASS_REGISTRATION #include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc" diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td index 67f951fd19d17..e6b27aa842dfc 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td @@ -30,24 +30,4 @@ def AmdgpuEmulateAtomicsPass : Pass<"amdgpu-emulate-atomics"> { "Chipset that these operations will run on">]; } -def OptimizeSharedMemory : Pass<"amdgpu-optimize-shared-memory"> { - let summary = "Optimizes accesses to shared memory memrefs in order to reduce bank conflicts."; - let description = [{ - This pass adds a transformation and pass to the AMDGPU dialect that - attempts to optimize reads/writes from a memref representing GPU shared - memory in order to avoid bank conflicts. 
- }]; - let dependentDialects = [ - "memref::MemRefDialect", "vector::VectorDialect" - ]; - let options = [ - Option<"sharedMemoryLineSizeBytes", "shared-memory-line-size-bytes", "int64_t", - /*default=*/"128", - "Shared memory line size in bytes">, - Option<"defaultVectorSizeBits", "default-vector-size-bits", "int64_t", - /*default=*/"128", - "Default vector size in bits">, - ]; -} - #endif // MLIR_DIALECT_AMDGPU_TRANSFORMS_PASSES_TD_ diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h deleted file mode 100644 index 843cea2c503b9..0000000000000 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h +++ /dev/null @@ -1,61 +0,0 @@ -//===- Transforms.h - AMDGPU Dialect transformations -------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares functions that assist transformations for the amdgpu -// dialect. -// -//===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_AMDGPU_TRANSFORMS_TRANSFORMS_H_ -#define MLIR_DIALECT_AMDGPU_TRANSFORMS_TRANSFORMS_H_ - -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/Operation.h" -#include "mlir/Support/LogicalResult.h" - -namespace mlir { -class RewriterBase; - -namespace amdgpu { - -/// -/// Passes -/// - -/// Optimizes vectorized accesses to a shared memory buffer specified by -/// memrefValue. This transformation assumes the following: -/// 1) All relevant accesses to `memrefValue` are contained with `parentOp`. -/// 2) The function will fail precondition checks if any subviews are -/// taken of `memrefValue`. All reads/writes to `memrefValue` should occur -/// through `memrefValue` directly. -/// -/// Shared memory bank conflicts occur when multiple threads attempt to read or -/// write locations assigned to the same shared memory bank. For `2^N` byte -/// vectorized accesses, we need to be concerned with conflicts among threads -/// identified as `(tid) -> tid.floordiv(2^{7-N})`. As such, this transformation -/// changes any indexed memory access (vector.load, memref.load, etc) -/// such that the final dimension's index value is permuted such that -/// `newColIndex = oldColIndex % vectorSize + -/// perm[rowIndex](oldColIndex/vectorSize, rowIndex)` where `rowIndex` is the -/// index for the second-to last dimension and `perm[rowIndex]` is a permutation -/// function that depends on the row Index. The permutation function is chosen -/// to ensure that sequential distributed+vectorized reads/writes down a single -/// dimension of the memref have minimal conflicts. 
-LogicalResult -optimizeSharedMemoryReadsAndWrites(Operation *parentOp, Value memrefValue, - int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits); - -std::optional -optimizeSharedMemoryReadsAndWritesOp(func::FuncOp funcOp, - int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits); - -} // namespace amdgpu -} // namespace mlir - -#endif // MLIR_DIALECT_AMDGPU_TRANSFORMS_TRANSFORMS_H_ diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Utils.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Utils.h deleted file mode 100644 index 9e5e9589d62f3..0000000000000 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Utils.h +++ /dev/null @@ -1,24 +0,0 @@ -//===- Utils.h - Transform utilities -----------------------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/IR/Operation.h" - -namespace mlir { -namespace amdgpu { - -/// Get and set the indices that the given load/store operation is operating on. -/// Preconditions: -/// - The Op must have memory affects. -/// - Considers memref::LoadOp, vector::LoadOp, and vector::TransferReadOp. -/// - Considers memref::StoreOp, vector::StoreOp, and vector::TransferWriteOp. -/// - Excludes subview op. -std::optional getIndices(Operation *op); -void setIndices(Operation *op, ArrayRef indices); - -} // namespace amdgpu -} // namespace mlir diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index b31fb26f00f8f..7708ca5571de3 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -23,7 +23,6 @@ #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h" #include "mlir/Conversion/UBToLLVM/UBToLLVM.h" -#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h" #include "mlir/Dialect/Affine/TransformOps/AffineTransformOps.h" #include "mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.h" #include "mlir/Dialect/Func/Extensions/AllExtensions.h" @@ -67,7 +66,6 @@ inline void registerAllExtensions(DialectRegistry ®istry) { ub::registerConvertUBToLLVMInterface(registry); // Register all transform dialect extensions. 
- amdgpu::registerTransformDialectExtension(registry); affine::registerTransformDialectExtension(registry); bufferization::registerTransformDialectExtension(registry); func::registerTransformDialectExtension(registry); diff --git a/mlir/lib/Dialect/AMDGPU/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/CMakeLists.txt index c47e4c5495c17..31167e6af908b 100644 --- a/mlir/lib/Dialect/AMDGPU/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(IR) -add_subdirectory(TransformOps) add_subdirectory(Transforms) add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index 4e72fbf56b80a..2575ad4984814 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -43,21 +43,6 @@ void AMDGPUDialect::initialize() { >(); } -bool amdgpu::AMDGPUDialect::isSharedMemoryAddressSpace(Attribute memorySpace) { - if (!memorySpace) - return false; - if (auto intAttr = llvm::dyn_cast(memorySpace)) - return intAttr.getInt() == AMDGPUDialect::kSharedMemoryAddressSpace; - if (auto gpuAttr = llvm::dyn_cast(memorySpace)) - return gpuAttr.getValue() == gpu::AddressSpace::Workgroup; - return false; -} - -bool amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(MemRefType type) { - Attribute memorySpace = type.getMemorySpace(); - return isSharedMemoryAddressSpace(memorySpace); -} - //===----------------------------------------------------------------------===// // 8-bit float ops //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp b/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp deleted file mode 100644 index b7e17a9289738..0000000000000 --- a/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//===- AMDGPUTransformOps.cpp - Implementation of AMDGPU transform ops-----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h" - -#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" -#include "mlir/Dialect/AMDGPU/Transforms/Transforms.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" - -using namespace mlir; -using namespace mlir::amdgpu; -using namespace mlir::transform; -using namespace mlir::func; - -#define DEBUG_TYPE "amdgpu-transforms" -#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") -#define DBGSNL() (llvm::dbgs() << "\n") -#define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n") - -DiagnosedSilenceableFailure -ApplyOptimizeSharedMemoryReadsAndWritesOp::applyToOne( - TransformRewriter &rewriter, FuncOp funcOp, ApplyToEachResultList &results, - TransformState &state) { - optimizeSharedMemoryReadsAndWritesOp(funcOp, getSharedMemoryLineSizeBytes(), - getDefaultVectorSizeBits()); - return DiagnosedSilenceableFailure::success(); -} - -void ApplyOptimizeSharedMemoryReadsAndWritesOp::getEffects( - SmallVectorImpl &effects) { - onlyReadsHandle(getTarget(), effects); - modifiesPayload(effects); -} - -//===----------------------------------------------------------------------===// -// Transform op registration -//===----------------------------------------------------------------------===// - -namespace { -class AMDGPUTransformDialectExtension - : public TransformDialectExtension { -public: - AMDGPUTransformDialectExtension() { - declareGeneratedDialect(); - declareGeneratedDialect(); - declareGeneratedDialect(); - declareGeneratedDialect(); - registerTransformOps< -#define GET_OP_LIST -#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc" - >(); - } -}; -} // namespace - -#define GET_OP_CLASSES -#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc" - -void amdgpu::registerTransformDialectExtension(DialectRegistry ®istry) { - registry.addExtensions(); -} diff --git a/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt deleted file mode 100644 index c39a3b55eabca..0000000000000 --- a/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -add_mlir_dialect_library(MLIRAMDGPUTransformOps - AMDGPUTransformOps.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AMDGPU/TransformOps - - DEPENDS - MLIRAMDGPUTransformOpsIncGen - - LINK_LIBS PUBLIC - MLIRAffineDialect - MLIRArithDialect - MLIRIR - MLIRLinalgDialect - MLIRAMDGPUDialect - MLIRAMDGPUTransforms - MLIRParser - MLIRSideEffectInterfaces - MLIRSCFDialect - MLIRSCFTransforms - MLIRTransformDialect - MLIRTransformDialectUtils - MLIRVectorTransforms - - ) diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt index a955d585b9a1d..0889a21bddc44 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt @@ -1,7 +1,5 @@ add_mlir_dialect_library(MLIRAMDGPUTransforms EmulateAtomics.cpp - OptimizeSharedMemory.cpp - Utils.cpp ADDITIONAL_HEADER_DIRS {$MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AMDGPU/Transforms @@ -19,5 +17,4 @@ add_mlir_dialect_library(MLIRAMDGPUTransforms MLIRPass MLIRTransforms MLIRTransformUtils - MLIRVectorDialect ) diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp deleted file 
mode 100644 index 32fab265e03cc..0000000000000 --- a/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp +++ /dev/null @@ -1,261 +0,0 @@ -//===- OptimizeSharedMemory.cpp - MLIR AMDGPU pass implementation ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements transforms to optimize accesses to shared memory. -// It is inspired by -// https://github.com/llvm/llvm-project/blob/main/mlir/lib/Dialect/NVGPU/Transforms/OptimizeSharedMemory.cpp -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/AMDGPU/Transforms/Passes.h" - -#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" -#include "mlir/Dialect/AMDGPU/Transforms/Transforms.h" -#include "mlir/Dialect/AMDGPU/Transforms/Utils.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/GPU/IR/GPUDialect.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Support/LogicalResult.h" - -namespace mlir { -namespace amdgpu { -#define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY -#include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc" -} // namespace amdgpu -} // namespace mlir - -using namespace mlir; -using namespace mlir::amdgpu; - -/// Uses `srcIndexValue` to permute `tgtIndexValue` via -/// `result = xor(floordiv(srcIdxVal,permuteEveryN), -/// floordiv(tgtIdxVal,vectorSize))) -/// + tgtIdxVal % vectorSize` -/// This is done using an optimized sequence of `arith` operations. -static Value permuteVectorOffset(OpBuilder &b, Location loc, - ArrayRef indices, MemRefType memrefTy, - int64_t srcDim, int64_t tgtDim, - int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits) { - // Adjust the src index to change how often the permutation changes - // if necessary. - Value src = indices[srcDim]; - - // We only want to permute every N iterations of the target dim where N is - // ceil(sharedMemoryLineSizeBytes / dimSizeBytes(tgtDim)). - const int64_t permuteEveryN = std::max( - 1, sharedMemoryLineSizeBytes / ((memrefTy.getDimSize(tgtDim) * - memrefTy.getElementTypeBitWidth()) / - 8)); - - // clang-format off - // Index bit representation (b0 = least significant bit) for dim(1) - // of a `memref` is as follows: - // N := log2(128/elementSizeBits) - // M := log2(dimSize(1)) - // then - // bits[0:N] = sub-vector element offset - // bits[N:M] = vector index - // clang-format on - int64_t n = - llvm::Log2_64(defaultVectorSizeBits / memrefTy.getElementTypeBitWidth()); - int64_t m = llvm::Log2_64(memrefTy.getDimSize(tgtDim)); - - // Capture bits[0:(M-N)] of src by first creating a (M-N) mask. - int64_t mask = (1LL << (m - n)) - 1; - if (permuteEveryN > 1) - mask = mask << llvm::Log2_64(permuteEveryN); - Value srcBits = b.create(loc, mask); - srcBits = b.create(loc, src, srcBits); - - /// Use the src bits to permute the target bits b[N:M] containing the - /// vector offset. 
- if (permuteEveryN > 1) { - int64_t shlBits = n - llvm::Log2_64(permuteEveryN); - if (shlBits > 0) { - Value finalShiftVal = b.create(loc, shlBits); - srcBits = b.createOrFold(loc, srcBits, finalShiftVal); - } else if (shlBits < 0) { - Value finalShiftVal = b.create(loc, -1 * shlBits); - srcBits = b.createOrFold(loc, srcBits, finalShiftVal); - } - } else { - Value finalShiftVal = b.create(loc, n); - srcBits = b.createOrFold(loc, srcBits, finalShiftVal); - } - - Value permutedVectorIdx = - b.create(loc, indices[tgtDim], srcBits); - return permutedVectorIdx; -} - -static void transformIndices(OpBuilder &builder, Location loc, - SmallVector &indices, - MemRefType memrefTy, int64_t srcDim, - int64_t tgtDim, int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits) { - indices[tgtDim] = - permuteVectorOffset(builder, loc, indices, memrefTy, srcDim, tgtDim, - sharedMemoryLineSizeBytes, defaultVectorSizeBits); -} - -// Return all operations within `parentOp` that read from or write to -// `shmMemRef`. -static LogicalResult -getShmReadAndWriteOps(Operation *parentOp, Value shmMemRef, - SmallVector &readOps, - SmallVector &writeOps) { - parentOp->walk([&](Operation *op) { - MemoryEffectOpInterface iface = dyn_cast(op); - if (!iface) - return; - std::optional effect = - iface.getEffectOnValue(shmMemRef); - if (effect) { - readOps.push_back(op); - return; - } - effect = iface.getEffectOnValue(shmMemRef); - if (effect) - writeOps.push_back(op); - }); - - // Restrict to a supported set of ops. We also require at least 2D access, - // although this could be relaxed. - if (llvm::any_of(readOps, [](Operation *op) { - return !isa( - op) || - amdgpu::getIndices(op)->size() < 2; - })) - return failure(); - if (llvm::any_of(writeOps, [](Operation *op) { - return !isa( - op) || - amdgpu::getIndices(op)->size() < 2; - })) - return failure(); - - return success(); -} - -LogicalResult amdgpu::optimizeSharedMemoryReadsAndWrites( - Operation *parentOp, Value memrefValue, int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits) { - auto memRefType = dyn_cast(memrefValue.getType()); - if (!memRefType || - !amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(memRefType)) - return failure(); - - // Abort if the given value has any sub-views; we do not do any alias - // analysis. - bool hasSubView = false; - parentOp->walk([&](memref::SubViewOp subView) { hasSubView = true; }); - if (hasSubView) - return failure(); - - // Check if this is necessary given the assumption of 128b accesses: - // If dim[rank-1] is small enough to fit 8 rows in a 128B line. - const int64_t rowSize = memRefType.getDimSize(memRefType.getRank() - 1); - const int64_t rowsPerLine = - (8 * sharedMemoryLineSizeBytes / memRefType.getElementTypeBitWidth()) / - rowSize; - const int64_t threadGroupSize = - 1LL << (7 - llvm::Log2_64(defaultVectorSizeBits / 8)); - if (rowsPerLine >= threadGroupSize) - return failure(); - - // Get sets of operations within the function that read/write to shared - // memory. - SmallVector shmReadOps; - SmallVector shmWriteOps; - if (failed(getShmReadAndWriteOps(parentOp, memrefValue, shmReadOps, - shmWriteOps))) - return failure(); - - if (shmReadOps.empty() || shmWriteOps.empty()) - return failure(); - - OpBuilder builder(parentOp->getContext()); - - int64_t tgtDim = memRefType.getRank() - 1; - int64_t srcDim = memRefType.getRank() - 2; - - // Transform indices for the ops writing to shared memory. 
- while (!shmWriteOps.empty()) { - Operation *shmWriteOp = shmWriteOps.pop_back_val(); - builder.setInsertionPoint(shmWriteOp); - - auto indices = amdgpu::getIndices(shmWriteOp); - SmallVector transformedIndices(indices->begin(), indices->end()); - transformIndices(builder, shmWriteOp->getLoc(), transformedIndices, - memRefType, srcDim, tgtDim, sharedMemoryLineSizeBytes, - defaultVectorSizeBits); - amdgpu::setIndices(shmWriteOp, transformedIndices); - } - - // Transform indices for the ops reading from shared memory. - while (!shmReadOps.empty()) { - Operation *shmReadOp = shmReadOps.pop_back_val(); - builder.setInsertionPoint(shmReadOp); - - auto indices = amdgpu::getIndices(shmReadOp); - SmallVector transformedIndices(indices->begin(), indices->end()); - transformIndices(builder, shmReadOp->getLoc(), transformedIndices, - memRefType, srcDim, tgtDim, sharedMemoryLineSizeBytes, - defaultVectorSizeBits); - amdgpu::setIndices(shmReadOp, transformedIndices); - } - - return success(); -} - -std::optional -amdgpu::optimizeSharedMemoryReadsAndWritesOp(func::FuncOp funcOp, - int64_t sharedMemoryLineSizeBytes, - int64_t defaultVectorSizeBits) { - SmallVector shmAllocOps; - funcOp.walk([&](memref::AllocOp allocOp) { - if (!amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(allocOp.getType())) - return; - shmAllocOps.push_back(allocOp); - }); - for (auto allocOp : shmAllocOps) { - if (failed(amdgpu::optimizeSharedMemoryReadsAndWrites( - funcOp, allocOp.getMemref(), sharedMemoryLineSizeBytes, - defaultVectorSizeBits))) - return failure(); - } - return success(); -} - -struct OptimizeSharedMemoryPass - : public amdgpu::impl::OptimizeSharedMemoryBase { -public: - OptimizeSharedMemoryPass() = default; - OptimizeSharedMemoryPass(const OptimizeSharedMemoryOptions &options) - : OptimizeSharedMemoryBase(options) {} - void runOnOperation() override { - Operation *op = getOperation(); - SmallVector shmAllocOps; - op->walk([&](memref::AllocOp allocOp) { - if (!amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace( - allocOp.getType())) - return; - shmAllocOps.push_back(allocOp); - }); - for (auto allocOp : shmAllocOps) { - if (failed(optimizeSharedMemoryReadsAndWrites(op, allocOp.getMemref(), - sharedMemoryLineSizeBytes, - defaultVectorSizeBits))) - return; - } - } -}; diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/Utils.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/Utils.cpp deleted file mode 100644 index 8163eeafdf1f0..0000000000000 --- a/mlir/lib/Dialect/AMDGPU/Transforms/Utils.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#include "mlir/Dialect/AMDGPU/Transforms/Utils.h" - -#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" - -using namespace mlir; -using namespace mlir::amdgpu; - -std::optional amdgpu::getIndices(Operation *op) { - if (auto loadOp = dyn_cast(op)) - return loadOp.getIndices(); - if (auto storeOp = dyn_cast(op)) - return storeOp.getIndices(); - if (auto vectorReadOp = dyn_cast(op)) - return vectorReadOp.getIndices(); - if (auto vectorStoreOp = dyn_cast(op)) - return vectorStoreOp.getIndices(); - if (auto transferReadOp = dyn_cast(op)) - return transferReadOp.getIndices(); - if (auto transferWriteOp = dyn_cast(op)) - return transferWriteOp.getIndices(); - return std::nullopt; -} - -void amdgpu::setIndices(Operation *op, ArrayRef indices) { - if (auto loadOp = dyn_cast(op)) - return loadOp.getIndicesMutable().assign(indices); - if (auto storeOp = dyn_cast(op)) - return storeOp.getIndicesMutable().assign(indices); - if 
(auto vectorReadOp = dyn_cast(op)) - return vectorReadOp.getIndicesMutable().assign(indices); - if (auto vectorStoreOp = dyn_cast(op)) - return vectorStoreOp.getIndicesMutable().assign(indices); - if (auto transferReadOp = dyn_cast(op)) - return transferReadOp.getIndicesMutable().assign(indices); - if (auto transferWriteOp = dyn_cast(op)) - return transferWriteOp.getIndicesMutable().assign(indices); -} diff --git a/mlir/test/Dialect/AMDGPU/optimize_shmem_reads_writes.mlir b/mlir/test/Dialect/AMDGPU/optimize_shmem_reads_writes.mlir deleted file mode 100644 index 983eee732e2af..0000000000000 --- a/mlir/test/Dialect/AMDGPU/optimize_shmem_reads_writes.mlir +++ /dev/null @@ -1,50 +0,0 @@ -// RUN: mlir-opt %s --pass-pipeline='builtin.module(func.func(amdgpu-optimize-shared-memory))' | FileCheck %s - - // CHECK: @optimize_shmem([[arg0:%.+]]: memref<{{.*}}>, [[readRow:%.+]]: index, [[readCol:%.+]]: index, [[writeRow:%.+]]: index, [[writeCol:%.+]]: index, [[fragRow:%.+]]: index, [[fragCol:%.+]]: index, [[fragColPerm:%.+]]: index, [[stRow:%.+]]: index, [[stCol:%.+]]: index) - func.func @optimize_shmem(%arg0: memref<4096x4096xf16>, - %readRow: index, %readCol: index, - %writeRow: index, %writeCol: index, - %fragRow: index, %fragCol: index, - %fragColPerm: index, - %stRow: index, %stCol: index) { - // CHECK: %[[cst:.+]] = arith.constant 0.000000e+00 : f16 - %cst = arith.constant 0.000000e+00 : f16 - - // CHECK: [[shmA:%.+]] = memref.alloc - // CHECK: [[shmB:%.+]] = memref.alloc - %shmA = memref.alloc() {alignment = 64 : i64} : memref<128x32xf16, 3> - %shmB = memref.alloc() {alignment = 64 : i64} : memref<256x32xf16, 3> - - %0 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] - vector.transfer_write %0, %shmB[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<256x32xf16, 3> - gpu.barrier - gpu.barrier - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] - %1 = vector.load %shmB[%fragRow, %fragColPerm] : memref<256x32xf16, 3>, vector<8xf16> - - %2 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] - vector.transfer_write %2, %shmA[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<128x32xf16, 3> - gpu.barrier - gpu.barrier - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] - %3 = vector.load %shmA[%fragRow, %fragColPerm] : memref<128x32xf16, 3>, vector<8xf16> 
- return - } diff --git a/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir b/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir deleted file mode 100644 index b1bb91ffc2972..0000000000000 --- a/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: mlir-opt %s -transform-interpreter | FileCheck %s - - // CHECK: @optimize_shmem([[arg0:%.+]]: memref<{{.*}}>, [[readRow:%.+]]: index, [[readCol:%.+]]: index, [[writeRow:%.+]]: index, [[writeCol:%.+]]: index, [[fragRow:%.+]]: index, [[fragCol:%.+]]: index, [[fragColPerm:%.+]]: index, [[stRow:%.+]]: index, [[stCol:%.+]]: index) - func.func @optimize_shmem(%arg0: memref<4096x4096xf16>, - %readRow: index, %readCol: index, - %writeRow: index, %writeCol: index, - %fragRow: index, %fragCol: index, - %fragColPerm: index, - %stRow: index, %stCol: index) { - %cst = arith.constant 0.000000e+00 : f16 - - %shmA = memref.alloc() {alignment = 64 : i64} : memref<128x32xf16, 3> - %shmB = memref.alloc() {alignment = 64 : i64} : memref<256x32xf16, 3> - - %0 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] - vector.transfer_write %0, %shmB[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<256x32xf16, 3> - gpu.barrier - gpu.barrier - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] - %1 = vector.load %shmB[%fragRow, %fragColPerm] : memref<256x32xf16, 3>, vector<8xf16> - %2 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] - vector.transfer_write %2, %shmA[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<128x32xf16, 3> - gpu.barrier - gpu.barrier - // CHECK: [[c6:%.+]] = arith.constant 6 : index - // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c6]] - // CHECK: [[c2:%.+]] = arith.constant 2 : index - // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] - // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] - %3 = vector.load %shmA[%fragRow, %fragColPerm] : memref<128x32xf16, 3>, vector<8xf16> - return - } - -module attributes { transform.with_named_sequence } { - transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.any_op - transform.amdgpu.optimize_shared_memory_reads_and_writes %0 {sharedMemoryLineSizeBytes = 128, defaultVectorSizeBits = 128}: (!transform.any_op) -> () - transform.yield - } // @__transform_main -} // module diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 497edcfceffe4..67052fcd39930 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1552,58 +1552,6 @@ cc_library( ], ) -cc_library( - name = "AMDGPUTransformOps", - srcs = glob([ - "lib/Dialect/AMDGPU/TransformOps/*.cpp", - ]), - hdrs = glob([ - "include/mlir/Dialect/AMDGPU/TransformOps/*.h", - ]), - includes = ["include"], - deps = [ - ":AMDGPUDialect", - ":AMDGPUTransformOpsIncGen", - ":AMDGPUTransforms", - ":AffineDialect", - ":FuncDialect", - ":IR", - ":TransformDialect", - ":TransformDialectInterfaces", - ":VectorDialect", - ], -) - -td_library( - name = "AMDGPUTransformOpsTdFiles", - srcs = glob([ - "include/mlir/Dialect/AMDGPU/TransformOps/*.td", - ]), - includes = ["include"], - deps = [ - ":TransformDialectTdFiles", - ], -) - -gentbl_cc_library( - name = "AMDGPUTransformOpsIncGen", - tbl_outs = [ - ( - ["-gen-op-decls"], - "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h.inc", - ), - ( - ["-gen-op-defs"], - "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td", - deps = [ - ":AMDGPUTransformOpsTdFiles", - ], -) - gentbl_cc_library( name = "AMDGPUPassIncGen", tbl_outs = [ @@ -4787,7 +4735,6 @@ cc_library( name = "AllExtensions", hdrs = ["include/mlir/InitAllExtensions.h"], deps = [ - ":AMDGPUTransformOps", ":AffineTransformOps", ":ArithToLLVM", ":BufferizationTransformOps", @@ -9033,7 +8980,6 @@ cc_library( deps = [ ":AMDGPUDialect", ":AMDGPUToROCDL", - ":AMDGPUTransformOps", ":AMDGPUTransforms", ":AMXDialect", ":AMXTransforms", From 198ffb85314f7741ed048de67d68ca83bb30e16e Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Thu, 11 Apr 2024 11:23:24 -0400 Subject: [PATCH 146/886] [Clang][Sema] Implement approved resolution for CWG2858 (#88042) The approved resolution for CWG2858 changes [expr.prim.id.qual] p2 sentence 2 to read: > A declarative _nested-name-specifier_ shall not have a _computed-type-specifier_. This patch implements the approved resolution. Since we don't consider _nested-name-specifiers_ in friend declarations to be declarative (yet), it currently isn't possible to write a test that would produce this diagnostic (`diagnoseQualifiedDeclaration` is never called if the `DeclContext` can't be computed). Nevertheless, tests were added which will produce the diagnostic once we start calling `diagnoseQualifiedDeclaration` for friend declarations. --- clang/docs/ReleaseNotes.rst | 3 +++ .../clang/Basic/DiagnosticSemaKinds.td | 7 +++--- clang/lib/Sema/SemaDecl.cpp | 13 +++++------ clang/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp | 4 ++-- clang/test/CXX/drs/dr28xx.cpp | 23 +++++++++++++++++++ clang/test/Parser/cxx-class.cpp | 6 ++--- 6 files changed, 40 insertions(+), 16 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c4a4893aec5cd..93318871fa9f6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -147,6 +147,9 @@ Resolutions to C++ Defect Reports compatibility of two types. (`CWG2759: [[no_unique_address] and common initial sequence `_). +- Clang now diagnoses declarative nested-name-specifiers with pack-index-specifiers. + (`CWG2858: Declarative nested-name-specifiers and pack-index-specifiers `_). 
+ C Language Changes ------------------ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 059a8f58da5db..180e913155d67 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2402,10 +2402,6 @@ def err_selected_explicit_constructor : Error< def note_explicit_ctor_deduction_guide_here : Note< "explicit %select{constructor|deduction guide}0 declared here">; -// C++11 decltype -def err_decltype_in_declarator : Error< - "'decltype' cannot be used to name a declaration">; - // C++11 auto def warn_cxx98_compat_auto_type_specifier : Warning< "'auto' type specifier is incompatible with C++98">, @@ -8313,6 +8309,9 @@ def ext_template_after_declarative_nns : ExtWarn< def ext_alias_template_in_declarative_nns : ExtWarn< "a declarative nested name specifier cannot name an alias template">, InGroup>; +def err_computed_type_in_declarative_nns : Error< + "a %select{pack indexing|'decltype'}0 specifier cannot be used in " + "a declarative nested name specifier">; def err_no_typeid_with_fno_rtti : Error< "use of typeid requires -frtti">; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5a23179dfbbf4..a4699d6ba2c73 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6335,16 +6335,15 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC, if (TST->isDependentType() && TST->isTypeAlias()) Diag(Loc, diag::ext_alias_template_in_declarative_nns) << SpecLoc.getLocalSourceRange(); - } else if (T->isDecltypeType()) { + } else if (T->isDecltypeType() || T->getAsAdjusted()) { // C++23 [expr.prim.id.qual]p2: // [...] A declarative nested-name-specifier shall not have a - // decltype-specifier. + // computed-type-specifier. // - // FIXME: This wording appears to be defective as it does not forbid - // declarative nested-name-specifiers with pack-index-specifiers. - // See https://github.com/cplusplus/CWG/issues/499. - Diag(Loc, diag::err_decltype_in_declarator) - << SpecLoc.getTypeLoc().getSourceRange(); + // CWG2858 changed this from 'decltype-specifier' to + // 'computed-type-specifier'. 
+ Diag(Loc, diag::err_computed_type_in_declarative_nns) + << T->isDecltypeType() << SpecLoc.getTypeLoc().getSourceRange(); } } } while ((SpecLoc = SpecLoc.getPrefix())); diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp index fbe9c0895aeae..13be079a40bc3 100644 --- a/clang/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp +++ b/clang/test/CXX/dcl.decl/dcl.meaning/p1-0x.cpp @@ -6,8 +6,8 @@ class foo { void func(); }; -int decltype(foo())::i; // expected-error{{'decltype' cannot be used to name a declaration}} -void decltype(foo())::func() { // expected-error{{'decltype' cannot be used to name a declaration}} +int decltype(foo())::i; // expected-error{{a 'decltype' specifier cannot be used in a declarative nested name specifier}} +void decltype(foo())::func() { // expected-error{{a 'decltype' specifier cannot be used in a declarative nested name specifier}} } diff --git a/clang/test/CXX/drs/dr28xx.cpp b/clang/test/CXX/drs/dr28xx.cpp index 7f72003d66f1e..9b21d3410a049 100644 --- a/clang/test/CXX/drs/dr28xx.cpp +++ b/clang/test/CXX/drs/dr28xx.cpp @@ -58,3 +58,26 @@ void B::g() requires true; #endif } // namespace dr2847 + +namespace dr2858 { // dr2858: 19 + +#if __cplusplus > 202302L + +template +struct A { + // FIXME: The nested-name-specifier in the following friend declarations are declarative, + // but we don't treat them as such (yet). + friend void Ts...[0]::f(); + template + friend void Ts...[0]::g(); + + friend struct Ts...[0]::B; + // FIXME: The index of the pack-index-specifier is printed as a memory address in the diagnostic. + template + friend struct Ts...[0]::C; + // expected-warning-re@-1 {{dependent nested name specifier 'Ts...[{{.*}}]::' for friend template declaration is not supported; ignoring this friend declaration}} +}; + +#endif + +} // namespace dr2858 diff --git a/clang/test/Parser/cxx-class.cpp b/clang/test/Parser/cxx-class.cpp index 046d2dd580f02..c90c7e030a8bd 100644 --- a/clang/test/Parser/cxx-class.cpp +++ b/clang/test/Parser/cxx-class.cpp @@ -59,14 +59,14 @@ typedef union { } y; } bug3177; -// check that we don't consume the token after the access specifier +// check that we don't consume the token after the access specifier // when it's not a colon class D { public // expected-error{{expected ':'}} int i; }; -// consume the token after the access specifier if it's a semicolon +// consume the token after the access specifier if it's a semicolon // that was meant to be a colon class E { public; // expected-error{{expected ':'}} @@ -281,7 +281,7 @@ struct A {} ::PR41192::a; // ok, no missing ';' here expected-warning {{extra q #if __cplusplus >= 201103L struct C; struct D { static C c; }; -struct C {} decltype(D())::c; // expected-error {{'decltype' cannot be used to name a declaration}} +struct C {} decltype(D())::c; // expected-error {{a 'decltype' specifier cannot be used in a declarative nested name specifier}} #endif } From 298ea9bfd50ca41c77e45065700df06adb6264ae Mon Sep 17 00:00:00 2001 From: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> Date: Thu, 11 Apr 2024 10:26:54 -0500 Subject: [PATCH 147/886] [Flang] [OpenMP] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive. (#88206) Added lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses for OMP TARGET directive and added related tests for these changes. 
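As a rough illustration of how these clauses are meant to be used, here is a
hypothetical C++ analogue of the Fortran tests below. It is not part of the
patch; the function names are made up, and only standard OpenMP runtime
entry points (omp_target_alloc, omp_target_free, omp_get_default_device) are
used.

    #include <omp.h>

    void fill_on_device(int n) {
      int dev = omp_get_default_device();
      // 'p' is already a device pointer, so is_device_ptr lets the target
      // region use it directly instead of mapping it.
      int *p = static_cast<int *>(omp_target_alloc(n * sizeof(int), dev));
      #pragma omp target is_device_ptr(p)
      for (int i = 0; i < n; ++i)
        p[i] = i;
      omp_target_free(p, dev);
    }

    void set_on_device(int &x) {
      // Inside the target data region, use_device_addr makes 'x' refer to
      // its device address, which the inner target region then accesses
      // directly via has_device_addr.
      #pragma omp target data map(tofrom: x) use_device_addr(x)
      {
        #pragma omp target has_device_addr(x)
        x = 42;
      }
    }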
For background: IS_DEVICE_PTR and HAS_DEVICE_ADDR apply to the OMP TARGET
directive. The OpenMP spec states:

  The **is_device_ptr** clause indicates that its list items are device
  pointers.

  The **has_device_addr** clause indicates that its list items already have
  device addresses and therefore they may be directly accessed from a target
  device.

USE_DEVICE_PTR and USE_DEVICE_ADDR, by contrast, apply to the OMP TARGET DATA
directive, and the spec states for them:

  Each list item in the **use_device_ptr** clause results in a new list item
  that is a device pointer that refers to a device address.

  Each list item in a **use_device_addr** clause that is present in the
  device data environment is treated as if it is implicitly mapped by a map
  clause on the construct with a map-type of alloc.

Also fixed a build error caused by the squash merge, which needed a rebase.
---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 29 +++++++++++++
 flang/lib/Lower/OpenMP/ClauseProcessor.h | 12 ++++++
 flang/lib/Lower/OpenMP/OpenMP.cpp | 22 +++++++---
 flang/test/Lower/OpenMP/FIR/target.f90 | 43 ++++++++++++++++++-
 .../Dialect/OpenMP/OpenMPClauseOperands.h | 16 ++++---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 18 ++++++--
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 9 ++--
 mlir/test/Dialect/OpenMP/invalid.mlir | 2 +-
 mlir/test/Dialect/OpenMP/ops.mlir | 8 ++--
 9 files changed, 136 insertions(+), 23 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 0a57a1496289f..fb24c8d1fe3eb 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -751,6 +751,20 @@ bool ClauseProcessor::processDepend(
   });
 }
 
+bool ClauseProcessor::processHasDeviceAddr(
+    llvm::SmallVectorImpl &operands,
+    llvm::SmallVectorImpl &isDeviceTypes,
+    llvm::SmallVectorImpl &isDeviceLocs,
+    llvm::SmallVectorImpl &isDeviceSymbols)
+    const {
+  return findRepeatableClause(
+      [&](const omp::clause::HasDeviceAddr &devAddrClause,
+          const Fortran::parser::CharBlock &) {
+        addUseDeviceClause(converter, devAddrClause.v, operands, isDeviceTypes,
+                           isDeviceLocs, isDeviceSymbols);
+      });
+}
+
 bool ClauseProcessor::processIf(
     omp::clause::If::DirectiveNameModifier directiveName,
     mlir::Value &result) const {
@@ -771,6 +785,20 @@ bool ClauseProcessor::processIf(
   return found;
 }
 
+bool ClauseProcessor::processIsDevicePtr(
+    llvm::SmallVectorImpl &operands,
+    llvm::SmallVectorImpl &isDeviceTypes,
+    llvm::SmallVectorImpl &isDeviceLocs,
+    llvm::SmallVectorImpl &isDeviceSymbols)
+    const {
+  return findRepeatableClause(
+      [&](const omp::clause::IsDevicePtr &devPtrClause,
+          const Fortran::parser::CharBlock &) {
+        addUseDeviceClause(converter, devPtrClause.v, operands, isDeviceTypes,
+                           isDeviceLocs, isDeviceSymbols);
+      });
+}
+
 bool ClauseProcessor::processLink(
     llvm::SmallVectorImpl &result) const {
   return findRepeatableClause(
@@ -993,6 +1021,7 @@ bool ClauseProcessor::processUseDevicePtr(
                        useDeviceLocs, useDeviceSymbols);
   });
 }
+
 } // namespace omp
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index d31d6a5c20623..df8f4f5310fcb 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -66,6 +66,12 @@ class ClauseProcessor {
   bool processDeviceType(mlir::omp::DeclareTargetDeviceType &result) const;
   bool processFinal(Fortran::lower::StatementContext &stmtCtx,
                     mlir::Value &result) const;
+  bool
+  processHasDeviceAddr(llvm::SmallVectorImpl &operands,
llvm::SmallVectorImpl &isDeviceTypes, + llvm::SmallVectorImpl &isDeviceLocs, + llvm::SmallVectorImpl + &isDeviceSymbols) const; bool processHint(mlir::IntegerAttr &result) const; bool processMergeable(mlir::UnitAttr &result) const; bool processNowait(mlir::UnitAttr &result) const; @@ -104,6 +110,12 @@ class ClauseProcessor { bool processIf(omp::clause::If::DirectiveNameModifier directiveName, mlir::Value &result) const; bool + processIsDevicePtr(llvm::SmallVectorImpl &operands, + llvm::SmallVectorImpl &isDeviceTypes, + llvm::SmallVectorImpl &isDeviceLocs, + llvm::SmallVectorImpl + &isDeviceSymbols) const; + bool processLink(llvm::SmallVectorImpl &result) const; // This method is used to process a map clause. diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 340921c867246..50ad889052ab0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1294,6 +1294,11 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, llvm::SmallVector mapSymTypes; llvm::SmallVector mapSymLocs; llvm::SmallVector mapSymbols; + llvm::SmallVector devicePtrOperands, deviceAddrOperands; + llvm::SmallVector devicePtrTypes, deviceAddrTypes; + llvm::SmallVector devicePtrLocs, deviceAddrLocs; + llvm::SmallVector devicePtrSymbols, + deviceAddrSymbols; ClauseProcessor cp(converter, semaCtx, clauseList); cp.processIf(llvm::omp::Directive::OMPD_target, ifClauseOperand); @@ -1303,11 +1308,15 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, cp.processNowait(nowaitAttr); cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols); + cp.processIsDevicePtr(devicePtrOperands, devicePtrTypes, devicePtrLocs, + devicePtrSymbols); + cp.processHasDeviceAddr(deviceAddrOperands, deviceAddrTypes, deviceAddrLocs, + deviceAddrSymbols); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_target); + cp.processTODO(currentLocation, + llvm::omp::Directive::OMPD_target); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the @@ -1400,7 +1409,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), dependTypeOperands), - dependOperands, nowaitAttr, mapOperands); + dependOperands, nowaitAttr, devicePtrOperands, deviceAddrOperands, + mapOperands); genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, mapSymLocs, mapSymbols, currentLocation); @@ -2059,6 +2069,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u) && + !std::get_if(&clause.u) && + !std::get_if(&clause.u) && !std::get_if(&clause.u) && !std::get_if(&clause.u)) { TODO(clauseLocation, "OpenMP Block construct clause"); diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 821196b83c3b9..022327f9c25da 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -506,4 +506,45 @@ subroutine omp_target_parallel_do !CHECK: omp.terminator !CHECK: } !$omp end target parallel do - end subroutine omp_target_parallel_do +end subroutine omp_target_parallel_do + +!=============================================================================== +! 
Target `is_device_ptr` clause +!=============================================================================== + +!CHECK-LABEL: func.func @_QPomp_target_is_device_ptr() { +subroutine omp_target_is_device_ptr + use iso_c_binding, only : c_ptr, c_loc + !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "a", uniq_name = "_QFomp_target_is_device_ptrEa"} + type(c_ptr) :: a + !CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "b", fir.target, uniq_name = "_QFomp_target_is_device_ptrEb"} + integer, target :: b + !CHECK: %[[MAP_0:.*]] = omp.map.info var_ptr(%[[DEV_PTR:.*]] : !fir.ref>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) map_clauses(tofrom) capture(ByRef) -> !fir.ref> {name = "a"} + !CHECK: %[[MAP_1:.*]] = omp.map.info var_ptr(%[[VAL_0:.*]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "b"} + !CHECK: %[[MAP_2:.*]] = omp.map.info var_ptr(%[[DEV_PTR:.*]] : !fir.ref>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByRef) -> !fir.ref> {name = "a"} + !CHECK: omp.target is_device_ptr(%[[DEV_PTR:.*]] : !fir.ref>) map_entries(%[[MAP_0:.*]] -> %[[ARG0:.*]], %[[MAP_1:.*]] -> %[[ARG1:.*]], %[[MAP_2:.*]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref, !fir.ref>) { + !CHECK: ^bb0(%[[ARG0]]: !fir.ref>, %[[ARG1]]: !fir.ref, %[[ARG2]]: !fir.ref>): + !$omp target map(tofrom: a,b) is_device_ptr(a) + !CHECK: {{.*}} = fir.coordinate_of %[[VAL_0:.*]], {{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + a = c_loc(b) + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_is_device_ptr + + !=============================================================================== + ! Target `has_device_addr` clause + !=============================================================================== + + !CHECK-LABEL: func.func @_QPomp_target_has_device_addr() { + subroutine omp_target_has_device_addr + !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box> {bindc_name = "a", uniq_name = "_QFomp_target_has_device_addrEa"} + integer, pointer :: a + !CHECK: omp.target has_device_addr(%[[VAL_0:.*]] : !fir.ref>>) map_entries({{.*}} -> {{.*}}, {{.*}} -> {{.*}} : !fir.llvm_ptr>, !fir.ref>>) { + !$omp target has_device_addr(a) + !CHECK: {{.*}} = fir.load %[[VAL_0:.*]] : !fir.ref>> + a = 10 + !CHECK: omp.terminator + !$omp end target + !CHECK: } +end subroutine omp_target_has_device_addr diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 6454076f7593b..4ce7e47da046b 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -81,6 +81,9 @@ struct GrainsizeClauseOps { Value grainsizeVar; }; +struct HasDeviceAddrOps { + llvm::SmallVector hasDeviceAddrVars; +}; struct HintClauseOps { IntegerAttr hintAttr; }; @@ -94,6 +97,10 @@ struct InReductionClauseOps { llvm::SmallVector inReductionDeclSymbols; }; +struct IsDevicePtrOps { + llvm::SmallVector isDevicePtrVars; +}; + struct LinearClauseOps { llvm::SmallVector linearVars, linearStepVars; }; @@ -251,13 +258,12 @@ using SimdLoopClauseOps = using SingleClauseOps = detail::Clauses; -// TODO `defaultmap`, `has_device_addr`, `is_device_ptr`, `uses_allocators` -// clauses. +// TODO `defaultmap`, `uses_allocators` clauses. 
using TargetClauseOps = detail::Clauses; + HasDeviceAddrOps, IfClauseOps, InReductionClauseOps, + IsDevicePtrOps, MapClauseOps, NowaitClauseOps, + PrivateClauseOps, ReductionClauseOps, ThreadLimitClauseOps>; using TargetDataClauseOps = detail::Clauses; diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index a38a82f9cc607..2a8582be38331 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1678,14 +1678,23 @@ def TargetOp : OpenMP_Op<"target", [IsolatedFromAbove, MapClauseOwningOpInterfac The optional $thread_limit specifies the limit on the number of threads - The optional $nowait elliminates the implicit barrier so the parent task can make progress + The optional $nowait eliminates the implicit barrier so the parent task can make progress even if the target task is not yet completed. The `depends` and `depend_vars` arguments are variadic lists of values that specify the dependencies of this particular target task in relation to other tasks. - TODO: is_device_ptr, defaultmap, in_reduction + The optional $is_device_ptr indicates list items are device pointers. + + The optional $has_device_addr indicates that list items already have device + addresses, so they may be directly accessed from the target device. This + includes array sections. + + The optional $map_operands maps data from the task’s environment to the + device environment. + + TODO: defaultmap, in_reduction }]; @@ -1695,8 +1704,9 @@ def TargetOp : OpenMP_Op<"target", [IsolatedFromAbove, MapClauseOwningOpInterfac OptionalAttr:$depends, Variadic:$depend_vars, UnitAttr:$nowait, + Variadic:$is_device_ptr, + Variadic:$has_device_addr, Variadic:$map_operands); - let regions = (region AnyRegion:$region); let builders = [ @@ -1708,6 +1718,8 @@ def TargetOp : OpenMP_Op<"target", [IsolatedFromAbove, MapClauseOwningOpInterfac | `device` `(` $device `:` type($device) `)` | `thread_limit` `(` $thread_limit `:` type($thread_limit) `)` | `nowait` $nowait + | `is_device_ptr` `(` $is_device_ptr `:` type($is_device_ptr) `)` + | `has_device_addr` `(` $has_device_addr `:` type($has_device_addr) `)` | `map_entries` `(` custom($map_operands, type($map_operands)) `)` | `depend` `(` custom($depend_vars, type($depend_vars), $depends) `)` ) $region attr-dict diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 543655338db8c..2d5b2231d2dd5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1258,10 +1258,11 @@ void TargetOp::build(OpBuilder &builder, OperationState &state, // TODO Store clauses in op: allocateVars, allocatorVars, inReductionVars, // inReductionDeclSymbols, privateVars, privatizers, reductionVars, // reductionByRefAttr, reductionDeclSymbols. 
- TargetOp::build(builder, state, clauses.ifVar, clauses.deviceVar, - clauses.threadLimitVar, - makeArrayAttr(ctx, clauses.dependTypeAttrs), - clauses.dependVars, clauses.nowaitAttr, clauses.mapVars); + TargetOp::build( + builder, state, clauses.ifVar, clauses.deviceVar, clauses.threadLimitVar, + makeArrayAttr(ctx, clauses.dependTypeAttrs), clauses.dependVars, + clauses.nowaitAttr, clauses.isDevicePtrVars, clauses.hasDeviceAddrVars, + clauses.mapVars); } LogicalResult TargetOp::verify() { diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 1134db77d5baa..27a440b3f97ca 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1809,7 +1809,7 @@ func.func @omp_target_depend(%data_var: memref) { // expected-error @below {{op expected as many depend values as depend variables}} "omp.target"(%data_var) ({ "omp.terminator"() : () -> () - }) {depends = [], operandSegmentSizes = array} : (memref) -> () + }) {depends = [], operandSegmentSizes = array} : (memref) -> () "func.return"() : () -> () } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index e2c255c7a3ccc..ad5b74b84ac70 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -510,22 +510,22 @@ return // CHECK-LABEL: omp_target -func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %map1: memref, %map2: memref) -> () { +func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %device_ptr: memref, %device_addr: memref, %map1: memref, %map2: memref) -> () { // Test with optional operands; if_expr, device, thread_limit, private, firstprivate and nowait. // CHECK: omp.target if({{.*}}) device({{.*}}) thread_limit({{.*}}) nowait "omp.target"(%if_cond, %device, %num_threads) ({ // CHECK: omp.terminator omp.terminator - }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () + }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () // Test with optional map clause. // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target map_entries(%[[MAP_A]] -> {{.*}}, %[[MAP_B]] -> {{.*}} : memref, memref) { + // CHECK: omp.target is_device_ptr(%[[VAL_4:.*]] : memref) has_device_addr(%[[VAL_5:.*]] : memref) map_entries(%[[MAP_A]] -> {{.*}}, %[[MAP_B]] -> {{.*}} : memref, memref) { %mapv1 = omp.map.info var_ptr(%map1 : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} %mapv2 = omp.map.info var_ptr(%map2 : memref, tensor) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref {name = ""} - omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) { + omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) is_device_ptr(%device_ptr : memref) has_device_addr(%device_addr : memref) { ^bb0(%arg0: memref, %arg1: memref): omp.terminator } From ffb5bea2be9f966a39f243a7d8c2f48a1343cb4c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 11 Apr 2024 16:33:05 +0100 Subject: [PATCH 148/886] [X86] LowerBITREVERSE - support SSE-only GFNI i32/i64 bitreverse Support Tremont CPUs which don't have AVX but do have GFNI. 
Noticed while trying to workout how to clean up the costmodel for GFNI bitreverse --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 ++++---- llvm/test/CodeGen/X86/vector-bitreverse.ll | 46 ++++------------------ 2 files changed, 15 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 52be35aafb0f5..f274da6f6f776 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1286,6 +1286,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ, VT, Custom); } + if (Subtarget.hasGFNI()) { + setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); + } + // These might be better off as horizontal vector ops. setOperationAction(ISD::ADD, MVT::i16, Custom); setOperationAction(ISD::ADD, MVT::i32, Custom); @@ -1496,11 +1501,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom); - if (Subtarget.hasGFNI()) { - setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); - } - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); @@ -31337,12 +31337,9 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, if (VT.is256BitVector() && !Subtarget.hasInt256()) return splitVectorIntUnary(Op, DAG, DL); - // Lower i32/i64 to GFNI as vXi8 BITREVERSE + BSWAP + // Lower i32/i64 as vXi8 BITREVERSE + BSWAP if (!VT.isVector()) { - - assert((VT.getScalarType() == MVT::i32) || - (VT.getScalarType() == MVT::i64)); - + assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64"); MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In); Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8, diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index 1c5326d35bb00..b22b508db8b28 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -254,24 +254,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { ; ; GFNISSE-LABEL: test_bitreverse_i32: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi -; GFNISSE-NEXT: bswapl %edi -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; GFNISSE-NEXT: shll $4, %eax -; GFNISSE-NEXT: shrl $4, %edi -; GFNISSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; GFNISSE-NEXT: orl %eax, %edi -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $858993459, %eax # imm = 0x33333333 -; GFNISSE-NEXT: shrl $2, %edi -; GFNISSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax -; GFNISSE-NEXT: movl %eax, %ecx -; GFNISSE-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; GFNISSE-NEXT: shrl %eax -; GFNISSE-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax +; GFNISSE-NEXT: movd %edi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movd %xmm0, %eax +; GFNISSE-NEXT: bswapl %eax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i32: @@ -343,26 +329,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; ; GFNISSE-LABEL: 
test_bitreverse_i64: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: bswapq %rdi -; GFNISSE-NEXT: movq %rdi, %rax -; GFNISSE-NEXT: shrq $4, %rax -; GFNISSE-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F -; GFNISSE-NEXT: andq %rcx, %rax -; GFNISSE-NEXT: andq %rcx, %rdi -; GFNISSE-NEXT: shlq $4, %rdi -; GFNISSE-NEXT: orq %rax, %rdi -; GFNISSE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; GFNISSE-NEXT: movq %rdi, %rcx -; GFNISSE-NEXT: andq %rax, %rcx -; GFNISSE-NEXT: shrq $2, %rdi -; GFNISSE-NEXT: andq %rax, %rdi -; GFNISSE-NEXT: leaq (%rdi,%rcx,4), %rax -; GFNISSE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; GFNISSE-NEXT: movq %rax, %rdx -; GFNISSE-NEXT: andq %rcx, %rdx -; GFNISSE-NEXT: shrq %rax -; GFNISSE-NEXT: andq %rcx, %rax -; GFNISSE-NEXT: leaq (%rax,%rdx,2), %rax +; GFNISSE-NEXT: movq %rdi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movq %xmm0, %rax +; GFNISSE-NEXT: bswapq %rax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i64: From 7ab7e7a55f3fce08ccd3cbcae94dabe99dd9e94a Mon Sep 17 00:00:00 2001 From: Xu Zhang Date: Thu, 11 Apr 2024 23:49:59 +0800 Subject: [PATCH 149/886] [libc][docs] Generate docs for signal.h & optimized is_implemented func (#88028) Fixes #87835 This patch added the documentation for the POSIX functions according to [n3096](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3096.pdf) Section 7.14, and gives the *docgen.py* script a more elegant *is_implemented* function. --- libc/docs/index.rst | 1 + libc/docs/signal.rst | 43 +++++++++++++++++++++++++++++++++++ libc/utils/docgen/docgen.py | 13 +++++++---- libc/utils/docgen/signal.json | 29 +++++++++++++++++++++++ 4 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 libc/docs/signal.rst create mode 100644 libc/utils/docgen/signal.json diff --git a/libc/docs/index.rst b/libc/docs/index.rst index 8470c8d9287c2..11d5ae197d718 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -70,6 +70,7 @@ stages there is no ABI stability in any form. libc_search c23 ctype + signal .. toctree:: :hidden: diff --git a/libc/docs/signal.rst b/libc/docs/signal.rst new file mode 100644 index 0000000000000..7903bb439cb33 --- /dev/null +++ b/libc/docs/signal.rst @@ -0,0 +1,43 @@ +.. include:: check.rst + +signal.h Functions +================== + +.. list-table:: + :widths: auto + :align: center + :header-rows: 1 + + * - Function + - Implemented + - Standard + * - kill + - |check| + - + * - raise + - |check| + - 7.14.2.1 + * - sigaction + - |check| + - + * - sigaddset + - |check| + - + * - sigaltstack + - |check| + - + * - sigdelset + - |check| + - + * - sigemptyset + - |check| + - + * - sigfillset + - |check| + - + * - signal + - |check| + - 7.14.1.1 + * - sigprocmask + - |check| + - diff --git a/libc/utils/docgen/docgen.py b/libc/utils/docgen/docgen.py index 7411b4506f082..36eb409421b4f 100755 --- a/libc/utils/docgen/docgen.py +++ b/libc/utils/docgen/docgen.py @@ -23,12 +23,17 @@ def load_api(hname: str) -> Dict: # TODO: we may need to get more sophisticated for less generic implementations. # Does libc/src/{hname minus .h suffix}/{fname}.cpp exist? def is_implemented(hname: str, fname: str) -> bool: - return Path( + path = Path( Path(__file__).parent.parent.parent, "src", - hname.rstrip(".h"), - fname + ".cpp", - ).exists() + hname.rstrip(".h") + ) + # Recursively search for the target source file in the subdirectories under + # libc/src/{hname}. 
+ for _ in path.glob("**/" + fname + ".cpp"): + return True + + return False def print_functions(header: str, functions: Dict): diff --git a/libc/utils/docgen/signal.json b/libc/utils/docgen/signal.json new file mode 100644 index 0000000000000..976021a803a67 --- /dev/null +++ b/libc/utils/docgen/signal.json @@ -0,0 +1,29 @@ +{ + "macros": [ + "SIG_DFL", + "SIG_ERR", + "SIG_IGN", + "SIGABRT", + "SIGFPE", + "SIGILL", + "SIGINT", + "SIGSEGV", + "SIGTERM" + ], + "functions": { + "kill": null, + "sigaction": null, + "sigaddset": null, + "sigaltstack": null, + "sigdelset": null, + "sigemptyset": null, + "sigfillset": null, + "sigprocmask": null, + "signal": { + "defined": "7.14.1.1" + }, + "raise": { + "defined": "7.14.2.1" + } + } +} From b63fe0d72e2df3b3c4b9fcb91aea07b2582be195 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 11 Apr 2024 17:55:25 +0200 Subject: [PATCH 150/886] [libc++][NFC] Reduce the memory footprint of __copy_cv a bit (#87718) Instead of instantiating `__copy_cv` for every combination of `_From` and `_To` this only instantiates `__copy_cv` for every `_From` type, reducing the number of instantiations. --- libcxx/include/__type_traits/copy_cv.h | 28 +++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/libcxx/include/__type_traits/copy_cv.h b/libcxx/include/__type_traits/copy_cv.h index b1c057ff778b1..d482cb42bffed 100644 --- a/libcxx/include/__type_traits/copy_cv.h +++ b/libcxx/include/__type_traits/copy_cv.h @@ -19,28 +19,32 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Let COPYCV(FROM, TO) be an alias for type TO with the addition of FROM's // top-level cv-qualifiers. -template +template struct __copy_cv { - using type = _To; + template + using __apply = _To; }; -template -struct __copy_cv { - using type = const _To; +template +struct __copy_cv { + template + using __apply = const _To; }; -template -struct __copy_cv { - using type = volatile _To; +template +struct __copy_cv { + template + using __apply = volatile _To; }; -template -struct __copy_cv { - using type = const volatile _To; +template +struct __copy_cv { + template + using __apply = const volatile _To; }; template -using __copy_cv_t = typename __copy_cv<_From, _To>::type; +using __copy_cv_t = typename __copy_cv<_From>::template __apply<_To>; _LIBCPP_END_NAMESPACE_STD From 72f9881c3ffcf4be6361c3e4312d91c9c8d94a98 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 11 Apr 2024 17:09:07 +0100 Subject: [PATCH 151/886] [libclc] Refactor build system to allow in-tree builds (#87622) The previous build system was adding custom "OpenCL" and "LLVM IR" languages in CMake to build the builtin libraries. This was making it harder to build in-tree because the tool binaries needed to be present at configure time. This commit refactors the build system to use custom commands to build the bytecode files one by one, and link them all together into the final bytecode library. It also enables in-tree builds by aliasing the clang/llvm-link/etc. tool targets to internal targets, which are imported from the LLVM installation directory when building out of tree. Diffing (with llvm-diff) all of the final bytecode libraries in an out-of-tree configuration against those built using the current tip system shows no changes. Note that there are textual changes to metadata IDs which confuse regular diff, and that llvm-diff 14 and below may show false-positives. 
This commit also removes a file listed in one of the SOURCEs which didn't exist and which was preventing the use of ENABLE_RUNTIME_SUBNORMAL when configuring CMake. --- libclc/CMakeLists.txt | 277 +++++++++++------- libclc/cmake/CMakeCLCCompiler.cmake.in | 9 - libclc/cmake/CMakeCLCInformation.cmake | 12 - libclc/cmake/CMakeDetermineCLCCompiler.cmake | 18 -- .../cmake/CMakeDetermineLLAsmCompiler.cmake | 24 -- libclc/cmake/CMakeLLAsmCompiler.cmake.in | 10 - libclc/cmake/CMakeLLAsmInformation.cmake | 12 - libclc/cmake/CMakeTestCLCCompiler.cmake | 56 ---- libclc/cmake/CMakeTestLLAsmCompiler.cmake | 56 ---- libclc/cmake/modules/AddLibclc.cmake | 156 ++++++++++ libclc/generic/lib/SOURCES | 1 - llvm/tools/CMakeLists.txt | 3 + 12 files changed, 322 insertions(+), 312 deletions(-) delete mode 100644 libclc/cmake/CMakeCLCCompiler.cmake.in delete mode 100644 libclc/cmake/CMakeCLCInformation.cmake delete mode 100644 libclc/cmake/CMakeDetermineCLCCompiler.cmake delete mode 100644 libclc/cmake/CMakeDetermineLLAsmCompiler.cmake delete mode 100644 libclc/cmake/CMakeLLAsmCompiler.cmake.in delete mode 100644 libclc/cmake/CMakeLLAsmInformation.cmake delete mode 100644 libclc/cmake/CMakeTestCLCCompiler.cmake delete mode 100644 libclc/cmake/CMakeTestLLAsmCompiler.cmake create mode 100644 libclc/cmake/modules/AddLibclc.cmake diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index c6e3cdf23fe0c..7528228b3b7f9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -4,6 +4,15 @@ project( libclc VERSION 0.2.0 LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 17) +# Add path for custom modules +list( INSERT CMAKE_MODULE_PATH 0 "${PROJECT_SOURCE_DIR}/cmake/modules" ) + +set( LIBCLC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) +set( LIBCLC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) +set( LIBCLC_OBJFILE_DIR ${LIBCLC_BINARY_DIR}/obj.libclc.dir ) + +include( AddLibclc ) + include( GNUInstallDirs ) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS amdgcn-amdhsa/lib/SOURCES; @@ -27,31 +36,51 @@ set( LIBCLC_TARGETS_TO_BUILD "all" option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." 
OFF ) -find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_DIR}") -include(AddLLVM) +if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) + # Out-of-tree configuration + set( LIBCLC_STANDALONE_BUILD TRUE ) -message( STATUS "libclc LLVM version: ${LLVM_PACKAGE_VERSION}" ) + find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_DIR}") + include(AddLLVM) -if( LLVM_PACKAGE_VERSION VERSION_LESS LIBCLC_MIN_LLVM ) - message( FATAL_ERROR "libclc needs at least LLVM ${LIBCLC_MIN_LLVM}" ) -endif() + message( STATUS "libclc LLVM version: ${LLVM_PACKAGE_VERSION}" ) -find_program( LLVM_CLANG clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) -find_program( LLVM_AS llvm-as PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) -find_program( LLVM_LINK llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) -find_program( LLVM_OPT opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) -find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) + if( LLVM_PACKAGE_VERSION VERSION_LESS LIBCLC_MIN_LLVM ) + message( FATAL_ERROR "libclc needs at least LLVM ${LIBCLC_MIN_LLVM}" ) + endif() + + # Import required tools as targets + foreach( tool clang llvm-as llvm-link opt ) + find_program( LLVM_TOOL_${tool} ${tool} PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) + add_executable( libclc::${tool} IMPORTED GLOBAL ) + set_target_properties( libclc::${tool} PROPERTIES IMPORTED_LOCATION ${LLVM_TOOL_${tool}} ) + endforeach() +else() + # In-tree configuration + set( LIBCLC_STANDALONE_BUILD FALSE ) + + set( LLVM_PACKAGE_VERSION ${LLVM_VERSION} ) -# Print toolchain -message( STATUS "libclc toolchain - clang: ${LLVM_CLANG}" ) -message( STATUS "libclc toolchain - llvm-as: ${LLVM_AS}" ) -message( STATUS "libclc toolchain - llvm-link: ${LLVM_LINK}" ) -message( STATUS "libclc toolchain - opt: ${LLVM_OPT}" ) -message( STATUS "libclc toolchain - llvm-spirv: ${LLVM_SPIRV}" ) -if( NOT LLVM_CLANG OR NOT LLVM_OPT OR NOT LLVM_AS OR NOT LLVM_LINK ) + # Note that we check this later (for both build types) but we can provide a + # more useful error message when built in-tree. We assume that LLVM tools are + # always available so don't warn here. + if( NOT clang IN_LIST LLVM_ENABLE_PROJECTS ) + message(FATAL_ERROR "Clang is not enabled, but is required to build libclc in-tree") + endif() + + foreach( tool clang llvm-as llvm-link opt ) + add_executable(libclc::${tool} ALIAS ${tool}) + endforeach() +endif() + +if( NOT TARGET libclc::clang OR NOT TARGET libclc::opt + OR NOT TARGET libclc::llvm-as OR NOT TARGET libclc::llvm-link ) message( FATAL_ERROR "libclc toolchain incomplete!" ) endif() +# llvm-spirv is an optional dependency, used to build spirv-* targets. +find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) + # List of all targets. Note that some are added dynamically below. 
set( LIBCLC_TARGETS_ALL amdgcn-- @@ -90,24 +119,9 @@ if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST endif() endif() -set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) -set( CMAKE_CLC_COMPILER ${LLVM_CLANG} ) -set( CMAKE_CLC_ARCHIVE ${LLVM_LINK} ) -set( CMAKE_LLAsm_PREPROCESSOR ${LLVM_CLANG} ) -set( CMAKE_LLAsm_COMPILER ${LLVM_AS} ) -set( CMAKE_LLAsm_ARCHIVE ${LLVM_LINK} ) - # Construct LLVM version define set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}" ) - -# LLVM 13 enables standard includes by default -if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0.0 ) - set( CMAKE_LLAsm_FLAGS "${CMAKE_LLAsm_FLAGS} -cl-no-stdinc" ) - set( CMAKE_CLC_FLAGS "${CMAKE_CLC_FLAGS} -cl-no-stdinc" ) -endif() - -enable_language( CLC LLAsm ) # This needs to be set before any target that needs it # We need to use LLVM_INCLUDE_DIRS here, because if we are linking to an # llvm build directory, this includes $src/llvm/include which is where all the @@ -122,7 +136,7 @@ set(LLVM_LINK_COMPONENTS IRReader Support ) -if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) +if( LIBCLC_STANDALONE_BUILD ) add_llvm_executable( prepare_builtins utils/prepare-builtins.cpp ) else() add_llvm_utility( prepare_builtins utils/prepare-builtins.cpp ) @@ -167,12 +181,14 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTAL install( DIRECTORY generic/include/clc DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" ) if( ENABLE_RUNTIME_SUBNORMAL ) - add_library( subnormal_use_default STATIC - generic/lib/subnormal_use_default.ll ) - add_library( subnormal_disable STATIC - generic/lib/subnormal_disable.ll ) - install( TARGETS subnormal_use_default subnormal_disable ARCHIVE - DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + foreach( file subnormal_use_default subnormal_disable ) + link_bc( + TARGET ${file} + INPUTS ${PROJECT_SOURCE_DIR}/generic/lib/${file}.ll + ) + install( FILES $ ARCHIVE + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + endforeach() endif() find_package( Python3 REQUIRED COMPONENTS Interpreter ) @@ -232,19 +248,19 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Add the generated convert.cl here to prevent adding the one listed in # SOURCES + set( objects ) # A "set" of already-added input files + set( rel_files ) # Source directory input files, relative to the root dir + set( gen_files ) # Generated binary input files, relative to the binary dir if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" ) if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND NOT ${ARCH} STREQUAL "clspv64" ) - set( rel_files convert.cl ) - set( objects convert.cl ) + list( APPEND gen_files convert.cl ) + list( APPEND objects convert.cl ) list( APPEND rel_files generic/lib/subnormal_use_default.ll ) elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64") - set( rel_files clspv-convert.cl ) - set( objects clspv-convert.cl ) + list( APPEND gen_files clspv-convert.cl ) + list( APPEND objects clspv-convert.cl ) endif() - else() - set( rel_files ) - set( objects ) endif() foreach( l ${source_list} ) @@ -252,46 +268,35 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) string( REPLACE "\n" ";" file_list ${file_list} ) get_filename_component( dir ${l} DIRECTORY ) foreach( f ${file_list} ) - list( FIND objects ${f} found ) - if( found EQUAL -1 ) + # Only add each file once, so that targets can 'specialize' builtins + if( NOT ${f} IN_LIST objects ) list( APPEND objects ${f} ) list( APPEND rel_files ${dir}/${f} ) - # FIXME: This should 
really go away - file( TO_CMAKE_PATH ${PROJECT_SOURCE_DIR}/${dir}/${f} src_loc ) - get_filename_component( fdir ${src_loc} DIRECTORY ) - - set_source_files_properties( ${dir}/${f} - PROPERTIES COMPILE_FLAGS "-I ${fdir}" ) endif() endforeach() endforeach() foreach( d ${${t}_devices} ) - # Some targets don't have a specific GPU to target - if( ${d} STREQUAL "none" OR ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) - set( mcpu ) - set( arch_suffix "${t}" ) - else() - set( mcpu "-mcpu=${d}" ) - set( arch_suffix "${d}-${t}" ) + get_libclc_device_info( + TRIPLE ${t} + DEVICE ${d} + CPU cpu + ARCH_SUFFIX arch_suffix + CLANG_TRIPLE clang_triple + ) + + set( mcpu ) + if( NOT "${cpu}" STREQUAL "" ) + set( mcpu "-mcpu=${cpu}" ) endif() + message( STATUS " device: ${d} ( ${${d}_aliases} )" ) - if ( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) - if( ${ARCH} STREQUAL "spirv" ) - set( t "spir--" ) - else() - set( t "spir64--" ) - endif() + if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) set( build_flags -O0 -finline-hint-functions ) set( opt_flags ) set( spvflags --spirv-max-version=1.1 ) - elseif( ${ARCH} STREQUAL "clspv" ) - set( t "spir--" ) - set( build_flags "-Wno-unknown-assumption") - set( opt_flags -O3 ) - elseif( ${ARCH} STREQUAL "clspv64" ) - set( t "spir64--" ) + elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) set( build_flags "-Wno-unknown-assumption") set( opt_flags -O3 ) else() @@ -299,53 +304,97 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( opt_flags -O3 ) endif() + set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) + file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) + + string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE ) + + list( APPEND build_flags + -D__CLC_INTERNAL + -D${CLC_TARGET_DEFINE} + -I${PROJECT_SOURCE_DIR}/generic/include + # FIXME: Fix libclc to not require disabling this noisy warning + -Wno-bitwise-conditional-parentheses + ) + + set( bytecode_files "" ) + foreach( file IN LISTS gen_files rel_files ) + # We need to take each file and produce an absolute input file, as well + # as a unique architecture-specific output file. We deal with a mix of + # different input files, which makes this trickier. + if( ${file} IN_LIST gen_files ) + # Generated files are given just as file names, which we must make + # absolute to the binary directory. + set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.o" ) + else() + # Other files are originally relative to each SOURCE file, which are + # then make relative to the libclc root directory. We must normalize + # the path (e.g., ironing out any ".."), then make it relative to the + # root directory again, and use that relative path component for the + # binary path. 
+ get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${PROJECT_SOURCE_DIR} ) + file( RELATIVE_PATH root_rel_path ${PROJECT_SOURCE_DIR} ${abs_path} ) + set( input_file ${PROJECT_SOURCE_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.o" ) + endif() + + get_filename_component( file_dir ${file} DIRECTORY ) + + compile_to_bc( + TRIPLE ${clang_triple} + INPUT ${input_file} + OUTPUT ${output_file} + EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib + "${build_flags}" -I${PROJECT_SOURCE_DIR}/${file_dir} + ) + list( APPEND bytecode_files ${output_file} ) + endforeach() + set( builtins_link_lib_tgt builtins.link.${arch_suffix} ) - add_library( ${builtins_link_lib_tgt} STATIC ${rel_files} ) - # Make sure we depend on the pseudo target to prevent - # multiple invocations - add_dependencies( ${builtins_link_lib_tgt} generate_convert.cl ) - add_dependencies( ${builtins_link_lib_tgt} clspv-generate_convert.cl ) - # CMake will turn this include into absolute path - target_include_directories( ${builtins_link_lib_tgt} PRIVATE - "generic/include" ) - target_compile_definitions( ${builtins_link_lib_tgt} PRIVATE - "__CLC_INTERNAL" ) - string( TOUPPER "-DCLC_${ARCH}" CLC_TARGET_DEFINE ) - target_compile_definitions( ${builtins_link_lib_tgt} PRIVATE - ${CLC_TARGET_DEFINE} ) - target_compile_options( ${builtins_link_lib_tgt} PRIVATE -target - ${t} ${mcpu} -fno-builtin -nostdlib ${build_flags} ) - set_target_properties( ${builtins_link_lib_tgt} PROPERTIES - LINKER_LANGUAGE CLC ) - - set( obj_suffix ${arch_suffix}.bc ) - set( builtins_opt_lib_tgt builtins.opt.${obj_suffix} ) - - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt} - COMMAND ${LLVM_OPT} ${opt_flags} -o ${builtins_opt_lib_tgt} - $ - DEPENDS ${builtins_link_lib_tgt} ) - add_custom_target( "opt.${obj_suffix}" ALL - DEPENDS ${builtins_opt_lib_tgt} ) - - if( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) + + link_bc( + TARGET ${builtins_link_lib_tgt} + INPUTS ${bytecode_files} + ) + + set( builtins_link_lib $ ) + + if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) set( spv_suffix ${arch_suffix}.spv ) - add_custom_command( OUTPUT "${spv_suffix}" - COMMAND ${LLVM_SPIRV} ${spvflags} -o "${spv_suffix}" $ - DEPENDS ${builtins_link_lib_tgt} ) + add_custom_command( OUTPUT ${spv_suffix} + COMMAND ${LLVM_SPIRV} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} + DEPENDS ${builtins_link_lib_tgt} + ) add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) else() + set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) + + # Add opt target + add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc + COMMAND libclc::opt ${opt_flags} -o ${builtins_opt_lib_tgt}.bc + ${builtins_link_lib} + DEPENDS libclc::opt ${builtins_link_lib_tgt} + ) + add_custom_target( ${builtins_opt_lib_tgt} + ALL DEPENDS ${builtins_opt_lib_tgt}.bc + ) + set_target_properties( ${builtins_opt_lib_tgt} + PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc + ) + # Add prepare target - add_custom_command( OUTPUT "${obj_suffix}" - COMMAND prepare_builtins -o "${obj_suffix}" ${builtins_opt_lib_tgt} - DEPENDS "opt.${obj_suffix}" ${builtins_opt_lib_tgt} prepare_builtins ) - add_custom_target( "prepare-${obj_suffix}" ALL DEPENDS "${obj_suffix}" ) + set( obj_suffix ${arch_suffix}.bc ) + add_custom_command( OUTPUT ${obj_suffix} + COMMAND prepare_builtins -o ${obj_suffix} + $ + DEPENDS ${builtins_opt_lib_tgt} prepare_builtins ) + 
add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) # nvptx-- targets don't include workitem builtins - if( NOT ${t} MATCHES ".*ptx.*--$" ) + if( NOT clang_triple MATCHES ".*ptx.*--$" ) add_test( NAME external-calls-${obj_suffix} COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} ) @@ -353,10 +402,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) foreach( a ${${d}_aliases} ) - set( alias_suffix "${a}-${t}.bc" ) + set( alias_suffix "${a}-${clang_triple}.bc" ) add_custom_target( ${alias_suffix} ALL COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix} - DEPENDS "prepare-${obj_suffix}" ) + DEPENDS prepare-${obj_suffix} ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) endforeach( a ) endif() diff --git a/libclc/cmake/CMakeCLCCompiler.cmake.in b/libclc/cmake/CMakeCLCCompiler.cmake.in deleted file mode 100644 index 2730b83d9e7d0..0000000000000 --- a/libclc/cmake/CMakeCLCCompiler.cmake.in +++ /dev/null @@ -1,9 +0,0 @@ -set(CMAKE_CLC_COMPILER "@CMAKE_CLC_COMPILER@") -set(CMAKE_CLC_COMPILER_LOADED 1) - -set(CMAKE_CLC_SOURCE_FILE_EXTENSIONS cl) -set(CMAKE_CLC_OUTPUT_EXTENSION .bc) -set(CMAKE_CLC_OUTPUT_EXTENSION_REPLACE 1) -set(CMAKE_STATIC_LIBRARY_PREFIX_CLC "") -set(CMAKE_STATIC_LIBRARY_SUFFIX_CLC ".bc") -set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER") diff --git a/libclc/cmake/CMakeCLCInformation.cmake b/libclc/cmake/CMakeCLCInformation.cmake deleted file mode 100644 index 95327e4439722..0000000000000 --- a/libclc/cmake/CMakeCLCInformation.cmake +++ /dev/null @@ -1,12 +0,0 @@ -if(NOT CMAKE_CLC_COMPILE_OBJECT) - set(CMAKE_CLC_COMPILE_OBJECT - " -o -c -emit-llvm") -endif() - -if(NOT CMAKE_CLC_CREATE_STATIC_LIBRARY) - set(CMAKE_CLC_CREATE_STATIC_LIBRARY - " -o ") -endif() - -set(CMAKE_INCLUDE_FLAG_CLC "-I") -set(CMAKE_DEPFILE_FLAGS_CLC "-MD -MT -MF ") diff --git a/libclc/cmake/CMakeDetermineCLCCompiler.cmake b/libclc/cmake/CMakeDetermineCLCCompiler.cmake deleted file mode 100644 index 94d85d9e666ad..0000000000000 --- a/libclc/cmake/CMakeDetermineCLCCompiler.cmake +++ /dev/null @@ -1,18 +0,0 @@ -include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake) - -if(NOT CMAKE_CLC_COMPILER) - find_program(CMAKE_CLC_COMPILER NAMES clang) -endif() -mark_as_advanced(CMAKE_CLC_COMPILER) - -if(NOT CMAKE_CLC_ARCHIVE) - find_program(CMAKE_CLC_ARCHIVE NAMES llvm-link) -endif() -mark_as_advanced(CMAKE_CLC_ARCHIVE) - -set(CMAKE_CLC_COMPILER_ENV_VAR "CLC_COMPILER") -set(CMAKE_CLC_ARCHIVE_ENV_VAR "CLC_LINKER") -find_file(clc_comp_in CMakeCLCCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH}) -# configure all variables set in this file -configure_file(${clc_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeCLCCompiler.cmake @ONLY) -mark_as_advanced(clc_comp_in) diff --git a/libclc/cmake/CMakeDetermineLLAsmCompiler.cmake b/libclc/cmake/CMakeDetermineLLAsmCompiler.cmake deleted file mode 100644 index 1c424c79cb453..0000000000000 --- a/libclc/cmake/CMakeDetermineLLAsmCompiler.cmake +++ /dev/null @@ -1,24 +0,0 @@ -include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake) - -if(NOT CMAKE_LLAsm_PREPROCESSOR) - find_program(CMAKE_LLAsm_PREPROCESSOR NAMES clang) -endif() -mark_as_advanced(CMAKE_LLAsm_PREPROCESSOR) - -if(NOT CMAKE_LLAsm_COMPILER) - find_program(CMAKE_LLAsm_COMPILER NAMES llvm-as) -endif() -mark_as_advanced(CMAKE_LLAsm_ASSEMBLER) 
- -if(NOT CMAKE_LLAsm_ARCHIVE) - find_program(CMAKE_LLAsm_ARCHIVE NAMES llvm-link) -endif() -mark_as_advanced(CMAKE_LLAsm_ARCHIVE) - -set(CMAKE_LLAsm_PREPROCESSOR_ENV_VAR "LL_PREPROCESSOR") -set(CMAKE_LLAsm_COMPILER_ENV_VAR "LL_ASSEMBLER") -set(CMAKE_LLAsm_ARCHIVE_ENV_VAR "LL_LINKER") -find_file(ll_comp_in CMakeLLAsmCompiler.cmake.in PATHS ${CMAKE_ROOT}/Modules ${CMAKE_MODULE_PATH}) -# configure all variables set in this file -configure_file(${ll_comp_in} ${CMAKE_PLATFORM_INFO_DIR}/CMakeLLAsmCompiler.cmake @ONLY) -mark_as_advanced(ll_comp_in) diff --git a/libclc/cmake/CMakeLLAsmCompiler.cmake.in b/libclc/cmake/CMakeLLAsmCompiler.cmake.in deleted file mode 100644 index 2b00f69234dd2..0000000000000 --- a/libclc/cmake/CMakeLLAsmCompiler.cmake.in +++ /dev/null @@ -1,10 +0,0 @@ -set(CMAKE_LLAsm_PREPROCESSOR "@CMAKE_LLAsm_PREPROCESSOR@") -set(CMAKE_LLAsm_COMPILER "@CMAKE_LLAsm_COMPILER@") -set(CMAKE_LLAsm_ARCHIVE "@CMAKE_LLAsm_ARCHIVE@") -set(CMAKE_LLAsm_COMPILER_LOADED 1) - -set(CMAKE_LLAsm_SOURCE_FILE_EXTENSIONS ll) -set(CMAKE_LLAsm_OUTPUT_EXTENSION .bc) -set(CMAKE_LLAsm_OUTPUT_EXTENSION_REPLACE 1) -set(CMAKE_STATIC_LIBRARY_PREFIX_LLAsm "") -set(CMAKE_STATIC_LIBRARY_SUFFIX_LLAsm ".bc") diff --git a/libclc/cmake/CMakeLLAsmInformation.cmake b/libclc/cmake/CMakeLLAsmInformation.cmake deleted file mode 100644 index 35ec3081da0f7..0000000000000 --- a/libclc/cmake/CMakeLLAsmInformation.cmake +++ /dev/null @@ -1,12 +0,0 @@ -if(NOT CMAKE_LLAsm_COMPILE_OBJECT) - set(CMAKE_LLAsm_COMPILE_OBJECT - "${CMAKE_LLAsm_PREPROCESSOR} -E -P -x cl -o .temp" - " -o .temp") -endif() - -if(NOT CMAKE_LLAsm_CREATE_STATIC_LIBRARY) - set(CMAKE_LLAsm_CREATE_STATIC_LIBRARY - " -o ") -endif() - -set(CMAKE_INCLUDE_FLAG_LLAsm "-I") diff --git a/libclc/cmake/CMakeTestCLCCompiler.cmake b/libclc/cmake/CMakeTestCLCCompiler.cmake deleted file mode 100644 index 869fcc3d01ab0..0000000000000 --- a/libclc/cmake/CMakeTestCLCCompiler.cmake +++ /dev/null @@ -1,56 +0,0 @@ -if(CMAKE_CLC_COMPILER_FORCED) - # The compiler configuration was forced by the user. - # Assume the user has configured all compiler information. - set(CMAKE_CLC_COMPILER_WORKS TRUE) - return() -endif() - -include(CMakeTestCompilerCommon) - -# Remove any cached result from an older CMake version. -# We now store this in CMakeCCompiler.cmake. -unset(CMAKE_CLC_COMPILER_WORKS CACHE) - -# This file is used by EnableLanguage in cmGlobalGenerator to -# determine that that selected CLC compiler can actually compile -# and link the most basic of programs. If not, a fatal error -# is set and cmake stops processing commands and will not generate -# any makefiles or projects. -if(NOT CMAKE_CLC_COMPILER_WORKS) - PrintTestCompilerStatus("CLC" "") - file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testCLCCompiler.cl - "__kernel void test_k(global int * a)\n" - "{ *a = 1; }\n") - try_compile(CMAKE_CLC_COMPILER_WORKS ${CMAKE_BINARY_DIR} - ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testCLCCompiler.cl - # We never generate executable so bypass the link step - CMAKE_FLAGS -DCMAKE_CLC_LINK_EXECUTABLE='echo' - OUTPUT_VARIABLE __CMAKE_CLC_COMPILER_OUTPUT) - # Move result from cache to normal variable. 
- set(CMAKE_CLC_COMPILER_WORKS ${CMAKE_CLC_COMPILER_WORKS}) - unset(CMAKE_CLC_COMPILER_WORKS CACHE) - set(CLC_TEST_WAS_RUN 1) -endif() - -if(NOT CMAKE_CLC_COMPILER_WORKS) - PrintTestCompilerStatus("CLC" " -- broken") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log - "Determining if the CLC compiler works failed with " - "the following output:\n${__CMAKE_CLC_COMPILER_OUTPUT}\n\n") - message(FATAL_ERROR "The CLC compiler \"${CMAKE_CLC_COMPILER}\" " - "is not able to compile a simple test program.\nIt fails " - "with the following output:\n ${__CMAKE_CLC_COMPILER_OUTPUT}\n\n" - "CMake will not be able to correctly generate this project.") -else() - if(CLC_TEST_WAS_RUN) - PrintTestCompilerStatus("CLC" " -- works") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Determining if the CLC compiler works passed with " - "the following output:\n${__CMAKE_CLC_COMPILER_OUTPUT}\n\n") - endif() - - include(${CMAKE_PLATFORM_INFO_DIR}/CMakeCLCCompiler.cmake) - -endif() - -unset(__CMAKE_CLC_COMPILER_OUTPUT) diff --git a/libclc/cmake/CMakeTestLLAsmCompiler.cmake b/libclc/cmake/CMakeTestLLAsmCompiler.cmake deleted file mode 100644 index 35948ee07a940..0000000000000 --- a/libclc/cmake/CMakeTestLLAsmCompiler.cmake +++ /dev/null @@ -1,56 +0,0 @@ -if(CMAKE_LLAsm_COMPILER_FORCED) - # The compiler configuration was forced by the user. - # Assume the user has configured all compiler information. - set(CMAKE_LLAsm_COMPILER_WORKS TRUE) - return() -endif() - -include(CMakeTestCompilerCommon) - -# Remove any cached result from an older CMake version. -# We now store this in CMakeCCompiler.cmake. -unset(CMAKE_LLAsm_COMPILER_WORKS CACHE) - -# This file is used by EnableLanguage in cmGlobalGenerator to -# determine that that selected llvm assembler can actually compile -# and link the most basic of programs. If not, a fatal error -# is set and cmake stops processing commands and will not generate -# any makefiles or projects. -if(NOT CMAKE_LLAsm_COMPILER_WORKS) - PrintTestCompilerStatus("LLAsm" "") - file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testLLAsmCompiler.ll - "define i32 @test() {\n" - "ret i32 0 }\n" ) - try_compile(CMAKE_LLAsm_COMPILER_WORKS ${CMAKE_BINARY_DIR} - ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testLLAsmCompiler.ll - # We never generate executable so bypass the link step - CMAKE_FLAGS -DCMAKE_LLAsm_LINK_EXECUTABLE='echo' - OUTPUT_VARIABLE __CMAKE_LLAsm_COMPILER_OUTPUT) - # Move result from cache to normal variable. 
- set(CMAKE_LLAsm_COMPILER_WORKS ${CMAKE_LLAsm_COMPILER_WORKS}) - unset(CMAKE_LLAsm_COMPILER_WORKS CACHE) - set(LLAsm_TEST_WAS_RUN 1) -endif() - -if(NOT CMAKE_LLAsm_COMPILER_WORKS) - PrintTestCompilerStatus("LLAsm" " -- broken") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log - "Determining if the LLAsm compiler works failed with " - "the following output:\n${__CMAKE_LLAsm_COMPILER_OUTPUT}\n\n") - message(FATAL_ERROR "The LLAsm compiler \"${CMAKE_LLAsm_COMPILER}\" " - "is not able to compile a simple test program.\nIt fails " - "with the following output:\n ${__CMAKE_LLAsm_COMPILER_OUTPUT}\n\n" - "CMake will not be able to correctly generate this project.") -else() - if(LLAsm_TEST_WAS_RUN) - PrintTestCompilerStatus("LLAsm" " -- works") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Determining if the LLAsm compiler works passed with " - "the following output:\n${__CMAKE_LLAsm_COMPILER_OUTPUT}\n\n") - endif() - - include(${CMAKE_PLATFORM_INFO_DIR}/CMakeLLAsmCompiler.cmake) - -endif() - -unset(__CMAKE_LLAsm_COMPILER_OUTPUT) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake new file mode 100644 index 0000000000000..5e09cde8035c2 --- /dev/null +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -0,0 +1,156 @@ +# Compiles an OpenCL C - or assembles an LL file - to bytecode +# +# Arguments: +# * TRIPLE +# Target triple for which to compile the bytecode file. +# * INPUT +# File to compile/assemble to bytecode +# * OUTPUT +# Bytecode file to generate +# * EXTRA_OPTS ... +# List of compiler options to use. Note that some are added by default. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the libclc::clang and libclc::llvm-as targets for compiling and +# assembling, respectively. +function(compile_to_bc) + cmake_parse_arguments(ARG + "" + "TRIPLE;INPUT;OUTPUT" + "EXTRA_OPTS;DEPENDENCIES" + ${ARGN} + ) + + # If this is an LLVM IR file (identified soley by its file suffix), + # pre-process it with clang to a temp file, then assemble that to bytecode. + set( TMP_SUFFIX ) + get_filename_component( FILE_EXT ${ARG_INPUT} EXT ) + if( NOT ${FILE_EXT} STREQUAL ".ll" ) + # Pass '-c' when not running the preprocessor + set( PP_OPTS -c ) + else() + set( PP_OPTS -E;-P ) + set( TMP_SUFFIX .tmp ) + endif() + + set( TARGET_ARG ) + if( ARG_TRIPLE ) + set( TARGET_ARG "-target" ${ARG_TRIPLE} ) + endif() + + add_custom_command( + OUTPUT ${ARG_OUTPUT}${TMP_SUFFIX} + COMMAND libclc::clang + ${TARGET_ARG} + ${PP_OPTS} + ${ARG_EXTRA_OPTS} + -MD -MF ${ARG_OUTPUT}.d -MT ${ARG_OUTPUT}${TMP_SUFFIX} + # LLVM 13 enables standard includes by default - we don't want + # those when pre-processing IR. We disable it unconditionally. + $<$:-cl-no-stdinc> + -emit-llvm + -o ${ARG_OUTPUT}${TMP_SUFFIX} + -x cl + ${ARG_INPUT} + DEPENDS + libclc::clang + ${ARG_INPUT} + ${ARG_DEPENDENCIES} + DEPFILE ${ARG_OUTPUT}.d + ) + + if( ${FILE_EXT} STREQUAL ".ll" ) + add_custom_command( + OUTPUT ${ARG_OUTPUT} + COMMAND libclc::llvm-as -o ${ARG_OUTPUT} ${ARG_OUTPUT}${TMP_SUFFIX} + DEPENDS libclc::llvm-as ${ARG_OUTPUT}${TMP_SUFFIX} + ) + endif() +endfunction() + +# Links together one or more bytecode files +# +# Arguments: +# * TARGET +# Custom target to create +# * INPUT ... 
+# List of bytecode files to link together +function(link_bc) + cmake_parse_arguments(ARG + "" + "TARGET" + "INPUTS" + ${ARGN} + ) + + add_custom_command( + OUTPUT ${ARG_TARGET}.bc + COMMAND libclc::llvm-link -o ${ARG_TARGET}.bc ${ARG_INPUTS} + DEPENDS libclc::llvm-link ${ARG_INPUTS} + ) + + add_custom_target( ${ARG_TARGET} ALL DEPENDS ${ARG_TARGET}.bc ) + set_target_properties( ${ARG_TARGET} PROPERTIES TARGET_FILE ${ARG_TARGET}.bc ) +endfunction() + +# Decomposes and returns variables based on a libclc triple and architecture +# combination. Returns data via one or more optional output variables. +# +# Arguments: +# * TRIPLE +# libclc target triple to query +# * DEVICE +# libclc device to query +# +# Optional Arguments: +# * CPU +# Variable name to be set to the target CPU +# * ARCH_SUFFIX +# Variable name to be set to the triple/architecture suffix +# * CLANG_TRIPLE +# Variable name to be set to the normalized clang triple +function(get_libclc_device_info) + cmake_parse_arguments(ARG + "" + "TRIPLE;DEVICE;CPU;ARCH_SUFFIX;CLANG_TRIPLE" + "" + ${ARGN} + ) + + if( NOT ARG_TRIPLE OR NOT ARG_DEVICE ) + message( FATAL_ERROR "Must provide both TRIPLE and DEVICE" ) + endif() + + string( REPLACE "-" ";" TRIPLE ${ARG_TRIPLE} ) + list( GET TRIPLE 0 ARCH ) + + # Some targets don't have a specific device architecture to target + if( ARG_DEVICE STREQUAL none OR ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) + set( cpu ) + set( arch_suffix "${ARG_TRIPLE}" ) + else() + set( cpu "${ARG_DEVICE}" ) + set( arch_suffix "${ARG_DEVICE}-${ARG_TRIPLE}" ) + endif() + + if( ARG_CPU ) + set( ${ARG_CPU} ${cpu} PARENT_SCOPE ) + endif() + + if( ARG_ARCH_SUFFIX ) + set( ${ARG_ARCH_SUFFIX} ${arch_suffix} PARENT_SCOPE ) + endif() + + # Some libclc targets are not real clang triples: return their canonical + # triples. 
+ if( ARCH STREQUAL spirv OR ARCH STREQUAL clspv ) + set( ARG_TRIPLE "spir--" ) + elseif( ARCH STREQUAL spirv64 OR ARCH STREQUAL clspv64 ) + set( ARG_TRIPLE "spir64--" ) + endif() + + if( ARG_CLANG_TRIPLE ) + set( ${ARG_CLANG_TRIPLE} ${ARG_TRIPLE} PARENT_SCOPE ) + endif() +endfunction() diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index ee2736b5fbc57..579e909e53d46 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -48,7 +48,6 @@ cl_khr_int64_extended_atomics/atom_max.cl cl_khr_int64_extended_atomics/atom_min.cl cl_khr_int64_extended_atomics/atom_or.cl cl_khr_int64_extended_atomics/atom_xor.cl -convert.cl common/degrees.cl common/mix.cl common/radians.cl diff --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt index cde57367934e4..db66dad5dc0db 100644 --- a/llvm/tools/CMakeLists.txt +++ b/llvm/tools/CMakeLists.txt @@ -52,6 +52,9 @@ add_llvm_implicit_projects() add_llvm_external_project(polly) +# libclc depends on clang +add_llvm_external_project(libclc) + # Add subprojects specified using LLVM_EXTERNAL_PROJECTS foreach(p ${LLVM_EXTERNAL_PROJECTS}) add_llvm_external_project(${p}) From 0d96422768908a8235f05a5d3b1d43ecc6038004 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Thu, 11 Apr 2024 09:30:48 -0700 Subject: [PATCH 152/886] [Object][Wasm] Move wasm Object tests into their own directory (NFC) (#81072) --- .../{wasm-bad-data-symbol.yaml => Wasm/bad-data-symbol.yaml} | 0 .../bad-metadata-version.yaml} | 0 llvm/test/Object/Wasm/bad-reloc-type.test | 3 +++ .../{wasm-bad-relocation.yaml => Wasm/bad-relocation.yaml} | 0 llvm/test/Object/Wasm/bad-symbol-type.test | 3 +++ .../{wasm-duplicate-name.test => Wasm/duplicate-name.test} | 0 .../Object/{wasm-invalid-file.yaml => Wasm/invalid-file.yaml} | 0 .../invalid-section-order.test} | 2 +- .../{wasm-invalid-start.test => Wasm/invalid-start.test} | 0 .../linked-namesec-with-linkingsec.yaml} | 0 .../linked-symbol-table.yaml} | 0 llvm/test/Object/Wasm/missing-version.test | 2 ++ .../{wasm-obj2yaml-tables.test => Wasm/obj2yaml-tables.test} | 2 +- .../relocs-and-producers.yaml} | 0 llvm/test/Object/Wasm/string-outside-section.test | 3 +++ llvm/test/Object/wasm-bad-reloc-type.test | 3 --- llvm/test/Object/wasm-bad-symbol-type.test | 3 --- llvm/test/Object/wasm-missing-version.test | 2 -- llvm/test/Object/wasm-string-outside-section.test | 3 --- 19 files changed, 13 insertions(+), 13 deletions(-) rename llvm/test/Object/{wasm-bad-data-symbol.yaml => Wasm/bad-data-symbol.yaml} (100%) rename llvm/test/Object/{wasm-bad-metadata-version.yaml => Wasm/bad-metadata-version.yaml} (100%) create mode 100644 llvm/test/Object/Wasm/bad-reloc-type.test rename llvm/test/Object/{wasm-bad-relocation.yaml => Wasm/bad-relocation.yaml} (100%) create mode 100644 llvm/test/Object/Wasm/bad-symbol-type.test rename llvm/test/Object/{wasm-duplicate-name.test => Wasm/duplicate-name.test} (100%) rename llvm/test/Object/{wasm-invalid-file.yaml => Wasm/invalid-file.yaml} (100%) rename llvm/test/Object/{wasm-invalid-section-order.test => Wasm/invalid-section-order.test} (82%) rename llvm/test/Object/{wasm-invalid-start.test => Wasm/invalid-start.test} (100%) rename llvm/test/Object/{wasm-linked-namesec-with-linkingsec.yaml => Wasm/linked-namesec-with-linkingsec.yaml} (100%) rename llvm/test/Object/{wasm-linked-symbol-table.yaml => Wasm/linked-symbol-table.yaml} (100%) create mode 100644 llvm/test/Object/Wasm/missing-version.test rename llvm/test/Object/{wasm-obj2yaml-tables.test => Wasm/obj2yaml-tables.test} (98%) rename 
llvm/test/Object/{wasm-relocs-and-producers.yaml => Wasm/relocs-and-producers.yaml} (100%) create mode 100644 llvm/test/Object/Wasm/string-outside-section.test delete mode 100644 llvm/test/Object/wasm-bad-reloc-type.test delete mode 100644 llvm/test/Object/wasm-bad-symbol-type.test delete mode 100644 llvm/test/Object/wasm-missing-version.test delete mode 100644 llvm/test/Object/wasm-string-outside-section.test diff --git a/llvm/test/Object/wasm-bad-data-symbol.yaml b/llvm/test/Object/Wasm/bad-data-symbol.yaml similarity index 100% rename from llvm/test/Object/wasm-bad-data-symbol.yaml rename to llvm/test/Object/Wasm/bad-data-symbol.yaml diff --git a/llvm/test/Object/wasm-bad-metadata-version.yaml b/llvm/test/Object/Wasm/bad-metadata-version.yaml similarity index 100% rename from llvm/test/Object/wasm-bad-metadata-version.yaml rename to llvm/test/Object/Wasm/bad-metadata-version.yaml diff --git a/llvm/test/Object/Wasm/bad-reloc-type.test b/llvm/test/Object/Wasm/bad-reloc-type.test new file mode 100644 index 0000000000000..4e210271c0b93 --- /dev/null +++ b/llvm/test/Object/Wasm/bad-reloc-type.test @@ -0,0 +1,3 @@ +RUN: not llvm-objdump -s %p/../Inputs/WASM/bad-reloc-type.wasm 2>&1 | FileCheck %s + +CHECK: invalid relocation type: 63 diff --git a/llvm/test/Object/wasm-bad-relocation.yaml b/llvm/test/Object/Wasm/bad-relocation.yaml similarity index 100% rename from llvm/test/Object/wasm-bad-relocation.yaml rename to llvm/test/Object/Wasm/bad-relocation.yaml diff --git a/llvm/test/Object/Wasm/bad-symbol-type.test b/llvm/test/Object/Wasm/bad-symbol-type.test new file mode 100644 index 0000000000000..5b9770f18bb62 --- /dev/null +++ b/llvm/test/Object/Wasm/bad-symbol-type.test @@ -0,0 +1,3 @@ +RUN: not llvm-objdump -s %p/../Inputs/WASM/bad-symbol-type.wasm 2>&1 | FileCheck %s + +CHECK: invalid symbol type: 63 diff --git a/llvm/test/Object/wasm-duplicate-name.test b/llvm/test/Object/Wasm/duplicate-name.test similarity index 100% rename from llvm/test/Object/wasm-duplicate-name.test rename to llvm/test/Object/Wasm/duplicate-name.test diff --git a/llvm/test/Object/wasm-invalid-file.yaml b/llvm/test/Object/Wasm/invalid-file.yaml similarity index 100% rename from llvm/test/Object/wasm-invalid-file.yaml rename to llvm/test/Object/Wasm/invalid-file.yaml diff --git a/llvm/test/Object/wasm-invalid-section-order.test b/llvm/test/Object/Wasm/invalid-section-order.test similarity index 82% rename from llvm/test/Object/wasm-invalid-section-order.test rename to llvm/test/Object/Wasm/invalid-section-order.test index 9a67f09150e5e..93cf9eb57059c 100644 --- a/llvm/test/Object/wasm-invalid-section-order.test +++ b/llvm/test/Object/Wasm/invalid-section-order.test @@ -1,4 +1,4 @@ -# RUN: not obj2yaml %p/Inputs/WASM/invalid-section-order.wasm 2>&1 | FileCheck %s +# RUN: not obj2yaml %p/../Inputs/WASM/invalid-section-order.wasm 2>&1 | FileCheck %s # CHECK: {{.*}}: out of order section type: 10 # Inputs/WASM/invalid-section-order.wasm is generated from this ll file, by diff --git a/llvm/test/Object/wasm-invalid-start.test b/llvm/test/Object/Wasm/invalid-start.test similarity index 100% rename from llvm/test/Object/wasm-invalid-start.test rename to llvm/test/Object/Wasm/invalid-start.test diff --git a/llvm/test/Object/wasm-linked-namesec-with-linkingsec.yaml b/llvm/test/Object/Wasm/linked-namesec-with-linkingsec.yaml similarity index 100% rename from llvm/test/Object/wasm-linked-namesec-with-linkingsec.yaml rename to llvm/test/Object/Wasm/linked-namesec-with-linkingsec.yaml diff --git 
a/llvm/test/Object/wasm-linked-symbol-table.yaml b/llvm/test/Object/Wasm/linked-symbol-table.yaml similarity index 100% rename from llvm/test/Object/wasm-linked-symbol-table.yaml rename to llvm/test/Object/Wasm/linked-symbol-table.yaml diff --git a/llvm/test/Object/Wasm/missing-version.test b/llvm/test/Object/Wasm/missing-version.test new file mode 100644 index 0000000000000..754658e9d0ddc --- /dev/null +++ b/llvm/test/Object/Wasm/missing-version.test @@ -0,0 +1,2 @@ +# RUN: not llvm-objdump -h %p/../Inputs/WASM/missing-version.wasm 2>&1 | FileCheck %s +# CHECK: {{.*}}: missing version number diff --git a/llvm/test/Object/wasm-obj2yaml-tables.test b/llvm/test/Object/Wasm/obj2yaml-tables.test similarity index 98% rename from llvm/test/Object/wasm-obj2yaml-tables.test rename to llvm/test/Object/Wasm/obj2yaml-tables.test index 870ffd179a1c2..877238437b010 100644 --- a/llvm/test/Object/wasm-obj2yaml-tables.test +++ b/llvm/test/Object/Wasm/obj2yaml-tables.test @@ -1,4 +1,4 @@ -RUN: obj2yaml %p/Inputs/WASM/multi-table.wasm | FileCheck %s +RUN: obj2yaml %p/../Inputs/WASM/multi-table.wasm | FileCheck %s # CHECK: - Type: TABLE diff --git a/llvm/test/Object/wasm-relocs-and-producers.yaml b/llvm/test/Object/Wasm/relocs-and-producers.yaml similarity index 100% rename from llvm/test/Object/wasm-relocs-and-producers.yaml rename to llvm/test/Object/Wasm/relocs-and-producers.yaml diff --git a/llvm/test/Object/Wasm/string-outside-section.test b/llvm/test/Object/Wasm/string-outside-section.test new file mode 100644 index 0000000000000..31f4a6080d9bd --- /dev/null +++ b/llvm/test/Object/Wasm/string-outside-section.test @@ -0,0 +1,3 @@ +RUN: not --crash llvm-objdump -s %p/../Inputs/WASM/string-outside-section.wasm 2>&1 | FileCheck %s + +CHECK: LLVM ERROR: EOF while reading string diff --git a/llvm/test/Object/wasm-bad-reloc-type.test b/llvm/test/Object/wasm-bad-reloc-type.test deleted file mode 100644 index 000acbd55fce5..0000000000000 --- a/llvm/test/Object/wasm-bad-reloc-type.test +++ /dev/null @@ -1,3 +0,0 @@ -RUN: not llvm-objdump -s %p/Inputs/WASM/bad-reloc-type.wasm 2>&1 | FileCheck %s - -CHECK: invalid relocation type: 63 diff --git a/llvm/test/Object/wasm-bad-symbol-type.test b/llvm/test/Object/wasm-bad-symbol-type.test deleted file mode 100644 index 4b7c30ad8b738..0000000000000 --- a/llvm/test/Object/wasm-bad-symbol-type.test +++ /dev/null @@ -1,3 +0,0 @@ -RUN: not llvm-objdump -s %p/Inputs/WASM/bad-symbol-type.wasm 2>&1 | FileCheck %s - -CHECK: invalid symbol type: 63 diff --git a/llvm/test/Object/wasm-missing-version.test b/llvm/test/Object/wasm-missing-version.test deleted file mode 100644 index beb0b5e9105bf..0000000000000 --- a/llvm/test/Object/wasm-missing-version.test +++ /dev/null @@ -1,2 +0,0 @@ -# RUN: not llvm-objdump -h %p/Inputs/WASM/missing-version.wasm 2>&1 | FileCheck %s -# CHECK: {{.*}}: missing version number diff --git a/llvm/test/Object/wasm-string-outside-section.test b/llvm/test/Object/wasm-string-outside-section.test deleted file mode 100644 index 3fa6217bae8e3..0000000000000 --- a/llvm/test/Object/wasm-string-outside-section.test +++ /dev/null @@ -1,3 +0,0 @@ -RUN: not --crash llvm-objdump -s %p/Inputs/WASM/string-outside-section.wasm 2>&1 | FileCheck %s - -CHECK: LLVM ERROR: EOF while reading string From 3c4b673af05f53e8a4d1a382b5c86367ea512c9e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 11 Apr 2024 12:36:56 -0400 Subject: [PATCH 153/886] [libc++] Fix -Wgnu-include-next in stddef.h (#88214) As reported in #86843, we must have #pragma GCC system_header before we use 
#include_next, otherwise the compiler may not understand that we're in a
system header and may issue a diagnostic for our usage of #include_next.
---
 libcxx/include/stddef.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/stddef.h b/libcxx/include/stddef.h
index 470b5408336c6..1583e78e3739b 100644
--- a/libcxx/include/stddef.h
+++ b/libcxx/include/stddef.h
@@ -26,6 +26,10 @@

 #include <__config>

+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
 // Note: This include is outside of header guards because we sometimes get included multiple times
 // with different defines and the underlying <stddef.h> will know how to deal with that.
 #include_next <stddef.h>

@@ -33,10 +37,6 @@
 #ifndef _LIBCPP_STDDEF_H
 # define _LIBCPP_STDDEF_H

-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
-
 # ifdef __cplusplus
 typedef decltype(nullptr) nullptr_t;
 # endif

From 357f6c7826437f6527db6f99f756a34fb5e0f716 Mon Sep 17 00:00:00 2001
From: abidh
Date: Thu, 11 Apr 2024 17:53:25 +0100
Subject: [PATCH 154/886] [flang] Add design document for debug info
 generation. (#86939)

This document discusses some options for where the debug metadata can be
generated. It also goes through various language constructs and explains
what the debug metadata will look like for each construct and how we can
extract that information.

The real point of discussion is how and where to extract the information
about the various language features in order to generate the debug metadata.
The structure of the metadata itself is mostly settled, as it is dictated by
DWARF and by the structure of LLVM IR metadata. Classic Flang and gfortran
generate quite similar DWARF for the various language constructs.

This document is based on what Kiran posted in
https://reviews.llvm.org/D138534.

---------

Co-authored-by: Tom Eccles
Co-authored-by: Kiran Chandramohan
---
 flang/docs/DebugGeneration.md | 442 ++++++++++++++++++++++++++++++++++
 flang/docs/index.md           |   1 +
 2 files changed, 443 insertions(+)
 create mode 100644 flang/docs/DebugGeneration.md

diff --git a/flang/docs/DebugGeneration.md b/flang/docs/DebugGeneration.md
new file mode 100644
index 0000000000000..9409d7e07b104
--- /dev/null
+++ b/flang/docs/DebugGeneration.md
@@ -0,0 +1,442 @@
+# Debug Generation
+
+Application developers spend a significant amount of time debugging the
+applications that they create. Hence it is important that a compiler provide
+support for a good debug experience. DWARF[1] is the standard debugging file
+format used by compilers and debuggers. The LLVM infrastructure supports
+debug info generation using metadata[2]. Support for generating debug
+metadata is present in MLIR by way of MLIR attributes. Flang can leverage
+these MLIR attributes to generate good debug information.
+
+We can break the work for debug generation into two separate tasks:
+1) Line table generation
+2) Full debug generation
+
+Support for Fortran debug in the LLVM infrastructure[3] has made great
+progress due to many Fortran frontends adopting LLVM as the backend, as well
+as the availability of the Classic Flang compiler.
+
+## Driver Flags
+By default, Flang will not generate any debug or line table information.
+Debug information will be generated if the following flags are present.
+
+-gline-tables-only, -g1 : Emit debug line number tables only
+-g : Emit full debug info
+
+## Line Table Generation
+
+There is an existing AddDebugFoundationPass which adds a `FusedLoc` with a
+`SubprogramAttr` on each FuncOp. This allows MLIR to generate LLVM IR
+metadata for that function. However, the following values are hardcoded at
+the moment. They will instead be passed from the driver.
+
+- Details of the compiler (name, version and git hash).
+- Language standard. We can set it to Fortran95 for now and periodically
+revise it when full support for later standards is available.
+- Optimisation level.
+- Type of debug generated (line table / full debug).
+- Calling convention: `DW_CC_normal` by default and `DW_CC_program` if it is
+the main program.
+
+`DISubroutineTypeAttr` currently has a fixed type. This will be changed to
+match the signature of the actual function/subroutine.
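+
+As a rough sketch of the line table metadata described in this section, the
+pass could produce something like the following for a subroutine `fn1`. This
+is illustrative only: the attribute parameter lists are abbreviated, and the
+file name, path, line numbers and location labels are placeholder values
+rather than the pass's exact output.
+
+```
+#di_file = #llvm.di_file<"test.f90" in "/home/user">
+#di_cu = #llvm.di_compile_unit<id = distinct[0]<>,
+  sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
+  isOptimized = false, emissionKind = LineTablesOnly>
+#di_sp_ty = #llvm.di_subroutine_type<callingConvention = DW_CC_normal>
+#di_sp = #llvm.di_subprogram<compileUnit = #di_cu, scope = #di_file,
+  name = "fn1", linkageName = "_QPfn1", file = #di_file, line = 2,
+  scopeLine = 2, subprogramFlags = Definition, type = #di_sp_ty>
+
+// The subprogram attribute is attached to the function through a fused
+// location, which DebugTranslation later turns into a DISubprogram.
+func.func @_QPfn1() {
+  return
+} loc(fused<#di_sp>[#loc2])
+```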
+This allows MLIR to generate LLVM IR metadata
+for that function. However, the following values are hardcoded at the moment.
+These will instead be passed from the driver.
+
+- Details of the compiler (name and version and git hash).
+- Language Standard. We can set it to Fortran95 for now and periodically
+revise it when full support for later standards is available.
+- Optimisation Level.
+- Type of debug generated (linetable/full debug).
+- Calling Convention: `DW_CC_normal` by default and `DW_CC_program` if it is
+the main program.
+
+`DISubroutineTypeAttr` currently has a fixed type. This will be changed to
+match the signature of the actual function/subroutine.
+
+
+## Full Debug Generation
+
+Full debug info will include metadata to describe functions, variables and
+types. Flang will generate debug metadata in the form of MLIR attributes. These
+attributes will be converted to the format expected by LLVM IR in DebugTranslation[4].
+
+Debug metadata generation can be broken down into 2 steps.
+
+1. MLIR attributes are generated by reading information from the AST or FIR.
+This step can happen anytime before or during conversion to LLVM dialect. An
+example of the metadata generated in this step is `DILocalVariableAttr` or
+`DIDerivedTypeAttr`.
+
+2. Changes that can only happen during or after conversion to LLVM dialect. An
+example of this is passing `DIGlobalVariableExpressionAttr` while
+creating `LLVM::GlobalOp`. Another example is the generation of `DbgDeclareOp`,
+which is required for local variables. It can only be created after conversion
+to LLVM dialect as it requires an LLVM.Ptr type. The changes required for
+step 2 are quite minimal. The bulk of the work happens in step 1.
+
+One design decision that we need to make is where to perform step 1.
+Here are some possible options:
+
+**During conversion to LLVM dialect**
+
+Pros:
+1. Do steps 1 and 2 in one place.
+2. No chance of missing any change introduced by an earlier transformation.
+
+Cons:
+1. Passing a lot of information from the driver as discussed in the line table
+section above may muddle the interface of FIRToLLVMConversion.
+2. `DeclareOp` is removed before this pass.
+3. Even if `DeclareOp` is retained, creating debug metadata while some ops have
+been converted to LLVM dialect and others are not may cause its own issues. We
+have to walk the ops chain to extract the information, which may be problematic
+in this case.
+4. Some source information is lost by this point. Examples include
+information about namelists, source line information about fields of derived
+types, etc.
+
+**During a pass before conversion to LLVM dialect**
+
+This is similar to what AddDebugFoundationPass is currently doing.
+
+Pros:
+1. One central location dedicated to debug information processing. This can
+result in a cleaner implementation.
+2. Similar to above, less chance of missing any change introduced by an earlier
+transformation.
+
+Cons:
+1. Step 2 still needs to happen during conversion to LLVM dialect. But the
+changes required for step 2 are quite minimal.
+2. Similar to above, some source information may be lost by this point.
+
+**During Lowering from AST**
+
+Pros:
+1. We have better source information.
+
+Cons:
+1. There may be changes in the code after lowering which may not be
+reflected in debug information.
+2. Comments on an earlier PR [5] advised against this approach.
+
+## Design
+
+The design below assumes that we are extracting the information from FIR.
+If we generate debug metadata during lowering then the description below
+may need to change, although the generated metadata remains the same in
+both cases.
+
+The AddDebugFoundationPass will be renamed to AddDebugInfoPass. The
+information mentioned in the line info section above will be passed to it from
+the driver. This pass will run quite late in the pipeline but before
+`DeclareOp` is removed.
+
+In this pass, we will iterate through the `GlobalOp`, `TypeInfoOp`, `FuncOp`
+and `DeclareOp` to extract the source information and build the MLIR
+attributes. A class will be added to handle conversion of MLIR and FIR types to
+`DITypeAttr`.
+
+The following sections provide details of how various language constructs will
+be handled. In these sections, the LLVM IR metadata and MLIR attributes are
+used interchangeably. As an example, `DILocalVariableAttr` is an MLIR attribute
+which gets translated to LLVM IR's `DILocalVariable`.
+
+### Variables
+
+#### Local Variables
+ In MLIR, local variables are represented by `DILocalVariableAttr` which
+ stores information like source location and type. They also require a
+ `DbgDeclareOp` which binds `DILocalVariableAttr` with a location.
+
+ In FIR, `DeclareOp` has source information about the variable. The
+ `DeclareOp` will be processed to create `DILocalVariableAttr`. This attr is
+ attached to the memref op of the `DeclareOp` using a `FusedLoc` approach.
+
+ During conversion to LLVM dialect, when an op is encountered that has a
+ `DILocalVariableAttr` in its `FusedLoc`, a `DbgDeclareOp` is created which
+ binds the attr with its location.
+
+ The change in the IR looks as follows:
+
+```
+ original fir
+ %2 = fir.alloca i32 loc(#loc4)
+ %3 = fir.declare %2 {uniq_name = "_QMhelperFchangeEi"}
+
+ Fir with FusedLoc.
+
+ %2 = fir.alloca i32 loc(#loc38)
+ %3 = fir.declare %2 {uniq_name = "_QMhelperFchangeEi"}
+ #di_local_variable5 = #llvm.di_local_variable
+ #loc38 = loc(fused<#di_local_variable5>[#loc4])
+
+ After conversion to llvm dialect
+
+ #di_local_variable = #llvm.di_local_variable
+ %1 = llvm.alloca %0 x i64
+ llvm.intr.dbg.declare #di_local_variable = %1
+```
+
+#### Function Arguments
+
+Arguments work in a similar way, but they present a difficulty: the
+`DeclareOp`'s memref points to a `BlockArgument`. Unlike the ops in the local
+variable case, `BlockArgument`s are not handled by FIRToLLVMLowering. This can
+easily be handled by adding the `DbgDeclareOp` after conversion to LLVM
+dialect, either in FIRToLLVMLowering or in a separate pass.
+
+### Module
+
+In debug metadata, a Fortran module will be represented by `DIModuleAttr`.
+The variables or functions inside a module will have their scope pointing to
+the parent module.
+
+```
+module helper
+ real glr
+ ...
+end module helper
+
+!1 = !DICompileUnit(language: DW_LANG_Fortran90 ...)
+!2 = !DIModule(scope: !1, name: "helper" ...)
+!3 = !DIGlobalVariable(scope: !2, name: "glr" ...)
+
+Use of a module results in the following metadata.
+!4 = !DIImportedEntity(tag: DW_TAG_imported_module, entity: !2)
+```
+
+Modules are not first class entities in FIR, so there is no way to get the
+location where they are declared in the source file.
+
+But the information that a variable or function is part of a module
+can be extracted from its mangled name along with the name of the module. There
+is a `GlobalOp` generated for each module variable in FIR and there is also a
+`DeclareOp` in each function where the module variable is used, as the sketch
+below illustrates.
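+
+For illustration only, the module name could be recovered from such mangled
+names with a helper along these lines. This is a sketch under assumptions,
+not the planned implementation: it assumes the `_QM<module>...` prefix
+convention visible in examples like `_QMhelperFchangeEi`, it assumes Fortran
+identifiers are lowercased in mangled names (so the first uppercase scope
+marker ends the module name), and the name `getModuleName` is hypothetical.
+The real pass would preferably reuse flang's own name mangling utilities.
+
+```
+// Hypothetical sketch: extract "helper" from _QMhelperEglr or
+// _QMhelperFchangeEi. Assumes lowercase identifiers, so the first
+// uppercase marker ('E' entity, 'F' procedure) ends the module name.
+#include "llvm/ADT/StringRef.h"
+#include <optional>
+#include <string>
+
+static std::optional<std::string> getModuleName(llvm::StringRef Mangled) {
+  if (!Mangled.consume_front("_QM"))
+    return std::nullopt; // Not a module-scoped symbol.
+  size_t End = Mangled.find_first_of("EF");
+  if (End == llvm::StringRef::npos)
+    return std::nullopt;
+  return Mangled.take_front(End).str();
+}
+```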
+
+We will use the `GlobalOp` to generate the `DIModuleAttr` and associated
+`DIGlobalVariableAttr`. A `DeclareOp` for a module variable will be used
+to generate `DIImportedEntityAttr`. Care will be taken to avoid generating
+duplicate `DIImportedEntityAttr` entries in the same function.
+
+### Derived Types
+
+A derived type will be represented in metadata by `DICompositeType` with a tag
+of `DW_TAG_structure_type`. It will have elements which point to the
+components.
+
+```
+ type :: t_pair
+   integer :: i
+   real :: x
+ end type
+!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "t_pair", elements: !2 ...)
+!2 = !{!3, !4}
+!3 = !DIDerivedType(tag: DW_TAG_member, scope: !1, name: "i", size: 32, offset: 0, baseType: !5 ...)
+!4 = !DIDerivedType(tag: DW_TAG_member, scope: !1, name: "x", size: 32, offset: 32, baseType: !6 ...)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "integer" ...)
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "real" ...)
+```
+
+In FIR, `RecordType` and `TypeInfoOp` can be used to get information about the
+location of the derived type and the types of its components. We may also use
+`FusedLoc` on `TypeInfoOp` to encode location information for all the
+components of the derived type.
+
+### CommonBlocks
+
+A common block will be represented in metadata by `DICommonBlockAttr`, which
+will be used as the scope by the variables inside the common block.
+`DIExpression` can be used to give the offset of any given variable inside the
+global storage for the common block.
+
+```
+integer a, b
+common /test/ a, b
+
+@test_ = common global [8 x i8] zeroinitializer, !dbg !5, !dbg !8
+!1 = !DISubprogram()
+!2 = !DICommonBlock(scope: !1, name: "test" ...)
+!3 = !DIGlobalVariable(scope: !2, name: "a" ...)
+!4 = !DIExpression()
+!5 = !DIGlobalVariableExpression(var: !3, expr: !4)
+!6 = !DIGlobalVariable(scope: !2, name: "b" ...)
+!7 = !DIExpression(DW_OP_plus_uconst, 4)
+!8 = !DIGlobalVariableExpression(var: !6, expr: !7)
+```
+
+In FIR, a common block results in a `GlobalOp` with common linkage. Every
+function where the common block is used has a `DeclareOp` for that variable.
+This `DeclareOp` will point to the global storage through
+`CoordinateOp` and `AddrOfOp`. The `CoordinateOp` has the offset of the
+location of this variable in the global storage. There is enough information
+to generate the required metadata, although it requires walking up the chain
+from the `DeclareOp` to locate the `CoordinateOp` and `AddrOfOp`.
+
+### Arrays
+
+The type of a fixed size array is represented using `DICompositeType`. The
+`DISubrangeAttr` is used to provide the bounds in any given dimension.
+
+```
+integer abc(4,5)
+
+!1 = !DICompositeType(tag: DW_TAG_array_type, baseType: !5, elements: !2 ...)
+!2 = !{ !3, !4 }
+!3 = !DISubrange(lowerBound: 1, upperBound: 4 ...)
+!4 = !DISubrange(lowerBound: 1, upperBound: 5 ...)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "integer" ...)
+
+```
+
+#### Adjustable
+
+The debug metadata for an adjustable array looks similar to that of a fixed
+size array, with one change. The bounds are not constant values but point to a
+`DILocalVariableAttr`.
+
+In FIR, the `DeclareOp` points to a `ShapeOp` and we can walk the chain
+to get the value that represents the array bound in any dimension. We will
+create a `DILocalVariableAttr` that will point to that location. This
+variable will be used in the `DISubrangeAttr`. Note that this
+`DILocalVariableAttr` does not correspond to any source variable.
+
+#### Assumed Size
+
+This is treated as a raw array.
+Debug information will not provide any upper bound
+information for the last dimension.
+
+#### Assumed Shape
+An assumed shape array will use a similar representation to a fixed size
+array, but there will be 2 differences.
+
+1. There will be a `datalocation` field which will be an expression. This will
+enable the debugger to get the data pointer from the array descriptor.
+
+2. The field in `DISubrangeAttr` for the array bounds will be an expression,
+which will allow the debugger to get the bounds from the descriptor.
+
+```
+integer(4), intent(out) :: a(:,:)
+
+!1 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, elements: !2, dataLocation: !3)
+!2 = !{!5, !7}
+!3 = !DIExpression(DW_OP_push_object_address, DW_OP_deref)
+!4 = !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 32, DW_OP_deref)
+!5 = !DISubrange(lowerBound: !1, upperBound: !4 ...)
+!6 = !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 56, DW_OP_deref)
+!7 = !DISubrange(lowerBound: !1, upperBound: !6, ...)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "integer" ...)
+```
+
+In the assumed shape case, the rank can be determined from FIR's
+`SequenceType`. This allows us to generate a `DISubrangeAttr` in each
+dimension.
+
+#### Assumed Rank
+
+This is currently unsupported in flang. Its representation will be similar to
+the array representation for an assumed shape array, with the following
+differences.
+
+1. `DICompositeTypeAttr` will have a rank field which will be an expression.
+It will be used to get the rank value from the descriptor.
+2. Instead of a `DISubrangeAttr` for each dimension, there will be a single
+`DIGenericSubrangeAttr` which will allow debuggers to calculate the bounds in
+any dimension.
+
+### Pointers and Allocatables
+Pointers and allocatables will be represented using `DICompositeTypeAttr`. It
+is a quirk of DWARF that scalar allocatable or pointer variables will show up
+in the debug info as a pointer to a scalar, while array pointer or allocatable
+variables show up as arrays. The behavior is the same in gfortran and classic
+flang.
+
+```
+  integer, allocatable :: ar(:)
+  integer, pointer :: sc
+
+!1 = !DILocalVariable(name: "sc", type: !2)
+!2 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3, associated: !9 ...)
+!3 = !DIBasicType(tag: DW_TAG_base_type, name: "integer", ...)
+!4 = !DILocalVariable(name: "ar", type: !5 ...)
+!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !3, elements: !6, dataLocation: !8, allocated: !9)
+!6 = !{!7}
+!7 = !DISubrange(lowerBound: !10, upperBound: !11 ...)
+!8 = !DIExpression(DW_OP_push_object_address, DW_OP_deref)
+!9 = !DIExpression(DW_OP_push_object_address, DW_OP_deref, DW_OP_lit0, DW_OP_ne)
+!10 = !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 24, DW_OP_deref)
+!11 = !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 32, DW_OP_deref)
+
+```
+
+In FIR, these variables are represented as `fir.box<fir.heap<...>>` or
+`fir.box<fir.ptr<...>>`. There is also an `allocatable` or `pointer` attribute
+on the `DeclareOp`. This allows us to generate the allocated/associated status
+of these variables. The metadata to get the information from the descriptor is
+similar to that for arrays.
+
+### Strings
+
+The `DIStringTypeAttr` can represent both fixed size and allocatable strings.
+For the allocatable case, the `stringLengthExpression` and
+`stringLocationExpression` are used to provide the length and the location of
+the string respectively.
+
+```
+ character(len=:), allocatable :: var
+ character(len=20) :: fixed
+
+!1 = !DILocalVariable(name: "var", type: !2)
+!2 = !DIStringType(name: "character(*)", stringLengthExpression: !4, stringLocationExpression: !3 ...)
+!3 = !DIExpression(DW_OP_push_object_address, DW_OP_deref)
+!4 = !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8)
+
+!5 = !DILocalVariable(name: "fixed", type: !6)
+!6 = !DIStringType(name: "character (20)", size: 160)
+
+```
+
+### Association
+
+These will be treated like normal variables, although we may need to handle
+the case where the `DeclareOp` of one variable points to the `DeclareOp` of
+another variable (e.g. a => b).
+
+### Namelists
+
+FIR does not seem to have a way to extract information about namelists.
+
+```
+namelist /abc/ x3, y3
+
+(gdb) p abc
+$1 = ( x3 = 100, y3 = 500 )
+(gdb) p x3
+$2 = 100
+(gdb) p y3
+$3 = 500
+```
+
+Even without namelist support, we should be able to see the values of the
+individual variables like `x3` and `y3` in the above example. But we would not
+be able to evaluate the namelist and have the debugger print the values of all
+the variables in it as shown above for `abc`.
+
+## Missing metadata in MLIR
+
+Some metadata types that are needed for Fortran are present in LLVM IR but are
+absent from MLIR. A non-comprehensive list is given below.
+
+1. `DICommonBlockAttr`
+2. `DIGenericSubrangeAttr`
+3. `DISubrangeAttr` in MLIR takes an IntegerAttr at the moment, so it only
+works with fixed size arrays. It needs to also accept `DIExpressionAttr` or
+`DILocalVariableAttr` to support assumed shape and adjustable arrays.
+4. The `DICompositeTypeAttr` will need to have fields for `datalocation`,
+`rank`, `allocated` and `associated`.
+5. `DIStringTypeAttr`
+
+# Testing
+
+- LLVM LIT tests will be added to test:
+  - the driver, ensuring that it enables line table and full debug
+    info generation appropriately.
+  - that the pass works as expected and generates debug info. These tests
+    will use `fir-opt`.
+  - with `flang -fc1`, that end-to-end debug info generation works.
+- Manual external tests will be written to ensure that the following works
+  in debug tools:
+  - Break at lines.
+  - Break at functions.
+  - print type (ptype) of function names.
+  - print values and types (ptype) of various types of variables.
+- Manually run `GDB`'s gdb.fortran testsuite with llvm-flang.
+
+# Resources
+- [1] https://dwarfstd.org/doc/DWARF5.pdf
+- [2] https://llvm.org/docs/LangRef.html#metadata
+- [3] https://archive.fosdem.org/2022/schedule/event/llvm_fortran_debug/
+- [4] https://github.com/llvm/llvm-project/blob/main/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
+- [5] https://github.com/llvm/llvm-project/pull/84202
diff --git a/flang/docs/index.md b/flang/docs/index.md
index 4a0b145df10b0..70478fa0936d0 100644
--- a/flang/docs/index.md
+++ b/flang/docs/index.md
@@ -47,6 +47,7 @@ on how to get in touch with us and to learn more about the current status.
    Character
    ComplexOperations
    ControlFlowGraph
+   DebugGeneration
    Directives
    DoConcurrent
    Extensions

From f135d224a6d078ca3b0722db94e1d772fdbd68ad Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Thu, 11 Apr 2024 09:50:41 -0700
Subject: [PATCH 155/886] [SLP][NFC]Add a test with the zext feeding into
 sitofp instructions.

---
 .../X86/sitofp-minbitwidth-node.ll | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll
new file mode 100644
index 0000000000000..00dd9b7e6fb47
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64 -mcpu=k8 -mattr=+sse4.1 -S < %s | FileCheck %s
+
+define void @foo(ptr %ptr) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 328
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 334
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[GEP0]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], <i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
+; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[GEP3]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP6]], [[TMP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
+; CHECK-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp olt double [[TMP11]], [[TMP10]]
+; CHECK-NEXT: ret void
+;
+  %gep0 = getelementptr inbounds i8, ptr %ptr, i64 328
+  %gep1 = getelementptr inbounds i8, ptr %ptr, i64 330
+
+  %gep3 = getelementptr inbounds i8, ptr %ptr, i64 334
+  %gep4 = getelementptr inbounds i8, ptr %ptr, i64 336
+
+  %ld0 = load i16, ptr %gep0, align 8
+  %ld1 = load i16, ptr %gep1, align 2
+
+  %zext0 = zext i16 %ld0 to i32
+  %zext1 = zext i16 %ld1 to i32
+
+  %xor0 = xor i32 %zext0, 65535
+  %xor1 = xor i32 %zext1, 65535
+
+  %sitofp0 = sitofp i32 %xor0 to double
+  %sitofp1 = sitofp i32 %xor1 to double
+
+  %ld3 = load i16, ptr %gep3, align 2
+  %ld4 = load i16, ptr %gep4, align 8
+
+  %zext3 = zext i16 %ld3 to i32
+  %zext4 = zext i16 %ld4 to i32
+
+  %sub30 = sub nsw i32 %zext3, %zext0
+  %sub41 = sub nsw i32 %zext4, %zext1
+
+  %sitofp30 = sitofp i32 %sub30 to double
+  %sitofp41 = sitofp i32 %sub41 to double
+
+  %fdiv030 = fdiv double %sitofp0, %sitofp30
+  %fdiv141 = fdiv double %sitofp1, %sitofp41
+  %fcmp = fcmp olt double %fdiv141, %fdiv030
+  ret void
+}

From 3749e0d43fb56cf22cc72274c287b7bfdda9821d Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 11 Apr 2024 09:54:41 -0700
Subject: [PATCH 156/886] [memprof] Use structured binding (NFC) (#88363)

---
 llvm/lib/ProfileData/MemProfReader.cpp     | 20 +++++++++-----------
 llvm/tools/llvm-profdata/llvm-profdata.cpp |  4 ++--
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 4ccec26597c09..580867a9083fd 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -142,13 +142,13 @@ CallStackMap readStackInfo(const char *Ptr) {
 // any stack ids observed previously map to a different set of program counter
 // addresses.
 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
-  for (const auto &IdStack : From) {
-    auto I = To.find(IdStack.first);
+  for (const auto &[Id, Stack] : From) {
+    auto I = To.find(Id);
     if (I == To.end()) {
-      To[IdStack.first] = IdStack.second;
+      To[Id] = Stack;
     } else {
       // Check that the PCs are the same (in order).
-      if (IdStack.second != I->second)
+      if (Stack != I->second)
         return true;
     }
   }
@@ -275,10 +275,10 @@ void RawMemProfReader::printYAML(raw_ostream &OS) {
   }
   // Print out the merged contents of the profiles.
   OS << "  Records:\n";
-  for (const auto &Entry : *this) {
+  for (const auto &[GUID, Record] : *this) {
     OS << "  -\n";
-    OS << "    FunctionGUID: " << Entry.first << "\n";
-    Entry.second.print(OS);
+    OS << "    FunctionGUID: " << GUID << "\n";
+    Record.print(OS);
   }
 }
 
@@ -405,9 +405,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
 
   // Convert the raw profile callstack data into memprof records. While doing so
   // keep track of related contexts so that we can fill these in later.
-  for (const auto &Entry : CallstackProfileData) {
-    const uint64_t StackId = Entry.first;
-
+  for (const auto &[StackId, MIB] : CallstackProfileData) {
     auto It = StackMap.find(StackId);
     if (It == StackMap.end())
       return make_error<InstrProfError>(
@@ -455,7 +453,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     auto Result =
         FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
     IndexedMemProfRecord &Record = Result.first->second;
-    Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);
+    Record.AllocSites.emplace_back(Callstack, CSId, MIB);
 
     if (!F.IsInlineFrame)
       break;
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 6a70773613b7f..66a4cea035347 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -679,8 +679,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     }
     const auto &FunctionProfileData = Reader->getProfileData();
     // Add the memprof records into the writer context.
-    for (const auto &I : FunctionProfileData) {
-      WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
+    for (const auto &[GUID, Record] : FunctionProfileData) {
+      WC->Writer.addMemProfRecord(GUID, Record);
     }
     return;
   }

From db9a17a4075d2ba4cf9edfa90018da6c11908e2a Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Thu, 11 Apr 2024 09:56:01 -0700
Subject: [PATCH 157/886] [memprof] Use std::optional (NFC) (#88366)

---
 llvm/lib/ProfileData/InstrProfReader.cpp     | 9 ++++-----
 llvm/unittests/ProfileData/InstrProfTest.cpp | 9 ++++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 884334ed070e8..a35366a106a32 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include <optional>
 #include
 #include
 #include
@@ -1506,13 +1507,11 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
 
   // Setup a callback to convert from frame ids to frame using the on-disk
   // FrameData hash table.
-  memprof::FrameId LastUnmappedFrameId = 0;
-  bool HasFrameMappingError = false;
+  std::optional<memprof::FrameId> LastUnmappedFrameId;
   auto IdToFrameCallback = [&](const memprof::FrameId Id) {
     auto FrIter = MemProfFrameTable->find(Id);
     if (FrIter == MemProfFrameTable->end()) {
       LastUnmappedFrameId = Id;
-      HasFrameMappingError = true;
       return memprof::Frame(0, 0, 0, false);
     }
     return *FrIter;
@@ -1521,10 +1520,10 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
   memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
 
   // Check that all frame ids were successfully converted to frames.
-  if (HasFrameMappingError) {
+  if (LastUnmappedFrameId) {
     return make_error<InstrProfError>(instrprof_error::hash_mismatch,
                                       "memprof frame not found for frame id " +
-                                          Twine(LastUnmappedFrameId));
+                                          Twine(*LastUnmappedFrameId));
   }
   return Record;
 }
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 732f8fd792f8d..82701c47daf71 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Testing/Support/Error.h"
 #include "gtest/gtest.h"
 #include
+#include <optional>
 
 using namespace llvm;
 using ::testing::EndsWith;
@@ -433,21 +434,19 @@ TEST_F(InstrProfTest, test_memprof) {
   ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
   const memprof::MemProfRecord &Record = RecordOr.get();
 
-  memprof::FrameId LastUnmappedFrameId = 0;
-  bool HasFrameMappingError = false;
+  std::optional<memprof::FrameId> LastUnmappedFrameId;
   auto IdToFrameCallback = [&](const memprof::FrameId Id) {
     auto Iter = IdToFrameMap.find(Id);
     if (Iter == IdToFrameMap.end()) {
       LastUnmappedFrameId = Id;
-      HasFrameMappingError = true;
       return memprof::Frame(0, 0, 0, false);
     }
    return Iter->second;
   };
 
   const memprof::MemProfRecord WantRecord(IndexedMR, IdToFrameCallback);
-  ASSERT_FALSE(HasFrameMappingError)
-      << "could not map frame id: " << LastUnmappedFrameId;
+  ASSERT_FALSE(LastUnmappedFrameId.has_value())
+      << "could not map frame id: " << *LastUnmappedFrameId;
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
From 8c3cb6b55b688b767e5d65bcc2891b17322e8d05 Mon Sep 17 00:00:00 2001
From: Chelsea Cassanova
Date: Thu, 11 Apr 2024 09:56:22 -0700
Subject: [PATCH 158/886] Reland "[lldb][sbdebugger] Move SBDebugger Broadcast
 bit enum into ll… (#88331)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…db-enumerations.h" (#88324)"

This reverts commit 9f6d08f2566a26144ea1753f80aebb1f2ecfdc63. This broke
the build because of a usage of one of the original SBDebugger broadcast
bits that wasn't updated in the original commit.
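
For reference, a minimal sketch of subscribing to these events with the
relocated enumerators, following the pattern used by the updated tests and
lldb-dap. The function name and listener name below are illustrative only:

```
// Sketch: listen for debugger progress/warning events using the bits that
// now live in lldb-enumerations.h instead of on SBDebugger.
#include "lldb/API/SBDebugger.h"
#include "lldb/API/SBEvent.h"
#include "lldb/API/SBListener.h"

void listenForProgress(lldb::SBDebugger &debugger) {
  lldb::SBListener listener("progress.listener");
  debugger.GetBroadcaster().AddListener(
      listener, lldb::eBroadcastBitProgress | lldb::eBroadcastBitWarning);
  lldb::SBEvent event;
  if (listener.WaitForEvent(/*num_seconds=*/1, event)) {
    // Inspect the progress event here.
  }
}
```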
--- lldb/include/lldb/API/SBDebugger.h | 7 ------- lldb/include/lldb/lldb-enumerations.h | 8 ++++++++ .../diagnostic_reporting/TestDiagnosticReporting.py | 2 +- .../progress_reporting/TestProgressReporting.py | 2 +- .../clang_modules/TestClangModuleBuildProgress.py | 2 +- lldb/test/API/macosx/rosetta/TestRosetta.py | 2 +- lldb/tools/lldb-dap/lldb-dap.cpp | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 62b2f91f5076d..cf5409a12a056 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -42,13 +42,6 @@ class LLDB_API SBInputReader { class LLDB_API SBDebugger { public: - FLAGS_ANONYMOUS_ENUM(){ - eBroadcastBitProgress = (1 << 0), - eBroadcastBitWarning = (1 << 1), - eBroadcastBitError = (1 << 2), - eBroadcastBitProgressCategory = (1 << 3), - }; - SBDebugger(); SBDebugger(const lldb::SBDebugger &rhs); diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 646f7bfda9847..f3b07ea6d2039 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1339,6 +1339,14 @@ enum AddressMaskRange { eAddressMaskRangeAll = eAddressMaskRangeAny, }; +/// Used by the debugger to indicate which events are being broadcasted. +enum DebuggerBroadcastBit { + eBroadcastBitProgress = (1 << 0), + eBroadcastBitWarning = (1 << 1), + eBroadcastBitError = (1 << 2), + eBroadcastBitProgressCategory = (1 << 3), +}; + } // namespace lldb #endif // LLDB_LLDB_ENUMERATIONS_H diff --git a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py index 36a3be695628f..6353e3e8cbedb 100644 --- a/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py +++ b/lldb/test/API/functionalities/diagnostic_reporting/TestDiagnosticReporting.py @@ -15,7 +15,7 @@ def setUp(self): self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( self.broadcaster, - lldb.SBDebugger.eBroadcastBitWarning | lldb.SBDebugger.eBroadcastBitError, + lldb.eBroadcastBitWarning | lldb.eBroadcastBitError, ) def test_dwarf_symbol_loading_diagnostic_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 9af53845ca1b7..98988d7624da3 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -13,7 +13,7 @@ def setUp(self): TestBase.setUp(self) self.broadcaster = self.dbg.GetBroadcaster() self.listener = lldbutil.start_listening_from( - self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress + self.broadcaster, lldb.eBroadcastBitProgress ) def test_dwarf_symbol_loading_progress_report(self): diff --git a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py index 228f676aedf6a..33c7c269c081e 100644 --- a/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py +++ b/lldb/test/API/functionalities/progress_reporting/clang_modules/TestClangModuleBuildProgress.py @@ -34,7 +34,7 @@ def test_clang_module_build_progress_report(self): # other unrelated progress events. 
broadcaster = self.dbg.GetBroadcaster() listener = lldbutil.start_listening_from( - broadcaster, lldb.SBDebugger.eBroadcastBitProgress + broadcaster, lldb.eBroadcastBitProgress ) # Trigger module builds. diff --git a/lldb/test/API/macosx/rosetta/TestRosetta.py b/lldb/test/API/macosx/rosetta/TestRosetta.py index ce40de475ef16..669db95a1624c 100644 --- a/lldb/test/API/macosx/rosetta/TestRosetta.py +++ b/lldb/test/API/macosx/rosetta/TestRosetta.py @@ -49,7 +49,7 @@ def test_rosetta(self): if rosetta_debugserver_installed(): broadcaster = self.dbg.GetBroadcaster() listener = lldbutil.start_listening_from( - broadcaster, lldb.SBDebugger.eBroadcastBitWarning + broadcaster, lldb.eBroadcastBitWarning ) target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 55f8c920e6001..25c5ad56e3d6f 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -420,8 +420,8 @@ void SendStdOutStdErr(lldb::SBProcess &process) { void ProgressEventThreadFunction() { lldb::SBListener listener("lldb-dap.progress.listener"); - g_dap.debugger.GetBroadcaster().AddListener( - listener, lldb::SBDebugger::eBroadcastBitProgress); + g_dap.debugger.GetBroadcaster().AddListener(listener, + lldb::eBroadcastBitProgress); g_dap.broadcaster.AddListener(listener, eBroadcastBitStopProgressThread); lldb::SBEvent event; bool done = false; From 2ea7ec9737e3ca4e2ce23bf606e79e7066beae0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 11 Apr 2024 10:03:29 +0200 Subject: [PATCH 159/886] [clang][Interp][NFC] Expand pointer unittests Test integral pointers as well. --- clang/unittests/AST/Interp/toAPValue.cpp | 56 +++++++++++++++++++----- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp index d0dfb40d51495..be7929228d283 100644 --- a/clang/unittests/AST/Interp/toAPValue.cpp +++ b/clang/unittests/AST/Interp/toAPValue.cpp @@ -20,7 +20,9 @@ TEST(ToAPValue, Pointers) { " A a[3];\n" "};\n" "constexpr S d = {{{true, false}, {false, true}, {false, false}}};\n" - "constexpr const bool *b = &d.a[1].z;\n"; + "constexpr const bool *b = &d.a[1].z;\n" + "const void *p = (void*)12;\n" + "const void *nullp = (void*)0;\n"; auto AST = tooling::buildASTFromCodeWithArgs( Code, {"-fexperimental-new-constant-interpreter"}); @@ -41,15 +43,49 @@ TEST(ToAPValue, Pointers) { return Prog.getPtrGlobal(*Prog.getGlobal(D)); }; - const Pointer &GP = getGlobalPtr("b"); - const Pointer &P = GP.deref(); - ASSERT_TRUE(P.isLive()); - APValue A = P.toAPValue(); - ASSERT_TRUE(A.isLValue()); - ASSERT_TRUE(A.hasLValuePath()); - const auto &Path = A.getLValuePath(); - ASSERT_EQ(Path.size(), 3u); - ASSERT_EQ(A.getLValueBase(), getDecl("d")); + { + const Pointer &GP = getGlobalPtr("b"); + const Pointer &P = GP.deref(); + ASSERT_TRUE(P.isLive()); + APValue A = P.toAPValue(); + ASSERT_TRUE(A.isLValue()); + ASSERT_TRUE(A.hasLValuePath()); + const auto &Path = A.getLValuePath(); + ASSERT_EQ(Path.size(), 3u); + ASSERT_EQ(A.getLValueBase(), getDecl("d")); + // FIXME: Also test all path elements. 
+  }
+
+  {
+    const ValueDecl *D = getDecl("p");
+    ASSERT_NE(D, nullptr);
+    const Pointer &GP = getGlobalPtr("p");
+    const Pointer &P = GP.deref();
+    ASSERT_TRUE(P.isIntegralPointer());
+    APValue A = P.toAPValue();
+    ASSERT_TRUE(A.isLValue());
+    ASSERT_TRUE(A.getLValueBase().isNull());
+    APSInt I;
+    bool Success = A.toIntegralConstant(I, D->getType(), AST->getASTContext());
+    ASSERT_TRUE(Success);
+    ASSERT_EQ(I, 12);
+  }
+
+  {
+    const ValueDecl *D = getDecl("nullp");
+    ASSERT_NE(D, nullptr);
+    const Pointer &GP = getGlobalPtr("nullp");
+    const Pointer &P = GP.deref();
+    ASSERT_TRUE(P.isIntegralPointer());
+    APValue A = P.toAPValue();
+    ASSERT_TRUE(A.isLValue());
+    ASSERT_TRUE(A.getLValueBase().isNull());
+    ASSERT_TRUE(A.isNullPointer());
+    APSInt I;
+    bool Success = A.toIntegralConstant(I, D->getType(), AST->getASTContext());
+    ASSERT_TRUE(Success);
+    ASSERT_EQ(I, 0);
+  }
 }
 
 TEST(ToAPValue, FunctionPointers) {

From 64c3997939cf2d9b4fd1c24c89724d0b47afcd03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Thu, 11 Apr 2024 17:53:57 +0200
Subject: [PATCH 160/886] [clang][Interp] Allow initializing static class
 members

We need to handle this when registering global variables.
---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 34 ++++----
 clang/lib/AST/Interp/Interp.cpp          | 99 +++++++++++++++----------
 clang/lib/AST/Interp/Program.cpp         |  2 +-
 clang/test/AST/Interp/cxx23.cpp          | 17 ++--
 clang/test/AST/Interp/records.cpp        | 32 +++++++-
 5 files changed, 118 insertions(+), 66 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 84bacd457c85b..01ec31e4077f7 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2778,26 +2778,34 @@ bool ByteCodeExprGen<Emitter>::visitVarDecl(const VarDecl *VD) {
   std::optional<PrimType> VarT = classify(VD->getType());
 
   if (Context::shouldBeGloballyIndexed(VD)) {
-    // We've already seen and initialized this global.
-    if (P.getGlobal(VD))
-      return true;
-
-    std::optional<unsigned> GlobalIndex = P.createGlobal(VD, Init);
-
-    if (!GlobalIndex)
-      return false;
-
-    if (Init) {
+    auto initGlobal = [&](unsigned GlobalIndex) -> bool {
+      assert(Init);
       DeclScope<Emitter> LocalScope(this, VD);
 
       if (VarT) {
        if (!this->visit(Init))
          return false;
-        return this->emitInitGlobal(*VarT, *GlobalIndex, VD);
+        return this->emitInitGlobal(*VarT, GlobalIndex, VD);
      }
-      return this->visitGlobalInitializer(Init, *GlobalIndex);
+      return this->visitGlobalInitializer(Init, GlobalIndex);
+    };
+
+    // We've already seen and initialized this global.
+    if (std::optional<unsigned> GlobalIndex = P.getGlobal(VD)) {
+      if (P.getPtrGlobal(*GlobalIndex).isInitialized())
+        return true;
+
+      // The previous attempt at initialization might've been unsuccessful,
+      // so let's try this one.
+      return Init && initGlobal(*GlobalIndex);
+    }
+
+    std::optional<unsigned> GlobalIndex = P.createGlobal(VD, Init);
+
+    if (!GlobalIndex)
+      return false;
+
+    return !Init || initGlobal(*GlobalIndex);
   } else {
     VariableScope<Emitter> LocalScope(this);
     if (VarT) {
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index e5e2c932f500b..2607e07432516 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -56,22 +56,65 @@ static bool Jf(InterpState &S, CodePtr &PC, int32_t Offset) {
   return true;
 }
 
+static void diagnoseMissingInitializer(InterpState &S, CodePtr OpPC,
+                                       const ValueDecl *VD) {
+  const SourceInfo &E = S.Current->getSource(OpPC);
+  S.FFDiag(E, diag::note_constexpr_var_init_unknown, 1) << VD;
+  S.Note(VD->getLocation(), diag::note_declared_at) << VD->getSourceRange();
+}
+
+static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC,
+                                     const ValueDecl *VD);
+static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC,
+                                const ValueDecl *D) {
+  const SourceInfo &E = S.Current->getSource(OpPC);
+
+  if (isa<ParmVarDecl>(D)) {
+    if (S.getLangOpts().CPlusPlus11) {
+      S.FFDiag(E, diag::note_constexpr_function_param_value_unknown) << D;
+      S.Note(D->getLocation(), diag::note_declared_at) << D->getSourceRange();
+    } else {
+      S.FFDiag(E);
+    }
+  } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
+    if (!VD->getType().isConstQualified()) {
+      diagnoseNonConstVariable(S, OpPC, VD);
+      return false;
+    }
+
+    // const, but no initializer.
+    if (!VD->getAnyInitializer()) {
+      diagnoseMissingInitializer(S, OpPC, VD);
+      return false;
+    }
+  }
+  return false;
+}
+
 static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC,
                                      const ValueDecl *VD) {
   if (!S.getLangOpts().CPlusPlus)
     return;
 
   const SourceInfo &Loc = S.Current->getSource(OpPC);
+  if (const auto *VarD = dyn_cast<VarDecl>(VD);
+      VarD && VarD->getType().isConstQualified() &&
+      !VarD->getAnyInitializer()) {
+    diagnoseMissingInitializer(S, OpPC, VD);
+    return;
+  }
 
-  if (VD->getType()->isIntegralOrEnumerationType())
+  if (VD->getType()->isIntegralOrEnumerationType()) {
     S.FFDiag(Loc, diag::note_constexpr_ltor_non_const_int, 1) << VD;
-  else
-    S.FFDiag(Loc,
-             S.getLangOpts().CPlusPlus11
-                 ? diag::note_constexpr_ltor_non_constexpr
-                 : diag::note_constexpr_ltor_non_integral,
-             1)
-        << VD << VD->getType();
+    S.Note(VD->getLocation(), diag::note_declared_at);
+    return;
+  }
+
+  S.FFDiag(Loc,
+           S.getLangOpts().CPlusPlus11 ? diag::note_constexpr_ltor_non_constexpr
+                                       : diag::note_constexpr_ltor_non_integral,
+           1)
+      << VD << VD->getType();
   S.Note(VD->getLocation(), diag::note_declared_at);
 }
 
@@ -202,6 +245,9 @@ bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   if (!Ptr.isExtern())
     return true;
 
+  if (Ptr.isInitialized())
+    return true;
+
   if (!S.checkingPotentialConstantExpression() && S.getLangOpts().CPlusPlus) {
     const auto *VD = Ptr.getDeclDesc()->asValueDecl();
     diagnoseNonConstVariable(S, OpPC, VD);
@@ -369,9 +415,15 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
   if (const auto *VD = Ptr.getDeclDesc()->asVarDecl();
       VD && VD->hasGlobalStorage()) {
     const SourceInfo &Loc = S.Current->getSource(OpPC);
-    S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD;
-    S.Note(VD->getLocation(), diag::note_declared_at);
+    if (VD->getAnyInitializer()) {
+      S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD;
+      S.Note(VD->getLocation(), diag::note_declared_at);
+    } else {
+      diagnoseMissingInitializer(S, OpPC, VD);
+    }
+    return false;
   }
+
   if (!S.checkingPotentialConstantExpression()) {
     S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_uninit)
         << AK << /*uninitialized=*/true << S.Current->getRange(OpPC);
@@ -598,33 +650,6 @@ bool CheckFloatResult(InterpState &S, CodePtr OpPC, const Floating &Result,
   return true;
 }
 
-static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC,
-                                const ValueDecl *D) {
-  const SourceInfo &E = S.Current->getSource(OpPC);
-
-  if (isa<ParmVarDecl>(D)) {
-    if (S.getLangOpts().CPlusPlus11) {
-      S.FFDiag(E, diag::note_constexpr_function_param_value_unknown) << D;
-      S.Note(D->getLocation(), diag::note_declared_at) << D->getSourceRange();
-    } else {
-      S.FFDiag(E);
-    }
-  } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
-    if (!VD->getType().isConstQualified()) {
-      diagnoseNonConstVariable(S, OpPC, VD);
-      return false;
-    }
-
-    // const, but no initializer.
-    if (!VD->getAnyInitializer()) {
-      S.FFDiag(E, diag::note_constexpr_var_init_unknown, 1) << VD;
-      S.Note(VD->getLocation(), diag::note_declared_at) << VD->getSourceRange();
-      return false;
-    }
-  }
-  return false;
-}
-
 /// We already know the given DeclRefExpr is invalid for some reason,
 /// now figure out why and print appropriate diagnostics.
 bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR) {
diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp
index 82367164743fc..e6f22e79451e9 100644
--- a/clang/lib/AST/Interp/Program.cpp
+++ b/clang/lib/AST/Interp/Program.cpp
@@ -177,7 +177,7 @@ std::optional<unsigned> Program::createGlobal(const ValueDecl *VD,
   bool IsStatic, IsExtern;
   if (const auto *Var = dyn_cast<VarDecl>(VD)) {
     IsStatic = Context::shouldBeGloballyIndexed(VD);
-    IsExtern = !Var->getAnyInitializer();
+    IsExtern = Var->hasExternalStorage();
   } else if (isa(VD)) {
     IsStatic = true;
     IsExtern = false;
diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp
index 042e29613aa75..f0325eef6d87c 100644
--- a/clang/test/AST/Interp/cxx23.cpp
+++ b/clang/test/AST/Interp/cxx23.cpp
@@ -5,23 +5,18 @@
 
 /// FIXME: The new interpreter is missing all the 'control flows through...' diagnostics.
-constexpr int f(int n) { // ref20-error {{constexpr function never produces a constant expression}} \
-                         // expected20-error {{constexpr function never produces a constant expression}}
+constexpr int f(int n) { // ref20-error {{constexpr function never produces a constant expression}}
   static const int m = n; // ref20-note {{control flows through the definition of a static variable}} \
                           // ref20-warning {{is a C++23 extension}} \
-                          // expected20-warning {{is a C++23 extension}} \
-                          // expected20-note {{declared here}} \
+                          // expected20-warning {{is a C++23 extension}} \
 
-  return m; // expected20-note {{initializer of 'm' is not a constant expression}}
+  return m;
 }
-constexpr int g(int n) { // ref20-error {{constexpr function never produces a constant expression}} \
-                         // expected20-error {{constexpr function never produces a constant expression}}
+constexpr int g(int n) { // ref20-error {{constexpr function never produces a constant expression}}
   thread_local const int m = n; // ref20-note {{control flows through the definition of a thread_local variable}} \
                                 // ref20-warning {{is a C++23 extension}} \
-                                // expected20-warning {{is a C++23 extension}} \
-                                // expected20-note {{declared here}}
+                                // expected20-warning {{is a C++23 extension}}
-  return m; // expected20-note {{initializer of 'm' is not a constant expression}}
+  return m;
-
 }
 
 constexpr int c_thread_local(int n) { // ref20-error {{constexpr function never produces a constant expression}} \
diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp
index 0f76e0cfe9927..f251497ed7018 100644
--- a/clang/test/AST/Interp/records.cpp
+++ b/clang/test/AST/Interp/records.cpp
@@ -1,11 +1,11 @@
-// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s
 // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++14 -verify=expected,both %s
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++17 -verify=expected,both %s
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++17 -triple i686 -verify=expected,both %s
 // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++20 -verify=expected,both %s
-// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -triple i686 -verify=expected,both %s
-// RUN: %clang_cc1 -verify=ref,both %s
 // RUN: %clang_cc1 -verify=ref,both -std=c++14 %s
+// RUN: %clang_cc1 -verify=ref,both -std=c++17 %s
+// RUN: %clang_cc1 -verify=ref,both -std=c++17 -triple i686 %s
 // RUN: %clang_cc1 -verify=ref,both -std=c++20 %s
-// RUN: %clang_cc1 -verify=ref,both -triple i686 %s
 
 /// Used to crash.
 struct Empty {};
@@ -1285,3 +1285,27 @@ namespace {
   }
 }
 #endif
+
+namespace pr18633 {
+  struct A1 {
+    static const int sz;
+    static const int sz2;
+  };
+  const int A1::sz2 = 11;
+  template <typename T>
+  void func () {
+    int arr[A1::sz];
+    // both-warning@-1 {{variable length arrays in C++ are a Clang extension}}
+    // both-note@-2 {{initializer of 'sz' is unknown}}
+    // both-note@-9 {{declared here}}
+  }
+  template <typename T>
+  void func2 () {
+    int arr[A1::sz2];
+  }
+  const int A1::sz = 12;
+  void func2() {
+    func<int>();
+    func2<int>();
+  }
+}

From 5122a2c2320c7b14f6585e63b7fc43ac82a550c2 Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Thu, 11 Apr 2024 10:07:24 -0700
Subject: [PATCH 161/886] [mlir][sparse] allow for direct-out passing of
 sparse tensor buffers (#88327)

In order to support various external frameworks (JAX vs PyTorch) we need
a bit more flexibility in [dis]assembling external buffers to and from
sparse tensors in MLIR land.
This PR adds a direct-out option that avoids the rigid pre-allocated
copy-out semantics. Note that over time, we expect the [dis]assemble
operations to converge into something that supports all sorts of external
frameworks. Until then, this option helps in experimenting with different
options.
---
 .../Dialect/SparseTensor/Transforms/Passes.h  |  3 +-
 .../Dialect/SparseTensor/Transforms/Passes.td |  9 ++
 .../Transforms/SparseAssembler.cpp            | 87 ++++++++++++-------
 .../Transforms/SparseTensorConversion.cpp     |  9 +-
 .../Transforms/SparseTensorPasses.cpp         |  3 +-
 .../Dialect/SparseTensor/external_direct.mlir | 52 +++++++++++
 6 files changed, 125 insertions(+), 38 deletions(-)
 create mode 100644 mlir/test/Dialect/SparseTensor/external_direct.mlir

diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
index 61b07d222d156..d6d038ef65bdf 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -60,9 +60,10 @@ enum class SparseEmitStrategy {
 // The SparseAssembler pass.
 //===----------------------------------------------------------------------===//
 
-void populateSparseAssembler(RewritePatternSet &patterns);
+void populateSparseAssembler(RewritePatternSet &patterns, bool directOut);
 
 std::unique_ptr<Pass> createSparseAssembler();
+std::unique_ptr<Pass> createSparseAssembler(bool directOut);
 
 //===----------------------------------------------------------------------===//
 // The SparseReinterpretMap pass.
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 58e2d6f32386c..4706d5ba2f218 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -23,12 +23,21 @@ def SparseAssembler : Pass<"sparse-assembler", "ModuleOp"> {
     sparse tensors as numpy arrays from and to Python. Note that eventual
     bufferization decisions (e.g. who [de]allocates the underlying memory)
     should be resolved in agreement with the external runtime.
+
+    By default, the pass uses the [dis]assemble operations to input and output
+    sparse tensors. When the direct-out option is set, however, the output
+    directly returns the MLIR allocated buffers to the external runtime.
   }];
 
   let constructor = "mlir::createSparseAssembler()";
   let dependentDialects = [
+    "bufferization::BufferizationDialect",
     "sparse_tensor::SparseTensorDialect",
     "tensor::TensorDialect",
   ];
+  let options = [
+    Option<"directOut", "direct-out", "bool",
+           "false", "Directly returns buffers externally">,
+  ];
 }
 
 def SparseReinterpretMap : Pass<"sparse-reinterpret-map", "ModuleOp"> {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp
index a91d32a23cac9..eafbe95b7aebe 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp
@@ -8,6 +8,7 @@
 
 #include "Utils/CodegenUtils.h"
 
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
@@ -24,7 +25,7 @@ using namespace sparse_tensor;
 
 // Convert type range to new types range, with sparse tensors externalized.
 static void convTypes(TypeRange types, SmallVectorImpl<Type> &convTypes,
-                      SmallVectorImpl<Type> *extraTypes = nullptr) {
+                      SmallVectorImpl<Type> *extraTypes, bool directOut) {
   for (auto type : types) {
     // All "dense" data passes through unmodified.
     if (!getSparseTensorEncoding(type)) {
@@ -32,31 +33,33 @@ static void convTypes(TypeRange types, SmallVectorImpl<Type> &convTypes,
       continue;
     }
 
-    // Convert the external representation of the position/coordinate array
+    // Convert the external representations of the pos/crd/val arrays.
     const SparseTensorType stt(cast<RankedTensorType>(type));
-    foreachFieldAndTypeInSparseTensor(stt, [&convTypes, extraTypes](
-                                               Type t, FieldIndex,
-                                               SparseTensorFieldKind kind,
-                                               Level, LevelType) {
-      if (kind == SparseTensorFieldKind::CrdMemRef ||
-          kind == SparseTensorFieldKind::PosMemRef ||
-          kind == SparseTensorFieldKind::ValMemRef) {
-        ShapedType st = t.cast<ShapedType>();
-        auto rtp = RankedTensorType::get(st.getShape(), st.getElementType());
-        convTypes.push_back(rtp);
-        if (extraTypes)
-          extraTypes->push_back(rtp);
-      }
-      return true;
-    });
+    foreachFieldAndTypeInSparseTensor(
+        stt, [&convTypes, extraTypes, directOut](Type t, FieldIndex,
+                                                 SparseTensorFieldKind kind,
+                                                 Level, LevelType) {
+          if (kind == SparseTensorFieldKind::PosMemRef ||
+              kind == SparseTensorFieldKind::CrdMemRef ||
+              kind == SparseTensorFieldKind::ValMemRef) {
+            auto rtp = t.cast<ShapedType>();
+            if (!directOut) {
+              rtp = RankedTensorType::get(rtp.getShape(), rtp.getElementType());
+              if (extraTypes)
+                extraTypes->push_back(rtp);
+            }
+            convTypes.push_back(rtp);
+          }
+          return true;
+        });
   }
 }
 
 // Convert input and output values to [dis]assemble ops for sparse tensors.
 static void convVals(OpBuilder &builder, Location loc, TypeRange types,
                      ValueRange fromVals, ValueRange extraVals,
-                     SmallVectorImpl<Value> &toVals, unsigned extra,
-                     bool isIn) {
+                     SmallVectorImpl<Value> &toVals, unsigned extra, bool isIn,
+                     bool directOut) {
   unsigned idx = 0;
   for (auto type : types) {
     // All "dense" data passes through unmodified.
@@ -73,18 +76,29 @@ static void convVals(OpBuilder &builder, Location loc, TypeRange types,
     if (!isIn)
       inputs.push_back(fromVals[idx++]); // The sparse tensor to disassemble
 
-    // Collect the external representations of the pos/crd arrays.
+    // Collect the external representations of the pos/crd/val arrays.
     foreachFieldAndTypeInSparseTensor(stt, [&, isIn](Type t, FieldIndex,
                                                      SparseTensorFieldKind kind,
-                                                     Level, LevelType) {
-      if (kind == SparseTensorFieldKind::CrdMemRef ||
-          kind == SparseTensorFieldKind::PosMemRef ||
+                                                     Level lv, LevelType) {
+      if (kind == SparseTensorFieldKind::PosMemRef ||
+          kind == SparseTensorFieldKind::CrdMemRef ||
          kind == SparseTensorFieldKind::ValMemRef) {
        if (isIn) {
          inputs.push_back(fromVals[idx++]);
+        } else if (directOut) {
+          Value mem;
+          if (kind == SparseTensorFieldKind::PosMemRef)
+            mem = builder.create<sparse_tensor::ToPositionsOp>(loc, inputs[0],
+                                                               lv);
+          else if (kind == SparseTensorFieldKind::CrdMemRef)
+            mem = builder.create<sparse_tensor::ToCoordinatesOp>(loc, inputs[0],
+                                                                 lv);
+          else
+            mem = builder.create<sparse_tensor::ToValuesOp>(loc, inputs[0]);
+          toVals.push_back(mem);
        } else {
-          ShapedType st = t.cast<ShapedType>();
-          auto rtp = RankedTensorType::get(st.getShape(), st.getElementType());
+          ShapedType rtp = t.cast<ShapedType>();
+          rtp = RankedTensorType::get(rtp.getShape(), rtp.getElementType());
           inputs.push_back(extraVals[extra++]);
           retTypes.push_back(rtp);
           cntTypes.push_back(builder.getIndexType());
@@ -97,7 +111,7 @@ static void convVals(OpBuilder &builder, Location loc, TypeRange types,
 
       // Assemble multiple inputs into a single sparse tensor.
auto a = builder.create(loc, rtp, inputs); toVals.push_back(a.getResult()); - } else { + } else if (!directOut) { // Disassemble a single sparse input into multiple outputs. // Note that this includes the counters, which are dropped. unsigned len = retTypes.size(); @@ -144,11 +158,14 @@ namespace { // return ..., t1..tn, ... // } // -// TODO: refine output sparse tensors to work well with external framework +// (with a direct-out variant without the disassemble). // struct SparseFuncAssembler : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; + SparseFuncAssembler(MLIRContext *context, bool dO) + : OpRewritePattern(context), directOut(dO) {} + LogicalResult matchAndRewrite(func::FuncOp funcOp, PatternRewriter &rewriter) const override { // Only rewrite public entry methods. @@ -159,8 +176,8 @@ struct SparseFuncAssembler : public OpRewritePattern { SmallVector inputTypes; SmallVector outputTypes; SmallVector extraTypes; - convTypes(funcOp.getArgumentTypes(), inputTypes); - convTypes(funcOp.getResultTypes(), outputTypes, &extraTypes); + convTypes(funcOp.getArgumentTypes(), inputTypes, nullptr, false); + convTypes(funcOp.getResultTypes(), outputTypes, &extraTypes, directOut); // Only sparse inputs or outputs need a wrapper method. if (inputTypes.size() == funcOp.getArgumentTypes().size() && @@ -192,7 +209,7 @@ struct SparseFuncAssembler : public OpRewritePattern { // Convert inputs. SmallVector inputs; convVals(rewriter, loc, funcOp.getArgumentTypes(), body->getArguments(), - ValueRange(), inputs, 0, /*isIn=*/true); + ValueRange(), inputs, /*extra=*/0, /*isIn=*/true, directOut); // Call the original, now private method. A subsequent inlining pass can // determine whether cloning the method body in place is worthwhile. @@ -203,7 +220,7 @@ struct SparseFuncAssembler : public OpRewritePattern { // Convert outputs and return. SmallVector outputs; convVals(rewriter, loc, funcOp.getResultTypes(), call.getResults(), - body->getArguments(), outputs, extra, /*isIn=*/false); + body->getArguments(), outputs, extra, /*isIn=*/false, directOut); rewriter.create(loc, outputs); // Finally, migrate a potential c-interface property. @@ -215,6 +232,9 @@ struct SparseFuncAssembler : public OpRewritePattern { } return success(); } + +private: + const bool directOut; }; } // namespace @@ -223,6 +243,7 @@ struct SparseFuncAssembler : public OpRewritePattern { // Public method for populating conversion rules. //===----------------------------------------------------------------------===// -void mlir::populateSparseAssembler(RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); +void mlir::populateSparseAssembler(RewritePatternSet &patterns, + bool directOut) { + patterns.add(patterns.getContext(), directOut); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index c52fa3751e6b4..f0d162bdb84d9 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -767,6 +767,12 @@ class SparseTensorAssembleConverter : public OpConversionPattern { }; /// Sparse conversion rule for the sparse_tensor.disassemble operator. +/// Note that the current implementation simply exposes the buffers to +/// the external client. This assumes the client only reads the buffers +/// (usually copying it to the external data structures, such as numpy +/// arrays). 
The semantics of the disassemble operation technically +/// require that the copying is done here already using the out-levels +/// and out-values clause. class SparseTensorDisassembleConverter : public OpConversionPattern { public: @@ -774,9 +780,6 @@ class SparseTensorDisassembleConverter LogicalResult matchAndRewrite(DisassembleOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // We simply expose the buffers to the external client. This - // assumes the client only reads the buffers (usually copying it - // to the external data structures, such as numpy arrays). Location loc = op->getLoc(); auto stt = getSparseTensorType(op.getTensor()); SmallVector retVal; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index acea25f023980..b42d58634a36c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -50,11 +50,12 @@ namespace { struct SparseAssembler : public impl::SparseAssemblerBase { SparseAssembler() = default; SparseAssembler(const SparseAssembler &pass) = default; + SparseAssembler(bool dO) { directOut = dO; } void runOnOperation() override { auto *ctx = &getContext(); RewritePatternSet patterns(ctx); - populateSparseAssembler(patterns); + populateSparseAssembler(patterns, directOut); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } }; diff --git a/mlir/test/Dialect/SparseTensor/external_direct.mlir b/mlir/test/Dialect/SparseTensor/external_direct.mlir new file mode 100644 index 0000000000000..78c4a295686b3 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/external_direct.mlir @@ -0,0 +1,52 @@ +// RUN: mlir-opt %s --sparse-assembler="direct-out=True" -split-input-file | FileCheck %s + +// ----- + +// CHECK-LABEL: func.func @sparse_in( +// CHECK-SAME: %[[B:.*0]]: tensor, +// CHECK-SAME: %[[C:.*1]]: tensor, +// CHECK-SAME: %[[A:.*]]: tensor) -> tensor<64x64xf32> { +// CHECK: %[[I:.*]] = sparse_tensor.assemble (%[[B]], %[[C]]), %[[A]] +// CHECK: %[[F:.*]] = call @_internal_sparse_in(%[[I]]) +// CHECK: return %[[F]] : tensor<64x64xf32> +// CHECK: } +// CHECK: func.func private @_internal_sparse_in +#sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> +func.func @sparse_in(%arg0: tensor<64x64xf32, #sparse>) -> tensor<64x64xf32> { + %0 = sparse_tensor.convert %arg0 : tensor<64x64xf32, #sparse> to tensor<64x64xf32> + return %0 : tensor<64x64xf32> +} + +// ----- + +// CHECK-LABEL: func.func @sparse_out( +// CHECK-SAME: %[[X:.*0]]: tensor<64x64xf32>) +// CHECK: %[[F:.*]] = call @_internal_sparse_out(%[[X]]) +// CHECK: %[[P:.*]] = sparse_tensor.positions %[[F]] +// CHECK: %[[C:.*]] = sparse_tensor.coordinates %[[F]] +// CHECK: %[[V:.*]] = sparse_tensor.values %[[F]] +// CHECK: return %[[P]], %[[C]], %[[V]] +// CHECK: } +// CHECK: func.func private @_internal_sparse_out +#sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> +func.func @sparse_out(%arg0: tensor<64x64xf32>) -> tensor<64x64xf32, #sparse> { + %0 = sparse_tensor.convert %arg0 : tensor<64x64xf32> to tensor<64x64xf32, #sparse> + return %0 : tensor<64x64xf32, #sparse> +} + +// ----- + +// CHECK-LABEL: func.func @sparse_out2( +// CHECK-SAME: %[[X:.*0]]: tensor<64x64xf32>) +// CHECK: %[[F:.*]]:2 = call @_internal_sparse_out2(%[[X]]) +// CHECK: %[[P:.*]] = sparse_tensor.positions %[[F]]#1 +// CHECK: %[[C:.*]] = 
+// CHECK:       %[[V:.*]] = sparse_tensor.values %[[F]]#1
+// CHECK:       return %[[F]]#0, %[[P]], %[[C]], %[[V]]
+// CHECK:       }
+// CHECK:       func.func private @_internal_sparse_out2
+#sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
+func.func @sparse_out2(%arg0: tensor<64x64xf32>) -> (tensor<64x64xf32>, tensor<64x64xf32, #sparse>) {
+  %0 = sparse_tensor.convert %arg0 : tensor<64x64xf32> to tensor<64x64xf32, #sparse>
+  return %arg0, %0 : tensor<64x64xf32>, tensor<64x64xf32, #sparse>
+}

From f626a35086d90f25986e3f06e01a54cca91250d8 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers
Date: Thu, 11 Apr 2024 10:11:58 -0700
Subject: [PATCH 162/886] [libc] Codify header inclusion policy (#87017)

When supporting "overlay" vs "fullbuild" modes, "what ABI are you
using?" becomes a fundamental question to have concrete answers for.
Overlay mode MUST match the ABI of the system being overlayed onto;
fullbuild is more flexible (the only relevant system ABI is the OS
kernel's).

When implementing llvm-libc we generally prefer the include-what-you-use
style of avoiding transitive dependencies (since that makes refactoring
headers more painful, and slows down build times). So what header do you
include for any given type or function declaration?

For any given userspace program, the answer is straightforward. But for
llvm-libc, which is trying to support multiple ABIs (at least one per
configuration), the answer is perhaps less clear.

This proposal seeks to add one layer of indirection relative to what's
being done today. It then converts users of sigset_t and struct
epoll_event and the epoll implementations over to this convention as an
example.
---
 libc/docs/dev/code_style.rst                  | 33 +++++++++++++++++++
 libc/docs/usage_modes.rst                     |  6 +++-
 libc/hdr/CMakeLists.txt                       |  2 ++
 libc/hdr/types/CMakeLists.txt                 | 23 +++++++++++++
 libc/hdr/types/sigset_t.h                     | 21 ++++++++++++
 libc/hdr/types/struct_epoll_event.h           | 21 ++++++++++++
 libc/hdr/types/struct_timespec.h              | 21 ++++++++++++
 libc/src/signal/linux/CMakeLists.txt          | 16 +++++----
 libc/src/signal/linux/raise.cpp               |  5 +--
 libc/src/signal/linux/sigaction.cpp           |  7 ++--
 libc/src/signal/linux/sigaddset.cpp           |  4 +--
 libc/src/signal/linux/sigdelset.cpp           |  4 +--
 libc/src/signal/linux/sigfillset.cpp          |  4 +--
 libc/src/signal/linux/signal_utils.h          |  3 +-
 libc/src/signal/linux/sigprocmask.cpp         |  6 ++--
 libc/src/signal/sigaddset.h                   |  2 +-
 libc/src/signal/sigdelset.h                   |  2 +-
 libc/src/signal/sigemptyset.h                 |  2 +-
 libc/src/signal/sigfillset.h                  |  2 +-
 libc/src/signal/sigprocmask.h                 |  2 +-
 libc/src/sys/epoll/epoll_pwait.h              |  7 ++--
 libc/src/sys/epoll/epoll_pwait2.h             |  9 ++---
 libc/src/sys/epoll/epoll_wait.h               |  5 +--
 libc/src/sys/epoll/linux/CMakeLists.txt       | 14 +++++---
 libc/src/sys/epoll/linux/epoll_pwait.cpp      | 10 ++----
 libc/src/sys/epoll/linux/epoll_pwait2.cpp     | 12 +++----
 libc/src/sys/epoll/linux/epoll_wait.cpp       |  9 ++---
 libc/src/sys/select/linux/select.cpp          |  8 ++---
 .../llvm-project-overlay/libc/BUILD.bazel     | 13 ++++++++
 29 files changed, 200 insertions(+), 73 deletions(-)
 create mode 100644 libc/hdr/types/CMakeLists.txt
 create mode 100644 libc/hdr/types/sigset_t.h
 create mode 100644 libc/hdr/types/struct_epoll_event.h
 create mode 100644 libc/hdr/types/struct_timespec.h

diff --git a/libc/docs/dev/code_style.rst b/libc/docs/dev/code_style.rst
index 22a18b7a4cc1d..ee4e4257c9fa8 100644
--- a/libc/docs/dev/code_style.rst
+++ b/libc/docs/dev/code_style.rst
@@ -186,3 +186,36 @@ We expect contributions to be free of warnings from the `minimum supported
 compiler versions`__ (and newer).
 
 .. __: https://libc.llvm.org/compiler_support.html#minimum-supported-versions
+
+Header Inclusion Policy
+=======================
+
+Because llvm-libc supports
+`Overlay Mode <https://libc.llvm.org/overlay_mode.html>`__ and
+`Fullbuild Mode <https://libc.llvm.org/fullbuild_mode.html>`__, care must be
+taken when ``#include``'ing certain headers.
+
+The ``include/`` directory contains public facing headers that users must
+consume for fullbuild mode. As such, types defined here will have ABI
+implications, as these definitions may differ from the underlying system for
+overlay mode and are NEVER appropriate to include in ``libc/src/`` without
+preprocessor guards for ``LLVM_LIBC_FULL_BUILD``.
+
+Consider the case where an implementation in ``libc/src/`` may wish to refer to
+a ``sigset_t``: what header should be included? ``<signal.h>``,
+``<sys/select.h>``, ``<sys/epoll.h>``?
+
+None of the above. Instead, code under ``src/`` should ``#include
+"hdr/types/sigset_t.h"``, which contains preprocessor guards on
+``LLVM_LIBC_FULL_BUILD`` to either include the public type (fullbuild mode) or
+the underlying system header (overlay mode).
+
+Implementations in ``libc/src/`` should NOT be ``#include``'ing using ``<>`` or
+``"include/*``, except for these "proxy" headers that first check for
+``LLVM_LIBC_FULL_BUILD``.
+
+These "proxy" headers are similarly used when referring to preprocessor
+defines. Code under ``libc/src/`` should ``#include`` a proxy header from
+``hdr/``, which contains a guard on ``LLVM_LIBC_FULL_BUILD`` to either include
+our header from ``libc/include/`` (fullbuild) or the corresponding underlying
+system header (overlay).
diff --git a/libc/docs/usage_modes.rst b/libc/docs/usage_modes.rst
index 11c10623b61db..8e5dcca6e0a75 100644
--- a/libc/docs/usage_modes.rst
+++ b/libc/docs/usage_modes.rst
@@ -6,6 +6,10 @@ The libc can be used in two different modes:
 
 #. The **overlay** mode: In this mode, the link order semantics are exploited
    to overlay implementations from LLVM's libc over the system libc. See
-   :ref:`overlay_mode` for more information about this mode.
+   :ref:`overlay_mode` for more information about this mode. In this mode, libc
+   uses the ABI of the system it's being overlayed onto. Headers are NOT
+   generated. libllvmlibc.a is the only build artifact.
 #. The **fullbuild** mode: In this mode, LLVM's libc is used as the only libc
    for the binary. See :ref:`fullbuild_mode` for information about this mode.
+   In this mode, libc uses its own ABI. Headers are generated along with a
+   libc.a.
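
To make the inclusion policy concrete, the following is a minimal sketch of a
consumer of one of these proxy headers. It is illustrative only and is not part
of this patch's diffs; the file path and the is_null_set helper are
hypothetical:

    // Hypothetical file: libc/src/signal/linux/example.cpp
    // Include the proxy header rather than <signal.h> (overlay mode) or
    // "include/llvm-libc-types/sigset_t.h" (fullbuild mode); the proxy's
    // LIBC_FULL_BUILD guard selects the right definition for the build mode.
    #include "hdr/types/sigset_t.h"

    namespace LIBC_NAMESPACE {

    // Uses sigset_t without caring which ABI provided its definition.
    bool is_null_set(const sigset_t *set) { return set == nullptr; }

    } // namespace LIBC_NAMESPACE

Whichever mode is built, this translation unit compiles against the ABI-correct
definition of sigset_t, which is exactly the property the policy above is meant
to guarantee.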
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index 5a1acd9d17ab4..38ef56e3f04c0 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -40,3 +40,5 @@ add_proxy_header_library(
     libc.include.llvm-libc-macros.fenv_macros
     libc.include.fenv
 )
+
+add_subdirectory(types)
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
new file mode 100644
index 0000000000000..b685d82fd8cc8
--- /dev/null
+++ b/libc/hdr/types/CMakeLists.txt
@@ -0,0 +1,23 @@
+add_proxy_header_library(
+  sigset_t
+  HDRS
+    sigset_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.sigset_t
+)
+
+add_proxy_header_library(
+  struct_epoll_event
+  HDRS
+    struct_epoll_event.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_epoll_event
+)
+
+add_proxy_header_library(
+  struct_timespec
+  HDRS
+    struct_timespec.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_timespec
+)
diff --git a/libc/hdr/types/sigset_t.h b/libc/hdr/types/sigset_t.h
new file mode 100644
index 0000000000000..695ec3029f686
--- /dev/null
+++ b/libc/hdr/types/sigset_t.h
@@ -0,0 +1,21 @@
+//===-- Proxy for sigset_t ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_SIGSET_T_H
+#define LLVM_LIBC_HDR_TYPES_SIGSET_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/sigset_t.h"
+
+#else
+
+#include <signal.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_SIGSET_T_H
diff --git a/libc/hdr/types/struct_epoll_event.h b/libc/hdr/types/struct_epoll_event.h
new file mode 100644
index 0000000000000..5bb98ce05bb28
--- /dev/null
+++ b/libc/hdr/types/struct_epoll_event.h
@@ -0,0 +1,21 @@
+//===-- Proxy for struct epoll_event --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_EPOLL_EVENT_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_EPOLL_EVENT_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_epoll_event.h"
+
+#else
+
+#include <sys/epoll.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_EPOLL_EVENT_H
diff --git a/libc/hdr/types/struct_timespec.h b/libc/hdr/types/struct_timespec.h
new file mode 100644
index 0000000000000..1f121f3d24d82
--- /dev/null
+++ b/libc/hdr/types/struct_timespec.h
@@ -0,0 +1,21 @@
+//===-- Proxy for struct timespec ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_TIMESPEC_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_TIMESPEC_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_timespec.h"
+
+#else
+
+#include <time.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_TIMESPEC_H
diff --git a/libc/src/signal/linux/CMakeLists.txt b/libc/src/signal/linux/CMakeLists.txt
index 77a2453b25a0a..7606b4b21d3dd 100644
--- a/libc/src/signal/linux/CMakeLists.txt
+++ b/libc/src/signal/linux/CMakeLists.txt
@@ -3,6 +3,8 @@ add_header_library(
   HDRS
     signal_utils.h
   DEPENDS
+    libc.hdr.types.sigset_t
+    libc.include.signal
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
 )
@@ -28,7 +30,7 @@ add_entrypoint_object(
     ../raise.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
    libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
 )
@@ -57,7 +59,7 @@ add_entrypoint_object(
     ../sigaction.h
   DEPENDS
     .__restore
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -84,7 +86,7 @@ add_entrypoint_object(
     ../sigprocmask.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -98,7 +100,7 @@ add_entrypoint_object(
     ../sigemptyset.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.src.errno.errno
 )
@@ -110,7 +112,7 @@ add_entrypoint_object(
     ../sigaddset.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.src.errno.errno
 )
@@ -133,7 +135,7 @@ add_entrypoint_object(
     ../sigfillset.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.src.errno.errno
 )
@@ -145,6 +147,6 @@ add_entrypoint_object(
     ../sigdelset.h
   DEPENDS
     .signal_utils
-    libc.include.signal
+    libc.hdr.types.sigset_t
     libc.src.errno.errno
 )
diff --git a/libc/src/signal/linux/raise.cpp b/libc/src/signal/linux/raise.cpp
index dd6f5eb4b3575..2250df5478444 100644
--- a/libc/src/signal/linux/raise.cpp
+++ b/libc/src/signal/linux/raise.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/raise.h"
-#include "src/signal/linux/signal_utils.h"
 
+#include "hdr/types/sigset_t.h"
 #include "src/__support/common.h"
+#include "src/signal/linux/signal_utils.h"
 
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, raise, (int sig)) {
-  ::sigset_t sigset;
+  sigset_t sigset;
   block_all_signals(sigset);
   long pid = LIBC_NAMESPACE::syscall_impl(SYS_getpid);
   long tid = LIBC_NAMESPACE::syscall_impl(SYS_gettid);
diff --git a/libc/src/signal/linux/sigaction.cpp b/libc/src/signal/linux/sigaction.cpp
index 7ddc2dc5cbcc7..7b220e5c37f6f 100644
--- a/libc/src/signal/linux/sigaction.cpp
+++ b/libc/src/signal/linux/sigaction.cpp
@@ -7,12 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/sigaction.h"
-#include "src/errno/libc_errno.h"
-#include "src/signal/linux/signal_utils.h"
 
+#include "hdr/types/sigset_t.h"
 #include "src/__support/common.h"
-
-#include <signal.h>
+#include "src/errno/libc_errno.h"
+#include "src/signal/linux/signal_utils.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/linux/sigaddset.cpp b/libc/src/signal/linux/sigaddset.cpp
index 536391734e058..8fc5d43180e28 100644
--- a/libc/src/signal/linux/sigaddset.cpp
+++ b/libc/src/signal/linux/sigaddset.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/sigaddset.h"
+
+#include "hdr/types/sigset_t.h"
 #include "src/__support/common.h"
 #include "src/errno/libc_errno.h"
 #include "src/signal/linux/signal_utils.h"
 
-#include <signal.h>
-
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, sigaddset, (sigset_t * set, int signum)) {
diff --git a/libc/src/signal/linux/sigdelset.cpp b/libc/src/signal/linux/sigdelset.cpp
index 5cb645e461cf8..997f4574c05d0 100644
--- a/libc/src/signal/linux/sigdelset.cpp
+++ b/libc/src/signal/linux/sigdelset.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/sigdelset.h"
+
+#include "hdr/types/sigset_t.h"
 #include "src/__support/common.h"
 #include "src/errno/libc_errno.h"
 #include "src/signal/linux/signal_utils.h"
 
-#include <signal.h>
-
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, sigdelset, (sigset_t * set, int signum)) {
diff --git a/libc/src/signal/linux/sigfillset.cpp b/libc/src/signal/linux/sigfillset.cpp
index e17c85a897ce7..d98bbf7f619cc 100644
--- a/libc/src/signal/linux/sigfillset.cpp
+++ b/libc/src/signal/linux/sigfillset.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/sigfillset.h"
+
+#include "hdr/types/sigset_t.h"
 #include "src/__support/common.h"
 #include "src/errno/libc_errno.h"
 #include "src/signal/linux/signal_utils.h"
 
-#include <signal.h>
-
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, sigfillset, (sigset_t * set)) {
diff --git a/libc/src/signal/linux/signal_utils.h b/libc/src/signal/linux/signal_utils.h
index 5e9dd9a5c53ab..3fd0cc0b7b459 100644
--- a/libc/src/signal/linux/signal_utils.h
+++ b/libc/src/signal/linux/signal_utils.h
@@ -9,10 +9,11 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_UTILS_H
 #define LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_UTILS_H
 
+#include "hdr/types/sigset_t.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
-#include <signal.h>
+#include <signal.h> // sigaction
 #include <stddef.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/signal/linux/sigprocmask.cpp b/libc/src/signal/linux/sigprocmask.cpp
index 79a35dd59d75c..0e94efb6400c0 100644
--- a/libc/src/signal/linux/sigprocmask.cpp
+++ b/libc/src/signal/linux/sigprocmask.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/signal/sigprocmask.h"
+
+#include "hdr/types/sigset_t.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
+#include "src/__support/common.h"
 #include "src/errno/libc_errno.h"
 #include "src/signal/linux/signal_utils.h"
 
-#include "src/__support/common.h"
-
-#include <signal.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/sigaddset.h b/libc/src/signal/sigaddset.h
index 626eb20a295c8..c703b46bc6059 100644
--- a/libc/src/signal/sigaddset.h
+++ b/libc/src/signal/sigaddset.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_SIGADDSET_H
 #define LLVM_LIBC_SRC_SIGNAL_SIGADDSET_H
 
-#include <signal.h>
+#include "hdr/types/sigset_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/sigdelset.h b/libc/src/signal/sigdelset.h
index c4fdb9975fa3d..7bdb6e6d18fdd 100644
--- a/libc/src/signal/sigdelset.h
+++ b/libc/src/signal/sigdelset.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_SIGDELSET_H
 #define LLVM_LIBC_SRC_SIGNAL_SIGDELSET_H
 
-#include <signal.h>
+#include "hdr/types/sigset_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/sigemptyset.h b/libc/src/signal/sigemptyset.h
index f3763d1f4f3d4..661fd33b888e0 100644
--- a/libc/src/signal/sigemptyset.h
+++ b/libc/src/signal/sigemptyset.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_SIGEMPTYSET_H
 #define LLVM_LIBC_SRC_SIGNAL_SIGEMPTYSET_H
 
-#include <signal.h>
+#include "hdr/types/sigset_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/sigfillset.h b/libc/src/signal/sigfillset.h
index d8e3168871ea8..2849aacf953b1 100644
--- a/libc/src/signal/sigfillset.h
+++ b/libc/src/signal/sigfillset.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_SIGFILLSET_H
 #define LLVM_LIBC_SRC_SIGNAL_SIGFILLSET_H
 
-#include <signal.h>
+#include "hdr/types/sigset_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/signal/sigprocmask.h b/libc/src/signal/sigprocmask.h
index e0658860579e4..8569578eb68ca 100644
--- a/libc/src/signal/sigprocmask.h
+++ b/libc/src/signal/sigprocmask.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SIGNAL_SIGPROCMASK_H
 #define LLVM_LIBC_SRC_SIGNAL_SIGPROCMASK_H
 
-#include <signal.h>
+#include "hdr/types/sigset_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/sys/epoll/epoll_pwait.h b/libc/src/sys/epoll/epoll_pwait.h
index 9dcb55533009f..801aa97610001 100644
--- a/libc/src/sys/epoll/epoll_pwait.h
+++ b/libc/src/sys/epoll/epoll_pwait.h
@@ -9,11 +9,8 @@
 #ifndef LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT_H
 #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT_H
 
-// TODO: Use this include once the include headers are also using quotes.
-// #include "include/llvm-libc-types/sigset_t.h"
-// #include "include/llvm-libc-types/struct_epoll_event.h"
-
-#include <sys/epoll.h>
+#include "hdr/types/sigset_t.h"
+#include "hdr/types/struct_epoll_event.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/sys/epoll/epoll_pwait2.h b/libc/src/sys/epoll/epoll_pwait2.h
index 622ede6a0f9f9..7fc528b2fd25d 100644
--- a/libc/src/sys/epoll/epoll_pwait2.h
+++ b/libc/src/sys/epoll/epoll_pwait2.h
@@ -9,12 +9,9 @@
 #ifndef LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT2_H
 #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT2_H
 
-// TODO: Use this include once the include headers are also using quotes.
-// #include "include/llvm-libc-types/sigset_t.h"
-// #include "include/llvm-libc-types/struct_epoll_event.h"
-// #include "include/llvm-libc-types/struct_timespec.h"
-
-#include <sys/epoll.h>
+#include "hdr/types/sigset_t.h"
+#include "hdr/types/struct_epoll_event.h"
+#include "hdr/types/struct_timespec.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/sys/epoll/epoll_wait.h b/libc/src/sys/epoll/epoll_wait.h
index d51c9100846ce..b546e91e4c2ee 100644
--- a/libc/src/sys/epoll/epoll_wait.h
+++ b/libc/src/sys/epoll/epoll_wait.h
@@ -9,10 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_WAIT_H
 #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_WAIT_H
 
-// TODO: Use this include once the include headers are also using quotes.
-// #include "include/llvm-libc-types/struct_epoll_event.h" - -#include +#include "hdr/types/struct_epoll_event.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/sys/epoll/linux/CMakeLists.txt b/libc/src/sys/epoll/linux/CMakeLists.txt index a27905d962dc5..586aac7055dc1 100644 --- a/libc/src/sys/epoll/linux/CMakeLists.txt +++ b/libc/src/sys/epoll/linux/CMakeLists.txt @@ -5,7 +5,9 @@ add_entrypoint_object( HDRS ../epoll_wait.h DEPENDS - libc.include.sys_epoll + libc.hdr.types.sigset_t + libc.hdr.types.struct_epoll_event + libc.hdr.types.struct_timespec libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.errno.errno @@ -18,7 +20,9 @@ add_entrypoint_object( HDRS ../epoll_pwait.h DEPENDS - libc.include.sys_epoll + libc.hdr.types.sigset_t + libc.hdr.types.struct_epoll_event + libc.hdr.types.struct_timespec libc.include.signal libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -32,10 +36,12 @@ add_entrypoint_object( HDRS ../epoll_pwait2.h DEPENDS - libc.include.sys_epoll + libc.hdr.types.sigset_t + libc.hdr.types.struct_epoll_event + libc.hdr.types.struct_timespec libc.include.signal - libc.include.time libc.include.sys_syscall + libc.include.time libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/sys/epoll/linux/epoll_pwait.cpp b/libc/src/sys/epoll/linux/epoll_pwait.cpp index ee1b4e66e9844..ac012944a9577 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait.cpp @@ -8,17 +8,13 @@ #include "src/sys/epoll/epoll_pwait.h" +#include "hdr/types/sigset_t.h" +#include "hdr/types/struct_epoll_event.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" - #include "src/errno/libc_errno.h" -#include // For syscall numbers. -// TODO: Use this include once the include headers are also using quotes. -// #include "include/llvm-libc-types/sigset_t.h" -// #include "include/llvm-libc-types/struct_epoll_event.h" - -#include +#include // For syscall numbers. namespace LIBC_NAMESPACE { diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp index 671dede2a1058..3c42e38deb22b 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp @@ -8,18 +8,14 @@ #include "src/sys/epoll/epoll_pwait2.h" +#include "hdr/types/sigset_t.h" +#include "hdr/types/struct_epoll_event.h" +#include "hdr/types/struct_timespec.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" - #include "src/errno/libc_errno.h" -#include // For syscall numbers. -// TODO: Use this include once the include headers are also using quotes. -// #include "include/llvm-libc-types/sigset_t.h" -// #include "include/llvm-libc-types/struct_epoll_event.h" -// #include "include/llvm-libc-types/struct_timespec.h" - -#include +#include // For syscall numbers. namespace LIBC_NAMESPACE { diff --git a/libc/src/sys/epoll/linux/epoll_wait.cpp b/libc/src/sys/epoll/linux/epoll_wait.cpp index 0c43edf764545..18dd6e2b83543 100644 --- a/libc/src/sys/epoll/linux/epoll_wait.cpp +++ b/libc/src/sys/epoll/linux/epoll_wait.cpp @@ -8,16 +8,13 @@ #include "src/sys/epoll/epoll_wait.h" +#include "hdr/types/sigset_t.h" +#include "hdr/types/struct_epoll_event.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/errno/libc_errno.h" -#include // For syscall numbers. 
-
-// TODO: Use this include once the include headers are also using quotes.
-// #include "include/llvm-libc-types/sigset_t.h"
-// #include "include/llvm-libc-types/struct_epoll_event.h"
-#include <sys/epoll.h>
+#include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/sys/select/linux/select.cpp b/libc/src/sys/select/linux/select.cpp
index 3f387c14ec560..9034b75e5c29e 100644
--- a/libc/src/sys/select/linux/select.cpp
+++ b/libc/src/sys/select/linux/select.cpp
@@ -8,14 +8,14 @@
 
 #include "src/sys/select/select.h"
 
+#include "hdr/types/sigset_t.h"
+#include "hdr/types/struct_timespec.h"
 #include "src/__support/CPP/limits.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
-
 #include "src/errno/libc_errno.h"
-#include <signal.h>
-#include <stddef.h> // For size_t
-#include <time.h>
+
+#include <stddef.h> // For size_t
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE {
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index d38dc3029f74f..c61904a967117 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -3417,6 +3417,15 @@ libc_function(
 
 ############################## sys/epoll targets ###############################
 
+libc_support_library(
+    name = "types_sigset_t",
+    hdrs = ["hdr/types/sigset_t.h"],
+)
+libc_support_library(
+    name = "types_struct_epoll_event",
+    hdrs = ["hdr/types/struct_epoll_event.h"],
+)
+
 libc_function(
     name = "epoll_wait",
     srcs = ["src/sys/epoll/linux/epoll_wait.cpp"],
@@ -3429,6 +3438,8 @@ libc_function(
     deps = [
         ":__support_osutil_syscall",
         ":errno",
+        ":types_sigset_t",
+        ":types_struct_epoll_event",
     ],
 )
 
@@ -3444,6 +3455,8 @@ libc_function(
     deps = [
         ":__support_osutil_syscall",
         ":errno",
+        ":types_sigset_t",
+        ":types_struct_epoll_event",
     ],
 )

From 4e6d18f40642c2cc8e124bbe55810b2d9b2ac9c0 Mon Sep 17 00:00:00 2001
From: Krystian Stasiowski
Date: Thu, 11 Apr 2024 13:20:05 -0400
Subject: [PATCH 163/886] [Clang][AST] Track whether template template
 parameters used the 'typename' keyword (#88139)

This patch adds a `Typename` bit-field to `TemplateTemplateParmDecl` which
stores whether the template template parameter was declared with the
`typename` keyword.
---
 clang/docs/ReleaseNotes.rst                   |  4 ++
 clang/include/clang/AST/DeclTemplate.h        | 49 +++++++++++++-------
 clang/include/clang/Sema/Sema.h               |  2 +-
 clang/lib/AST/ASTContext.cpp                  |  2 +-
 clang/lib/AST/ASTImporter.cpp                 |  3 +-
 clang/lib/AST/DeclPrinter.cpp                 |  5 +-
 clang/lib/AST/DeclTemplate.cpp                | 18 +++----
 clang/lib/Parse/ParseTemplate.cpp             |  9 ++--
 clang/lib/Sema/SemaTemplate.cpp               | 24 ++++-----
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  6 ++-
 clang/lib/Serialization/ASTReaderDecl.cpp     |  1 +
 clang/lib/Serialization/ASTWriterDecl.cpp     |  1 +
 clang/unittests/AST/DeclPrinterTest.cpp       |  3 +-
 13 files changed, 75 insertions(+), 52 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 93318871fa9f6..93a380411604b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -360,6 +360,9 @@ Improvements to Clang's diagnostics
   Added the ``-Wtentative-definition-array`` warning group to cover this.
   Fixes #GH87766
 
+- Clang now uses the correct type-parameter-key (``class`` or ``typename``) when printing
+  template template parameter declarations.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -535,6 +538,7 @@ Bug Fixes to C++ Support
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 - Clang now properly preserves ``FoundDecls`` within a ``ConceptReference``. (#GH82628)
+- The presence of the ``typename`` keyword is now stored in ``TemplateTemplateParmDecl``.
 
 Miscellaneous Bug Fixes
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index cb598cb81840d..f24e71ff22964 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1581,26 +1581,36 @@ class TemplateTemplateParmDecl final
       DefaultArgStorage<TemplateTemplateParmDecl, TemplateArgumentLoc *>;
   DefArgStorage DefaultArgument;
 
+  /// Whether this template template parameter was declared with
+  /// the 'typename' keyword.
+  ///
+  /// If false, it was declared with the 'class' keyword.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned Typename : 1;
+
   /// Whether this parameter is a parameter pack.
-  bool ParameterPack;
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ParameterPack : 1;
 
   /// Whether this template template parameter is an "expanded"
   /// parameter pack, meaning that it is a pack expansion and we
   /// already know the set of template parameters that expansion expands to.
-  bool ExpandedParameterPack = false;
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ExpandedParameterPack : 1;
 
   /// The number of parameters in an expanded parameter pack.
   unsigned NumExpandedParams = 0;
 
-  TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
-                           unsigned D, unsigned P, bool ParameterPack,
-                           IdentifierInfo *Id, TemplateParameterList *Params)
+  TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L, unsigned D,
+                           unsigned P, bool ParameterPack, IdentifierInfo *Id,
+                           bool Typename, TemplateParameterList *Params)
       : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
-        TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}
+        TemplateParmPosition(D, P), Typename(Typename),
+        ParameterPack(ParameterPack), ExpandedParameterPack(false) {}
 
-  TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
-                           unsigned D, unsigned P,
-                           IdentifierInfo *Id, TemplateParameterList *Params,
+  TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L, unsigned D,
+                           unsigned P, IdentifierInfo *Id, bool Typename,
+                           TemplateParameterList *Params,
                            ArrayRef<TemplateParameterList *> Expansions);
 
   void anchor() override;
@@ -1613,14 +1623,13 @@ class TemplateTemplateParmDecl final
   static TemplateTemplateParmDecl *Create(const ASTContext &C, DeclContext *DC,
                                           SourceLocation L, unsigned D,
                                           unsigned P, bool ParameterPack,
-                                          IdentifierInfo *Id,
+                                          IdentifierInfo *Id, bool Typename,
                                           TemplateParameterList *Params);
-  static TemplateTemplateParmDecl *Create(const ASTContext &C, DeclContext *DC,
-                                          SourceLocation L, unsigned D,
-                                          unsigned P,
-                                          IdentifierInfo *Id,
-                                          TemplateParameterList *Params,
-                                          ArrayRef<TemplateParameterList *> Expansions);
+  static TemplateTemplateParmDecl *
+  Create(const ASTContext &C, DeclContext *DC, SourceLocation L, unsigned D,
+         unsigned P, IdentifierInfo *Id, bool Typename,
+         TemplateParameterList *Params,
+         ArrayRef<TemplateParameterList *> Expansions);
 
   static TemplateTemplateParmDecl *CreateDeserialized(ASTContext &C,
                                                       unsigned ID);
@@ -1634,6 +1643,14 @@ class TemplateTemplateParmDecl final
   using TemplateParmPosition::setPosition;
   using TemplateParmPosition::getIndex;
 
+  /// Whether this template template parameter was declared with
+  /// the 'typename' keyword.
+  bool wasDeclaredWithTypename() const { return Typename; }
+
+  /// Set whether this template template parameter was declared with
+  /// the 'typename' or 'class' keyword.
+  void setDeclaredWithTypename(bool withTypename) { Typename = withTypename; }
+
   /// Whether this template template parameter is a template
   /// parameter pack.
   ///
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0ee4f3c8e127f..f2c55b13b6d32 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9064,7 +9064,7 @@ class Sema final : public SemaBase {
                                       Expr *DefaultArg);
   NamedDecl *ActOnTemplateTemplateParameter(
       Scope *S, SourceLocation TmpLoc, TemplateParameterList *Params,
-      SourceLocation EllipsisLoc, IdentifierInfo *ParamName,
+      bool Typename, SourceLocation EllipsisLoc, IdentifierInfo *ParamName,
       SourceLocation ParamNameLoc, unsigned Depth, unsigned Position,
       SourceLocation EqualLoc, ParsedTemplateArgument DefaultArg);
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 2fa6aedca4c6a..6ce233704a588 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -799,7 +799,7 @@ ASTContext::getCanonicalTemplateTemplateParmDecl(
 
   TemplateTemplateParmDecl *CanonTTP = TemplateTemplateParmDecl::Create(
       *this, getTranslationUnitDecl(), SourceLocation(), TTP->getDepth(),
-      TTP->getPosition(), TTP->isParameterPack(), nullptr,
+      TTP->getPosition(), TTP->isParameterPack(), nullptr, /*Typename=*/false,
       TemplateParameterList::Create(*this, SourceLocation(), SourceLocation(),
                                     CanonParams, SourceLocation(),
                                     /*RequiresClause=*/nullptr));
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index d5ec5ee409156..a5e43fc631667 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -5952,7 +5952,8 @@ ASTNodeImporter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
           ToD, D, Importer.getToContext(),
           Importer.getToContext().getTranslationUnitDecl(), *LocationOrErr,
           D->getDepth(), D->getPosition(), D->isParameterPack(),
-          (*NameOrErr).getAsIdentifierInfo(), *TemplateParamsOrErr))
+          (*NameOrErr).getAsIdentifierInfo(), D->wasDeclaredWithTypename(),
+          *TemplateParamsOrErr))
     return ToD;
 
   if (D->hasDefaultArgument()) {
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 6afdb6cfccb14..c66774dd1df15 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -1218,7 +1218,10 @@ void DeclPrinter::VisitTemplateDecl(const TemplateDecl *D) {
 
   if (const TemplateTemplateParmDecl *TTP =
           dyn_cast<TemplateTemplateParmDecl>(D)) {
-    Out << "class";
+    if (TTP->wasDeclaredWithTypename())
+      Out << "typename";
+    else
+      Out << "class";
 
     if (TTP->isParameterPack())
       Out << " ...";
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 571ed81a42e40..5aa2484197372 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -805,10 +805,10 @@ void TemplateTemplateParmDecl::anchor() {}
 
 TemplateTemplateParmDecl::TemplateTemplateParmDecl(
     DeclContext *DC, SourceLocation L, unsigned D, unsigned P,
-    IdentifierInfo *Id, TemplateParameterList *Params,
+    IdentifierInfo *Id, bool Typename, TemplateParameterList *Params,
    ArrayRef<TemplateParameterList *> Expansions)
     : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
-      TemplateParmPosition(D, P), ParameterPack(true),
+      TemplateParmPosition(D, P), Typename(Typename), ParameterPack(true),
       ExpandedParameterPack(true), NumExpandedParams(Expansions.size()) {
   if (!Expansions.empty())
     std::uninitialized_copy(Expansions.begin(), Expansions.end(),
@@ -819,26 +819,26 @@ TemplateTemplateParmDecl *
 TemplateTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
                                  SourceLocation L, unsigned D, unsigned P,
                                  bool ParameterPack, IdentifierInfo *Id,
-                                 TemplateParameterList *Params) {
+                                 bool Typename, TemplateParameterList *Params) {
   return new (C, DC) TemplateTemplateParmDecl(DC, L, D, P, ParameterPack, Id,
-                                              Params);
+                                              Typename, Params);
 }
 
 TemplateTemplateParmDecl *
 TemplateTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
                                  SourceLocation L, unsigned D, unsigned P,
-                                 IdentifierInfo *Id,
+                                 IdentifierInfo *Id, bool Typename,
                                  TemplateParameterList *Params,
                                  ArrayRef<TemplateParameterList *> Expansions) {
   return new (C, DC,
              additionalSizeToAlloc<TemplateParameterList *>(Expansions.size()))
-      TemplateTemplateParmDecl(DC, L, D, P, Id, Params, Expansions);
+      TemplateTemplateParmDecl(DC, L, D, P, Id, Typename, Params, Expansions);
 }
 
 TemplateTemplateParmDecl *
 TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
   return new (C, ID) TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0,
-                                              false, nullptr, nullptr);
+                                              false, nullptr, false, nullptr);
 }
 
 TemplateTemplateParmDecl *
@@ -847,7 +847,7 @@ TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
   auto *TTP =
      new (C, ID, additionalSizeToAlloc<TemplateParameterList *>(NumExpansions))
          TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0, nullptr,
-                                   nullptr, std::nullopt);
+                                   false, nullptr, std::nullopt);
   TTP->NumExpandedParams = NumExpansions;
   return TTP;
 }
@@ -1469,7 +1469,7 @@ createMakeIntegerSeqParameterList(const ASTContext &C, DeclContext *DC) {
   // template <typename T, T ...Ints> class IntSeq
   auto *TemplateTemplateParm = TemplateTemplateParmDecl::Create(
       C, DC, SourceLocation(), /*Depth=*/0, /*Position=*/0,
-      /*ParameterPack=*/false, /*Id=*/nullptr, TPL);
+      /*ParameterPack=*/false, /*Id=*/nullptr, /*Typename=*/false, TPL);
   TemplateTemplateParm->setImplicit(true);
 
   // typename T
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 03257500426e5..b07ce451e878e 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -805,10 +805,12 @@ NamedDecl *Parser::ParseTemplateTemplateParameter(unsigned Depth,
   // identifier, comma, or greater. Provide a fixit if the identifier, comma,
   // or greater appear immediately or after 'struct'. In the latter case,
   // replace the keyword with 'class'.
+  bool TypenameKeyword = false;
   if (!TryConsumeToken(tok::kw_class)) {
     bool Replace = Tok.isOneOf(tok::kw_typename, tok::kw_struct);
     const Token &Next = Tok.is(tok::kw_struct) ? NextToken() : Tok;
     if (Tok.is(tok::kw_typename)) {
+      TypenameKeyword = true;
       Diag(Tok.getLocation(),
            getLangOpts().CPlusPlus17
                ? diag::warn_cxx14_compat_template_template_param_typename
@@ -878,10 +880,9 @@ NamedDecl *Parser::ParseTemplateTemplateParameter(unsigned Depth,
     }
   }
 
-  return Actions.ActOnTemplateTemplateParameter(getCurScope(), TemplateLoc,
-                                                ParamList, EllipsisLoc,
-                                                ParamName, NameLoc, Depth,
-                                                Position, EqualLoc, DefaultArg);
+  return Actions.ActOnTemplateTemplateParameter(
+      getCurScope(), TemplateLoc, ParamList, TypenameKeyword, EllipsisLoc,
+      ParamName, NameLoc, Depth, Position, EqualLoc, DefaultArg);
 }
 
 /// ParseNonTypeTemplateParameter - Handle the parsing of non-type
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 951e5a31cab3b..e0f5e53dc2481 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1630,26 +1630,20 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
 
 /// ActOnTemplateTemplateParameter - Called when a C++ template template
 /// parameter (e.g. T in template