Skip to content

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Mar 5, 2025

convergencectrl doesn't imply any memory access.
Closes #129856.

@llvmbot
Copy link
Member

llvmbot commented Mar 5, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

convergencectrl doesn't imply any memory access.
Closes #129856.


Full diff: https://github.com/llvm/llvm-project/pull/129874.diff

3 Files Affected:

  • (modified) llvm/lib/IR/Instructions.cpp (+5-3)
  • (added) llvm/test/Transforms/DCE/op_bundles.ll (+12)
  • (modified) llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll (+4-4)
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b5d1bc81b9d95..d2cf0ae2c1778 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -609,15 +609,17 @@ bool CallBase::hasReadingOperandBundles() const {
   // Implementation note: this is a conservative implementation of operand
   // bundle semantics, where *any* non-assume operand bundle (other than
   // ptrauth) forces a callsite to be at least readonly.
-  return hasOperandBundlesOtherThan(
-             {LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+  return hasOperandBundlesOtherThan({LLVMContext::OB_ptrauth,
+                                     LLVMContext::OB_kcfi,
+                                     LLVMContext::OB_convergencectrl}) &&
          getIntrinsicID() != Intrinsic::assume;
 }
 
 bool CallBase::hasClobberingOperandBundles() const {
   return hasOperandBundlesOtherThan(
              {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
-              LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+              LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi,
+              LLVMContext::OB_convergencectrl}) &&
          getIntrinsicID() != Intrinsic::assume;
 }
 
diff --git a/llvm/test/Transforms/DCE/op_bundles.ll b/llvm/test/Transforms/DCE/op_bundles.ll
new file mode 100644
index 0000000000000..0d3b4db8265e8
--- /dev/null
+++ b/llvm/test/Transforms/DCE/op_bundles.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+define void @dead_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define void @dead_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  %t = tail call token @llvm.experimental.convergence.entry()
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+  ret void
+}
diff --git a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
index 633cf091da508..88eff971b9576 100644
--- a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
+++ b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
@@ -7,14 +7,14 @@ define i32 @nested(i32 %src) #0 {
 ; CHECK-SAME: i32 [[A0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[BB15160:.*:]]
 ; CHECK-NEXT:    [[T1:%.*]] = call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT:    %"vl15001llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
-; CHECK-NEXT:    %"op68297llvm.amdgcn.readfirstlane.i32([[A0]], vl15001llvm.experimental.convergence.anchor())" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl15001llvm.experimental.convergence.anchor()") ]
-; CHECK-NEXT:    ret i32 undef
+; CHECK-NEXT:    %"vl77672llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    %"op68297(vl77672)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl77672llvm.experimental.convergence.anchor()") ]
+; CHECK-NEXT:    ret i32 %"op68297(vl77672)"
 ;
   %t1 = call token @llvm.experimental.convergence.entry()
   %t2 = call token @llvm.experimental.convergence.anchor()
   %r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
-  ret i32 undef
+  ret i32 %r2
 }
 
 ; Function Attrs: convergent nounwind readnone

@llvmbot
Copy link
Member

llvmbot commented Mar 5, 2025

@llvm/pr-subscribers-llvm-ir

Author: Yingwei Zheng (dtcxzyw)

Changes

convergencectrl doesn't imply any memory access.
Closes #129856.


Full diff: https://github.com/llvm/llvm-project/pull/129874.diff

3 Files Affected:

  • (modified) llvm/lib/IR/Instructions.cpp (+5-3)
  • (added) llvm/test/Transforms/DCE/op_bundles.ll (+12)
  • (modified) llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll (+4-4)
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b5d1bc81b9d95..d2cf0ae2c1778 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -609,15 +609,17 @@ bool CallBase::hasReadingOperandBundles() const {
   // Implementation note: this is a conservative implementation of operand
   // bundle semantics, where *any* non-assume operand bundle (other than
   // ptrauth) forces a callsite to be at least readonly.
-  return hasOperandBundlesOtherThan(
-             {LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+  return hasOperandBundlesOtherThan({LLVMContext::OB_ptrauth,
+                                     LLVMContext::OB_kcfi,
+                                     LLVMContext::OB_convergencectrl}) &&
          getIntrinsicID() != Intrinsic::assume;
 }
 
 bool CallBase::hasClobberingOperandBundles() const {
   return hasOperandBundlesOtherThan(
              {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
-              LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi}) &&
+              LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi,
+              LLVMContext::OB_convergencectrl}) &&
          getIntrinsicID() != Intrinsic::assume;
 }
 
diff --git a/llvm/test/Transforms/DCE/op_bundles.ll b/llvm/test/Transforms/DCE/op_bundles.ll
new file mode 100644
index 0000000000000..0d3b4db8265e8
--- /dev/null
+++ b/llvm/test/Transforms/DCE/op_bundles.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+define void @dead_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define void @dead_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  %t = tail call token @llvm.experimental.convergence.entry()
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+  ret void
+}
diff --git a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
index 633cf091da508..88eff971b9576 100644
--- a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
+++ b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll
@@ -7,14 +7,14 @@ define i32 @nested(i32 %src) #0 {
 ; CHECK-SAME: i32 [[A0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[BB15160:.*:]]
 ; CHECK-NEXT:    [[T1:%.*]] = call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT:    %"vl15001llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
-; CHECK-NEXT:    %"op68297llvm.amdgcn.readfirstlane.i32([[A0]], vl15001llvm.experimental.convergence.anchor())" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl15001llvm.experimental.convergence.anchor()") ]
-; CHECK-NEXT:    ret i32 undef
+; CHECK-NEXT:    %"vl77672llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    %"op68297(vl77672)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl77672llvm.experimental.convergence.anchor()") ]
+; CHECK-NEXT:    ret i32 %"op68297(vl77672)"
 ;
   %t1 = call token @llvm.experimental.convergence.entry()
   %t2 = call token @llvm.experimental.convergence.anchor()
   %r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
-  ret i32 undef
+  ret i32 %r2
 }
 
 ; Function Attrs: convergent nounwind readnone

;
%t1 = call token @llvm.experimental.convergence.entry()
%t2 = call token @llvm.experimental.convergence.anchor()
%r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
ret i32 undef
ret i32 %r2
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IRNormalizer does not set name for dead values.

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test update in instcombine/amdgpu? The patch I submitted a while ago had one of these leftover (the regression in 5c375c3)

@dtcxzyw dtcxzyw force-pushed the convergent-bundle branch from a4744bf to 74d812e Compare March 5, 2025 12:49
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Mar 5, 2025

Missing test update in instcombine/amdgpu? The patch I submitted a while ago had one of these leftover (the regression in 5c375c3)

Rebased

@llvmbot llvmbot added backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes labels Mar 5, 2025
@dtcxzyw dtcxzyw merged commit 844a1d5 into llvm:main Mar 5, 2025
11 of 13 checks passed
@dtcxzyw dtcxzyw deleted the convergent-bundle branch March 5, 2025 14:14
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 5, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime running on omp-vega20-0 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/16964

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp   -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test/ompt /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test/ompt /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:ir llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Dead calls with convergence token uses are not deleted
4 participants