-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU][GlobalIsel] Use isRegisterClassType for G_FREEZE and G_IMPLICIT_DEF #101331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel Author: None (sstipanovic) Changes: G_FREEZE was legal for <13 x S32>, which caused an infinite loop in the combiner. Full diff: https://github.com/llvm/llvm-project/pull/101331.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c6c4b8f930647..8f99ad6e393cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -889,10 +889,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S16, S64);
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalIf(isRegisterType(0))
// s1 and s16 are special cases because they have legal operations on
// them, but don't really occupy registers in the normal way.
- .legalFor({S1, S16})
+ .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
+ .legalFor(AllS32Vectors)
+ .legalFor(AllS64Vectors)
+ .legalFor(AddrSpaces64)
+ .legalFor(AddrSpaces32)
+ .legalFor(AddrSpaces128)
+ .legalIf(isPointer(0))
+ .clampNumElements(0, V16S32, V32S32)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampScalarOrElt(0, S32, MaxScalar)
.widenScalarToNextPow2(0, 32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
new file mode 100644
index 0000000000000..2fcc6e9571022
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-p64:32:32-p65:32:32"
+target triple = "amdgcn--amdpal"
+
+define amdgpu_cs void @_amdgpu_cs_main(i64 %0) {
+; GFX10-LABEL: _amdgpu_cs_main:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:32
+; GFX10-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_cmp_gt_f32_e64 s0, 0, v7
+; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX10-NEXT: s_and_saveexec_b32 s1, s0
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: _amdgpu_cs_main:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
+; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0, v1
+; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
+; GFX11-NEXT: s_and_saveexec_b32 s1, s0
+; GFX11-NEXT: s_endpgm
+.entry:
+ %1 = inttoptr i64 %0 to ptr addrspace(1)
+ %2 = load float, ptr addrspace(1) %1, align 4
+ %3 = call float @llvm.fabs.f32(float %2)
+ %4 = fcmp olt float %3, 1.000000e+00
+ %5 = getelementptr i8, ptr addrspace(1) %1, i64 4
+ %6 = load float, ptr addrspace(1) %5, align 4
+ %.fr.i0 = freeze float %6
+ %7 = getelementptr i8, ptr addrspace(1) %1, i64 16
+ %8 = load float, ptr addrspace(1) %7, align 4
+ %.fr123.i0 = freeze float %8
+ %9 = fadd float %.fr123.i0, 0.000000e+00
+ %10 = call float @llvm.fabs.f32(float %9)
+ %11 = and i1 false, %4
+ %12 = getelementptr i8, ptr addrspace(1) %1, i64 20
+ %13 = load float, ptr addrspace(1) %12, align 4
+ %14 = call float @llvm.fabs.f32(float %13)
+ %15 = fcmp olt float %14, 1.000000e+00
+ %16 = and i1 %15, false
+ %17 = getelementptr i8, ptr addrspace(1) %1, i64 24
+ %18 = load float, ptr addrspace(1) %17, align 4
+ %19 = call float @llvm.fabs.f32(float %18)
+ %20 = fcmp olt float %19, 1.000000e+00
+ %21 = and i1 %20, false
+ %22 = getelementptr i8, ptr addrspace(1) %1, i64 28
+ %23 = load float, ptr addrspace(1) %22, align 4
+ %.fr128.i0 = freeze float %23
+ %24 = fadd float %.fr128.i0, 0.000000e+00
+ %25 = call float @llvm.fabs.f32(float %24)
+ %26 = getelementptr i8, ptr addrspace(1) %1, i64 36
+ %27 = load float, ptr addrspace(1) %26, align 4
+ %28 = call float @llvm.fabs.f32(float %27)
+ %29 = fcmp olt float %28, 1.000000e+00
+ %30 = and i1 %29, false
+ %31 = getelementptr i8, ptr addrspace(1) %1, i64 40
+ %32 = load float, ptr addrspace(1) %31, align 4
+ %.fr133.i0 = freeze float %32
+ %33 = fadd float %.fr133.i0, 0.000000e+00
+ %34 = call float @llvm.fabs.f32(float %33)
+ %35 = getelementptr i8, ptr addrspace(1) %1, i64 44
+ %36 = load float, ptr addrspace(1) %35, align 4
+ %37 = fcmp olt float %36, 0.000000e+00
+ %.i112 = getelementptr i8, ptr addrspace(1) %1, i64 8
+ %.ii1 = load float, ptr addrspace(1) %.i112, align 4
+ %.i213 = getelementptr i8, ptr addrspace(1) %1, i64 12
+ %.ii2 = load float, ptr addrspace(1) %.i213, align 4
+ %.fr.i1 = freeze float %.ii1
+ %.fr.i2 = freeze float %.ii2
+ %38 = fcmp olt float %.fr.i0, 0.000000e+00
+ %39 = fadd float %.fr.i1, 0.000000e+00
+ %40 = call float @llvm.fabs.f32(float %39)
+ %41 = fadd float %.fr.i2, 0.000000e+00
+ %42 = call float @llvm.fabs.f32(float %41)
+ %43 = and i1 %37, %38
+ %.i124 = getelementptr i8, ptr addrspace(1) %1, i64 32
+ %.ii125 = load float, ptr addrspace(1) %.i124, align 4
+ %.fr128.i1 = freeze float %.ii125
+ %44 = fadd float %.fr128.i1, 0.000000e+00
+ %45 = call float @llvm.fabs.f32(float %44)
+ %.i234 = getelementptr i8, ptr addrspace(1) %1, i64 48
+ %.ii235 = load float, ptr addrspace(1) %.i234, align 4
+ %.fr133.i2 = freeze float %.ii235
+ %46 = fadd float %.fr133.i2, 0.000000e+00
+ %47 = call float @llvm.fabs.f32(float %46)
+ br i1 %43, label %48, label %53
+
+48: ; preds = %.entry
+ %49 = call i64 @llvm.amdgcn.s.getpc()
+ %50 = and i64 %49, 1
+ %51 = inttoptr i64 %50 to ptr addrspace(4)
+ %52 = load <4 x i32>, ptr addrspace(4) %51, align 16
+ br label %53
+
+53: ; preds = %48, %.entry
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.fabs.f32(float) #0
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i64 @llvm.amdgcn.s.getpc() #0
+
+; uselistorder directives
+uselistorder ptr @llvm.fabs.f32, { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/AMDGPU/GlobalISel/freeze_implicit_def_legalizer.ll
Outdated
Show resolved
Hide resolved
for G_FREEZE and G_IMPLICIT_DEF Change-Id: Ia22467410a92424c0bc8d307f1fcaea79d10d4c9
Excuse me, I have reverted this. See https://lab.llvm.org/buildbot/#/builders/127/builds/891 |
… G_IMPLICIT_DEF (llvm#101331)" This reverts commit 4fc08b6.
…6ed1b3b93 Local branch amd-gfx c176ed1 Merged main:caf0897c9c7f6f2a142af06bff8680a23d1d4bf5 into amd-gfx:bd53c0b2e9b9 Remote branch main 63b2595 [AMDGPU][GlobalIsel] Use isRegisterClassType for G_FREEZE and G_IMPLICIT_DEF (llvm#101331)
G_FREEZE was legal for <13 x S32>, which caused an infinite loop in the combiner.