Skip to content

Commit d1d5b16

Browse files
committed
[AMDGPU][GlobalISel] Use isRegType to check for legal types for G_FREEZE & G_IMPLICIT_DEF

Change-Id: I9d5c52744b77b3820d755a6b2cd872730e6a99c7
1 parent c66d25d commit d1d5b16

File tree

2 files changed

+128
-2
lines changed

2 files changed

+128
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -889,10 +889,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
889889
.clampScalar(0, S16, S64);
890890

891891
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
892-
.legalIf(isRegisterType(0))
893892
// s1 and s16 are special cases because they have legal operations on
894893
// them, but don't really occupy registers in the normal way.
895-
.legalFor({S1, S16})
894+
.legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
895+
.legalFor(AllS32Vectors)
896+
.legalFor(AllS64Vectors)
897+
.legalFor(AddrSpaces64)
898+
.legalFor(AddrSpaces32)
899+
.legalFor(AddrSpaces128)
900+
.legalIf(isPointer(0))
901+
.clampNumElements(0, V16S32, V32S32)
896902
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
897903
.clampScalarOrElt(0, S32, MaxScalar)
898904
.widenScalarToNextPow2(0, 32)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
4+
5+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-p64:32:32-p65:32:32"
6+
target triple = "amdgcn--amdpal"
7+
8+
; amdgpu_cs test function. %0 is converted to an addrspace(1) pointer and
; floats are loaded from it at every 4-byte offset from 0 through 48. Several
; of the loaded values are passed through `freeze` before being used.
; Only %43 (the `olt 0.0` compare of the offset-44 load ANDed with the
; `olt 0.0` compare of the frozen offset-4 load) feeds the branch; the other
; fadd/fabs/fcmp/and results are not used anywhere else in this function.
define amdgpu_cs void @_amdgpu_cs_main(i64 %0) {
9+
; GFX10-LABEL: _amdgpu_cs_main:
10+
; GFX10: ; %bb.0: ; %.entry
11+
; GFX10-NEXT: s_clause 0x1
12+
; GFX10-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:32
13+
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
14+
; GFX10-NEXT: s_waitcnt vmcnt(1)
15+
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
16+
; GFX10-NEXT: s_waitcnt vmcnt(0)
17+
; GFX10-NEXT: v_cmp_gt_f32_e64 s0, 0, v7
18+
; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0
19+
; GFX10-NEXT: s_and_saveexec_b32 s1, s0
20+
; GFX10-NEXT: s_endpgm
21+
;
22+
; GFX11-LABEL: _amdgpu_cs_main:
23+
; GFX11: ; %bb.0: ; %.entry
24+
; GFX11-NEXT: s_clause 0x1
25+
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
26+
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
27+
; GFX11-NEXT: s_waitcnt vmcnt(1)
28+
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5
29+
; GFX11-NEXT: s_waitcnt vmcnt(0)
30+
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0, v1
31+
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
32+
; GFX11-NEXT: s_and_saveexec_b32 s1, s0
33+
; GFX11-NEXT: s_endpgm
34+
.entry:
35+
%1 = inttoptr i64 %0 to ptr addrspace(1)
36+
%2 = load float, ptr addrspace(1) %1, align 4
37+
%3 = call float @llvm.fabs.f32(float %2)
38+
%4 = fcmp olt float %3, 1.000000e+00
39+
%5 = getelementptr i8, ptr addrspace(1) %1, i64 4
40+
%6 = load float, ptr addrspace(1) %5, align 4
41+
%.fr.i0 = freeze float %6
42+
%7 = getelementptr i8, ptr addrspace(1) %1, i64 16
43+
%8 = load float, ptr addrspace(1) %7, align 4
44+
%.fr123.i0 = freeze float %8
45+
%9 = fadd float %.fr123.i0, 0.000000e+00
46+
%10 = call float @llvm.fabs.f32(float %9)
47+
%11 = and i1 false, %4
48+
%12 = getelementptr i8, ptr addrspace(1) %1, i64 20
49+
%13 = load float, ptr addrspace(1) %12, align 4
50+
%14 = call float @llvm.fabs.f32(float %13)
51+
%15 = fcmp olt float %14, 1.000000e+00
52+
%16 = and i1 %15, false
53+
%17 = getelementptr i8, ptr addrspace(1) %1, i64 24
54+
%18 = load float, ptr addrspace(1) %17, align 4
55+
%19 = call float @llvm.fabs.f32(float %18)
56+
%20 = fcmp olt float %19, 1.000000e+00
57+
%21 = and i1 %20, false
58+
%22 = getelementptr i8, ptr addrspace(1) %1, i64 28
59+
%23 = load float, ptr addrspace(1) %22, align 4
60+
%.fr128.i0 = freeze float %23
61+
%24 = fadd float %.fr128.i0, 0.000000e+00
62+
%25 = call float @llvm.fabs.f32(float %24)
63+
%26 = getelementptr i8, ptr addrspace(1) %1, i64 36
64+
%27 = load float, ptr addrspace(1) %26, align 4
65+
%28 = call float @llvm.fabs.f32(float %27)
66+
%29 = fcmp olt float %28, 1.000000e+00
67+
%30 = and i1 %29, false
68+
%31 = getelementptr i8, ptr addrspace(1) %1, i64 40
69+
%32 = load float, ptr addrspace(1) %31, align 4
70+
%.fr133.i0 = freeze float %32
71+
%33 = fadd float %.fr133.i0, 0.000000e+00
72+
%34 = call float @llvm.fabs.f32(float %33)
73+
%35 = getelementptr i8, ptr addrspace(1) %1, i64 44
74+
%36 = load float, ptr addrspace(1) %35, align 4
75+
%37 = fcmp olt float %36, 0.000000e+00
76+
%.i112 = getelementptr i8, ptr addrspace(1) %1, i64 8
77+
%.ii1 = load float, ptr addrspace(1) %.i112, align 4
78+
%.i213 = getelementptr i8, ptr addrspace(1) %1, i64 12
79+
%.ii2 = load float, ptr addrspace(1) %.i213, align 4
80+
%.fr.i1 = freeze float %.ii1
81+
%.fr.i2 = freeze float %.ii2
82+
%38 = fcmp olt float %.fr.i0, 0.000000e+00
83+
%39 = fadd float %.fr.i1, 0.000000e+00
84+
%40 = call float @llvm.fabs.f32(float %39)
85+
%41 = fadd float %.fr.i2, 0.000000e+00
86+
%42 = call float @llvm.fabs.f32(float %41)
87+
%43 = and i1 %37, %38
88+
%.i124 = getelementptr i8, ptr addrspace(1) %1, i64 32
89+
%.ii125 = load float, ptr addrspace(1) %.i124, align 4
90+
%.fr128.i1 = freeze float %.ii125
91+
%44 = fadd float %.fr128.i1, 0.000000e+00
92+
%45 = call float @llvm.fabs.f32(float %44)
93+
%.i234 = getelementptr i8, ptr addrspace(1) %1, i64 48
94+
%.ii235 = load float, ptr addrspace(1) %.i234, align 4
95+
%.fr133.i2 = freeze float %.ii235
96+
%46 = fadd float %.fr133.i2, 0.000000e+00
97+
%47 = call float @llvm.fabs.f32(float %46)
98+
; %43 is the only condition computed above that reaches control flow.
br i1 %43, label %48, label %53
99+
100+
48: ; preds = %.entry
101+
; Builds an addrspace(4) pointer from bit 0 of the s.getpc result and loads
; <4 x i32> through it; the loaded value %52 has no users.
%49 = call i64 @llvm.amdgcn.s.getpc()
102+
%50 = and i64 %49, 1
103+
%51 = inttoptr i64 %50 to ptr addrspace(4)
104+
%52 = load <4 x i32>, ptr addrspace(4) %51, align 16
105+
br label %53
106+
107+
53: ; preds = %48, %.entry
108+
ret void
109+
}
110+
111+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
112+
declare float @llvm.fabs.f32(float) #0
113+
114+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
115+
declare noundef i64 @llvm.amdgcn.s.getpc() #0
116+
117+
; uselistorder directives
118+
uselistorder ptr @llvm.fabs.f32, { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
119+
120+
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

0 commit comments

Comments
 (0)