Skip to content

Commit db5bcb2

Browse files
authored
GlobalISel: Fix combine duplicating atomic loads (#111730)
The sext_inreg (load) combine was not deleting the old load instruction, and it would never be deleted if volatile or atomic.
1 parent 4ccd2b0 commit db5bcb2

File tree

5 files changed

+42
-146
lines changed

5 files changed

+42
-146
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,9 @@ void CombinerHelper::applySextInRegOfLoad(
11101110
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
11111111
LoadDef->getPointerReg(), *NewMMO);
11121112
MI.eraseFromParent();
1113+
1114+
// Volatile and atomic loads are never deleted as dead, so explicitly erase the old load.
1115+
LoadDef->eraseFromParent();
11131116
}
11141117

11151118
/// Return true if 'MI' is a load or a store that may fold its address

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll

Lines changed: 18 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
2727
}
2828

2929
define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
30-
; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
31-
; GFX7: ; %bb.0:
32-
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33-
; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
34-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
35-
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
36-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
37-
; GFX7-NEXT: s_setpc_b64 s[30:31]
38-
;
39-
; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
40-
; GFX8: ; %bb.0:
41-
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42-
; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
43-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
44-
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
45-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
46-
; GFX8-NEXT: s_setpc_b64 s[30:31]
47-
;
48-
; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
49-
; GFX9: ; %bb.0:
50-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51-
; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
52-
; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc
53-
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
54-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
55-
; GFX9-NEXT: s_setpc_b64 s[30:31]
30+
; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
31+
; GCN: ; %bb.0:
32+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33+
; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
34+
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35+
; GCN-NEXT: s_setpc_b64 s[30:31]
5636
%load = load atomic i8, ptr %ptr monotonic, align 1
5737
%ext = sext i8 %load to i32
5838
ret i32 %ext
@@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) {
7151
}
7252

7353
define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
74-
; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
75-
; GFX7: ; %bb.0:
76-
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77-
; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
78-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
79-
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
80-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
81-
; GFX7-NEXT: s_setpc_b64 s[30:31]
82-
;
83-
; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
84-
; GFX8: ; %bb.0:
85-
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86-
; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
87-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
88-
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
90-
; GFX8-NEXT: s_setpc_b64 s[30:31]
91-
;
92-
; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
93-
; GFX9: ; %bb.0:
94-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95-
; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
96-
; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc
97-
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
98-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
99-
; GFX9-NEXT: s_setpc_b64 s[30:31]
54+
; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
55+
; GCN: ; %bb.0:
56+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57+
; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
58+
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
59+
; GCN-NEXT: s_setpc_b64 s[30:31]
10060
%load = load atomic i8, ptr %ptr monotonic, align 1
10161
%ext = sext i8 %load to i16
10262
ret i16 %ext
@@ -126,32 +86,12 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
12686
}
12787

12888
define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
129-
; GFX7-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
130-
; GFX7: ; %bb.0:
131-
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132-
; GFX7-NEXT: flat_load_sshort v2, v[0:1] glc
133-
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
134-
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
135-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
136-
; GFX7-NEXT: s_setpc_b64 s[30:31]
137-
;
138-
; GFX8-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
139-
; GFX8: ; %bb.0:
140-
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141-
; GFX8-NEXT: flat_load_sshort v2, v[0:1] glc
142-
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
143-
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
144-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
145-
; GFX8-NEXT: s_setpc_b64 s[30:31]
146-
;
147-
; GFX9-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
148-
; GFX9: ; %bb.0:
149-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150-
; GFX9-NEXT: flat_load_sshort v2, v[0:1] glc
151-
; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc
152-
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
153-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
154-
; GFX9-NEXT: s_setpc_b64 s[30:31]
89+
; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
90+
; GCN: ; %bb.0:
91+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92+
; GCN-NEXT: flat_load_sshort v0, v[0:1] glc
93+
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
94+
; GCN-NEXT: s_setpc_b64 s[30:31]
15595
%load = load atomic i16, ptr %ptr monotonic, align 2
15696
%ext = sext i16 %load to i32
15797
ret i32 %ext

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -82,37 +82,28 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
8282
; GFX6-NEXT: s_mov_b32 s6, 0
8383
; GFX6-NEXT: s_mov_b32 s7, 0x100f000
8484
; GFX6-NEXT: s_mov_b64 s[4:5], 0
85-
; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
86-
; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
85+
; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
8786
; GFX6-NEXT: s_waitcnt vmcnt(0)
88-
; GFX6-NEXT: v_mov_b32_e32 v0, v2
8987
; GFX6-NEXT: s_setpc_b64 s[30:31]
9088
;
9189
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
9290
; GFX7: ; %bb.0:
9391
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94-
; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
95-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
92+
; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc
9693
; GFX7-NEXT: s_waitcnt vmcnt(0)
97-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
9894
; GFX7-NEXT: s_setpc_b64 s[30:31]
9995
;
10096
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
10197
; GFX8: ; %bb.0:
10298
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103-
; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
104-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
99+
; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc
105100
; GFX8-NEXT: s_waitcnt vmcnt(0)
106-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
107101
; GFX8-NEXT: s_setpc_b64 s[30:31]
108102
;
109103
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
110104
; GFX9: ; %bb.0:
111105
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112-
; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc
113-
; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc
114-
; GFX9-NEXT: s_waitcnt vmcnt(1)
115-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
106+
; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
116107
; GFX9-NEXT: s_waitcnt vmcnt(0)
117108
; GFX9-NEXT: s_setpc_b64 s[30:31]
118109
%load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -163,37 +154,28 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
163154
; GFX6-NEXT: s_mov_b32 s6, 0
164155
; GFX6-NEXT: s_mov_b32 s7, 0x100f000
165156
; GFX6-NEXT: s_mov_b64 s[4:5], 0
166-
; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
167-
; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
157+
; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
168158
; GFX6-NEXT: s_waitcnt vmcnt(0)
169-
; GFX6-NEXT: v_mov_b32_e32 v0, v2
170159
; GFX6-NEXT: s_setpc_b64 s[30:31]
171160
;
172161
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
173162
; GFX7: ; %bb.0:
174163
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175-
; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
176-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
164+
; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc
177165
; GFX7-NEXT: s_waitcnt vmcnt(0)
178-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
179166
; GFX7-NEXT: s_setpc_b64 s[30:31]
180167
;
181168
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
182169
; GFX8: ; %bb.0:
183170
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184-
; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
185-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
171+
; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc
186172
; GFX8-NEXT: s_waitcnt vmcnt(0)
187-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
188173
; GFX8-NEXT: s_setpc_b64 s[30:31]
189174
;
190175
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
191176
; GFX9: ; %bb.0:
192177
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193-
; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc
194-
; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc
195-
; GFX9-NEXT: s_waitcnt vmcnt(1)
196-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
178+
; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
197179
; GFX9-NEXT: s_waitcnt vmcnt(0)
198180
; GFX9-NEXT: s_setpc_b64 s[30:31]
199181
%load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -279,37 +261,28 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr)
279261
; GFX6-NEXT: s_mov_b32 s6, 0
280262
; GFX6-NEXT: s_mov_b32 s7, 0x100f000
281263
; GFX6-NEXT: s_mov_b64 s[4:5], 0
282-
; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
283-
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
264+
; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
284265
; GFX6-NEXT: s_waitcnt vmcnt(0)
285-
; GFX6-NEXT: v_mov_b32_e32 v0, v2
286266
; GFX6-NEXT: s_setpc_b64 s[30:31]
287267
;
288268
; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
289269
; GFX7: ; %bb.0:
290270
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291-
; GFX7-NEXT: flat_load_sshort v2, v[0:1] glc
292-
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
271+
; GFX7-NEXT: flat_load_sshort v0, v[0:1] glc
293272
; GFX7-NEXT: s_waitcnt vmcnt(0)
294-
; GFX7-NEXT: v_mov_b32_e32 v0, v2
295273
; GFX7-NEXT: s_setpc_b64 s[30:31]
296274
;
297275
; GFX8-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
298276
; GFX8: ; %bb.0:
299277
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300-
; GFX8-NEXT: flat_load_sshort v2, v[0:1] glc
301-
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
278+
; GFX8-NEXT: flat_load_sshort v0, v[0:1] glc
302279
; GFX8-NEXT: s_waitcnt vmcnt(0)
303-
; GFX8-NEXT: v_mov_b32_e32 v0, v2
304280
; GFX8-NEXT: s_setpc_b64 s[30:31]
305281
;
306282
; GFX9-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
307283
; GFX9: ; %bb.0:
308284
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309-
; GFX9-NEXT: global_load_sshort v2, v[0:1], off glc
310-
; GFX9-NEXT: global_load_ushort v3, v[0:1], off glc
311-
; GFX9-NEXT: s_waitcnt vmcnt(1)
312-
; GFX9-NEXT: v_mov_b32_e32 v0, v2
285+
; GFX9-NEXT: global_load_sshort v0, v[0:1], off glc
313286
; GFX9-NEXT: s_waitcnt vmcnt(0)
314287
; GFX9-NEXT: s_setpc_b64 s[30:31]
315288
%load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -65,29 +65,23 @@ define i32 @atomic_load_local_monotonic_i8_sext_to_i32(ptr addrspace(3) %ptr) {
6565
; GFX7: ; %bb.0:
6666
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6767
; GFX7-NEXT: s_mov_b32 m0, -1
68-
; GFX7-NEXT: ds_read_i8 v1, v0
69-
; GFX7-NEXT: ds_read_u8 v0, v0
68+
; GFX7-NEXT: ds_read_i8 v0, v0
7069
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
71-
; GFX7-NEXT: v_mov_b32_e32 v0, v1
7270
; GFX7-NEXT: s_setpc_b64 s[30:31]
7371
;
7472
; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
7573
; GFX8: ; %bb.0:
7674
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7775
; GFX8-NEXT: s_mov_b32 m0, -1
78-
; GFX8-NEXT: ds_read_i8 v1, v0
79-
; GFX8-NEXT: ds_read_u8 v0, v0
76+
; GFX8-NEXT: ds_read_i8 v0, v0
8077
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
81-
; GFX8-NEXT: v_mov_b32_e32 v0, v1
8278
; GFX8-NEXT: s_setpc_b64 s[30:31]
8379
;
8480
; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
8581
; GFX9: ; %bb.0:
8682
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87-
; GFX9-NEXT: ds_read_i8 v1, v0
88-
; GFX9-NEXT: ds_read_u8 v0, v0
83+
; GFX9-NEXT: ds_read_i8 v0, v0
8984
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
90-
; GFX9-NEXT: v_mov_b32_e32 v0, v1
9185
; GFX9-NEXT: s_setpc_b64 s[30:31]
9286
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
9387
%ext = sext i8 %load to i32
@@ -127,29 +121,23 @@ define i16 @atomic_load_local_monotonic_i8_sext_to_i16(ptr addrspace(3) %ptr) {
127121
; GFX7: ; %bb.0:
128122
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129123
; GFX7-NEXT: s_mov_b32 m0, -1
130-
; GFX7-NEXT: ds_read_i8 v1, v0
131-
; GFX7-NEXT: ds_read_u8 v0, v0
124+
; GFX7-NEXT: ds_read_i8 v0, v0
132125
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
133-
; GFX7-NEXT: v_mov_b32_e32 v0, v1
134126
; GFX7-NEXT: s_setpc_b64 s[30:31]
135127
;
136128
; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
137129
; GFX8: ; %bb.0:
138130
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139131
; GFX8-NEXT: s_mov_b32 m0, -1
140-
; GFX8-NEXT: ds_read_i8 v1, v0
141-
; GFX8-NEXT: ds_read_u8 v0, v0
132+
; GFX8-NEXT: ds_read_i8 v0, v0
142133
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
143-
; GFX8-NEXT: v_mov_b32_e32 v0, v1
144134
; GFX8-NEXT: s_setpc_b64 s[30:31]
145135
;
146136
; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
147137
; GFX9: ; %bb.0:
148138
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149-
; GFX9-NEXT: ds_read_i8 v1, v0
150-
; GFX9-NEXT: ds_read_u8 v0, v0
139+
; GFX9-NEXT: ds_read_i8 v0, v0
151140
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
152-
; GFX9-NEXT: v_mov_b32_e32 v0, v1
153141
; GFX9-NEXT: s_setpc_b64 s[30:31]
154142
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
155143
%ext = sext i8 %load to i16
@@ -216,29 +204,23 @@ define i32 @atomic_load_local_monotonic_i16_sext_to_i32(ptr addrspace(3) %ptr) {
216204
; GFX7: ; %bb.0:
217205
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218206
; GFX7-NEXT: s_mov_b32 m0, -1
219-
; GFX7-NEXT: ds_read_i16 v1, v0
220-
; GFX7-NEXT: ds_read_u16 v0, v0
207+
; GFX7-NEXT: ds_read_i16 v0, v0
221208
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
222-
; GFX7-NEXT: v_mov_b32_e32 v0, v1
223209
; GFX7-NEXT: s_setpc_b64 s[30:31]
224210
;
225211
; GFX8-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
226212
; GFX8: ; %bb.0:
227213
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228214
; GFX8-NEXT: s_mov_b32 m0, -1
229-
; GFX8-NEXT: ds_read_i16 v1, v0
230-
; GFX8-NEXT: ds_read_u16 v0, v0
215+
; GFX8-NEXT: ds_read_i16 v0, v0
231216
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
232-
; GFX8-NEXT: v_mov_b32_e32 v0, v1
233217
; GFX8-NEXT: s_setpc_b64 s[30:31]
234218
;
235219
; GFX9-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
236220
; GFX9: ; %bb.0:
237221
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238-
; GFX9-NEXT: ds_read_i16 v1, v0
239-
; GFX9-NEXT: ds_read_u16 v0, v0
222+
; GFX9-NEXT: ds_read_i16 v0, v0
240223
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
241-
; GFX9-NEXT: v_mov_b32_e32 v0, v1
242224
; GFX9-NEXT: s_setpc_b64 s[30:31]
243225
%load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
244226
%ext = sext i16 %load to i32

llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ body: |
133133
; CHECK-NEXT: {{ $}}
134134
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
135135
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
136-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
137136
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
138137
%0:_(p1) = COPY $vgpr0_vgpr1
139138
%1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
@@ -172,7 +171,6 @@ body: |
172171
; CHECK-NEXT: {{ $}}
173172
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
174173
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
175-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
176174
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
177175
%0:_(p1) = COPY $vgpr0_vgpr1
178176
%1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)

0 commit comments

Comments
 (0)