Skip to content

Commit 197b1b3

Browse files
committed
[AMDGPU] Fix canonicalization of truncated values.
We were relying on rounding operations to implicitly canonicalize their results, which is generally safe, except for roundings that are flagged as exact and may therefore be optimized away. Fixes llvm#82937.
1 parent 113052b commit 197b1b3

File tree

3 files changed

+47
-0
lines changed

3 files changed

+47
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12831,6 +12831,20 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
1283112831
}
1283212832
}
1283312833

12834+
// The TRUNC parameter of FP_ROUND specifies whether the operation may be
12835+
// optimized away because the operation is known to be exact. Even if the
12836+
// operation would be considered exact in normal circumstances where we do not
12837+
// care about SNaN, we do care about SNaN here and must preserve the operation
12838+
// unless its input is known to be canonicalized.
12839+
if (SrcOpc == ISD::FP_ROUND || SrcOpc == ISD::STRICT_FP_ROUND) {
12840+
if (N0.getConstantOperandVal(1) == 0 ||
12841+
isCanonicalized(DAG, N0.getOperand(0)))
12842+
return N0;
12843+
SDLoc SL(N0);
12844+
return DAG.getNode(SrcOpc, SL, VT, N0.getOperand(0),
12845+
DAG.getTargetConstant(0, SL, MVT::i32));
12846+
}
12847+
1283412848
return isCanonicalized(DAG, N0) ? N0 : SDValue();
1283512849
}
1283612850

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26818,11 +26818,15 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
2681826818
; GCN-LABEL: v_canonicalize_bf16:
2681926819
; GCN: ; %bb.0:
2682026820
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26821+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
26822+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2682126823
; GCN-NEXT: s_setpc_b64 s[30:31]
2682226824
;
2682326825
; GFX7-LABEL: v_canonicalize_bf16:
2682426826
; GFX7: ; %bb.0:
2682526827
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26828+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
26829+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2682626830
; GFX7-NEXT: s_setpc_b64 s[30:31]
2682726831
;
2682826832
; GFX8-LABEL: v_canonicalize_bf16:

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,35 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1
170170
ret void
171171
}
172172

173+
define half @s_test_canonicalize_arg(half %x) #1 {
174+
; VI-LABEL: s_test_canonicalize_arg:
175+
; VI: ; %bb.0:
176+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177+
; VI-NEXT: v_max_f16_e32 v0, v0, v0
178+
; VI-NEXT: s_setpc_b64 s[30:31]
179+
;
180+
; GFX9-LABEL: s_test_canonicalize_arg:
181+
; GFX9: ; %bb.0:
182+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183+
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
184+
; GFX9-NEXT: s_setpc_b64 s[30:31]
185+
;
186+
; CI-LABEL: s_test_canonicalize_arg:
187+
; CI: ; %bb.0:
188+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189+
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
190+
; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
191+
; CI-NEXT: s_setpc_b64 s[30:31]
192+
;
193+
; GFX11-LABEL: s_test_canonicalize_arg:
194+
; GFX11: ; %bb.0:
195+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196+
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
197+
; GFX11-NEXT: s_setpc_b64 s[30:31]
198+
%canonicalized = call half @llvm.canonicalize.f16(half %x)
199+
ret half %canonicalized
200+
}
201+
173202
define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
174203
; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
175204
; VI: ; %bb.0:

0 commit comments

Comments
 (0)