[AMDGPU] Fix canonicalization of truncated values.

hvdijk · hvdijk · commit 197b1b3b7e28 · 2024-02-26T20:45:47.000Z
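We were relying on rounding operations to implicitly canonicalize their results. That is generally safe, but not for roundings that are marked as exact and may therefore be optimized away, which would drop the canonicalization along with them. Fixes llvm#82937.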
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12831,6 +12831,20 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
     }
   }
 
+  // The TRUNC parameter of FP_ROUND specifies whether the operation may be
+  // optimized away because the operation is known to be exact. Even if the
+  // operation would be considered exact in normal circumstances where we do not
+  // care about SNaN, we do care about SNaN here and must preserve the operation
+  // unless its input is known to be canonicalized.
+  if (SrcOpc == ISD::FP_ROUND || SrcOpc == ISD::STRICT_FP_ROUND) {
+    if (N0.getConstantOperandVal(1) == 0 ||
+        isCanonicalized(DAG, N0.getOperand(0)))
+      return N0;
+    SDLoc SL(N0);
+    return DAG.getNode(SrcOpc, SL, VT, N0.getOperand(0),
+                       DAG.getTargetConstant(0, SL, MVT::i32));
+  }
+
   return isCanonicalized(DAG, N0) ? N0 : SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -26818,11 +26818,15 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
 ; GCN-LABEL: v_canonicalize_bf16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GCN-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_canonicalize_bf16:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_canonicalize_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -170,6 +170,35 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1
   ret void
 }
 
+define half @s_test_canonicalize_arg(half %x) #1 {
+; VI-LABEL: s_test_canonicalize_arg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_max_f16_e32 v0, v0, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_test_canonicalize_arg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; CI-LABEL: s_test_canonicalize_arg:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_test_canonicalize_arg:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %canonicalized = call half @llvm.canonicalize.f16(half %x)
+  ret half %canonicalized
+}
+
 define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
 ; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
 ; VI:       ; %bb.0:

Original file line number	Diff line number	Diff line change
`@@ -12831,6 +12831,20 @@ SDValue SITargetLowering::performFCanonicalizeCombine(`
`12831`	`12831`	`}`
`12832`	`12832`	`}`
`12833`	`12833`
	`12834`	`+ // The TRUNC parameter of FP_ROUND specifies whether the operation may be`
	`12835`	`+ // optimized away because the operation is known to be exact. Even if the`
	`12836`	`+ // operation would be considered exact in normal circumstances where we do not`
	`12837`	`+ // care about SNaN, we do care about SNaN here and must preserve the operation`
	`12838`	`+ // unless its input is known to be canonicalized.`
	`12839`	`+ if (SrcOpc == ISD::FP_ROUND || SrcOpc == ISD::STRICT_FP_ROUND) {`
	`12840`	`+ if (N0.getConstantOperandVal(1) == 0 ||`
	`12841`	`+ isCanonicalized(DAG, N0.getOperand(0)))`
	`12842`	`+ return N0;`
	`12843`	`+ SDLoc SL(N0);`
	`12844`	`+ return DAG.getNode(SrcOpc, SL, VT, N0.getOperand(0),`
	`12845`	`+ DAG.getTargetConstant(0, SL, MVT::i32));`
	`12846`	`+ }`
	`12847`	`+`
`12834`	`12848`	`return isCanonicalized(DAG, N0) ? N0 : SDValue();`
`12835`	`12849`	`}`
`12836`	`12850`