From a542a182cd66638a4c30e91ed22297532a38bbaa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 9 May 2025 14:35:25 +0200
Subject: [PATCH] AMDGPU: Add more tests for fmed3 instcombine folds

Add tests with snan literals and with strictfp call sites, for both the
default calling convention and amdgpu_ps. Run fmed3.ll with
"amdgpu-ieee" set to "true" and, by rewriting the attribute string with
sed, to "false", on gfx600 and on gfx1200.
---
 .../InstCombine/AMDGPU/fmed3-fpext-fold.ll    |  35 ++-
 .../Transforms/InstCombine/AMDGPU/fmed3.ll    | 259 +++++++++++++++++-
 2 files changed, 289 insertions(+), 5 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
index a31b47b2ca6e7..66011ad1ac76f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
@@ -605,9 +605,38 @@ define float @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1
   ret float %med3
 }
 
+define float @fmed3_f32_fpext_f16_strictfp(half %arg0, half %arg1, half %arg2) #2 {
+; UNKNOWN-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; UNKNOWN-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; UNKNOWN-NEXT:    [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT:    [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT:    [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR2]]
+; UNKNOWN-NEXT:    ret float [[MED3]]
+;
+; GFX8-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; GFX8-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; GFX8-NEXT:    [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; GFX8-NEXT:    [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; GFX8-NEXT:    [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; GFX8-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR4:[0-9]+]]
+; GFX8-NEXT:    ret float [[MED3]]
+;
+; GFX9-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; GFX9-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; GFX9-NEXT:    [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; GFX9-NEXT:    [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; GFX9-NEXT:    [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; GFX9-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR5:[0-9]+]]
+; GFX9-NEXT:    ret float [[MED3]]
+;
+  %arg0.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg0, metadata !"fpexcept.strict")
+  %arg1.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg1, metadata !"fpexcept.strict")
+  %arg2.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg2, metadata !"fpexcept.strict")
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) #2
+  ret float %med3
+}
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX8: {{.*}}
-; UNKNOWN: {{.*}}
+attributes #2 = { strictfp }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 311846b391e2b..5274ac1093a26 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -1,5 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
+
+; Test with "amdgpu-ieee" set to true and false
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
+
+; Test with gfx12 since there is no ieee bit anymore.
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.fmed3
 ; --------------------------------------------------------------------
@@ -328,5 +336,252 @@ define float @fmed3_x_y_poison_f32(float %x, float %y) #1 {
   ret float %med3
 }
 
+define float @fmed3_snan1_x_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+  ret float %med3
+}
+
+define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_x_snan2_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float [[X]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
+  ret float %med3
+}
+
+define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_x_snan1_snan2_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float [[X]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
+  ret float %med3
+}
+
+define float @fmed3_snan1_snan2_snan3_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_snan2_snan3_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float 0x7FF8000020000000
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 0x7FF0000020000000, float 0x7FF0000040000000)
+  ret float %med3
+}
+
+define float @fmed3_snan1_1_2_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_1_2_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 1.0, float 2.0)
+  ret float %med3
+}
+
+define float @fmed3_neg1_snan1_2_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_neg1_snan1_2_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0x7FF4000000000000, float 2.0)
+  ret float %med3
+}
+
+define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float 3.000000e+00
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
+  ret float %med3
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with default mode implied by shader CC
+; --------------------------------------------------------------------
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+  ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+  ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+  ret float %med3
+}
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with default mode shader cc and amdgpu-ieee
+; --------------------------------------------------------------------
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+  ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+  ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+  ret float %med3
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with strictfp calls
+; --------------------------------------------------------------------
+
+define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) strictfp
+  ret float %med3
+}
+
+define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_snan1_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_snan1_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_y_snan1_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_snan1_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000) strictfp
+  ret float %med3
+}
+
+define float @fmed3_poison_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_poison_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    ret float poison
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float poison, float %x, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_poison_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_poison_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    ret float poison
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float poison, float %y) strictfp
+  ret float %med3
+}
+
+define float @fmed3_x_y_poison_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_poison_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    ret float poison
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float poison) strictfp
+  ret float %med3
+}
+
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "amdgpu-ieee"="true" }
+attributes #2 = { nounwind strictfp "amdgpu-ieee"="true" }
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; IEEE0: {{.*}}
+; IEEE1: {{.*}}