diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1ca300464a697..f2a2cf41eab90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
   return maxnum(Src0, Src1);
 }
 
+enum class KnownIEEEMode { Unknown, On, Off };
+
+/// Return KnownIEEEMode::On if the use context is known to behave as
+/// "amdgpu-ieee"="true", KnownIEEEMode::Off if it is known to behave as
+/// "amdgpu-ieee"="false", and KnownIEEEMode::Unknown if \p I has no function.
+static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
+                                   const GCNSubtarget &ST) {
+  if (!ST.hasIEEEMode()) // No IEEE bit on gfx12; the attribute is ignored
+    return KnownIEEEMode::On;
+
+  const Function *F = I.getFunction();
+  if (!F)
+    return KnownIEEEMode::Unknown;
+
+  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
+  if (IEEEAttr.isValid())
+    return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
+
+  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
+                                               : KnownIEEEMode::On;
+}
+
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
 // The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     break;
   }
   case Intrinsic::amdgcn_fmed3: {
-    // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
-    // for the shader.
-
     Value *Src0 = II.getArgOperand(0);
     Value *Src1 = II.getArgOperand(1);
     Value *Src2 = II.getArgOperand(2);
@@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     if (II.isStrictFP())
       break;
 
+    // med3 with a nan input acts like
+    // v_min_f32(v_min_f32(s0, s1), s2)
+    //
+    // Signalingness is ignored with ieee=0, so we fold to
+    // minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
+    // with signaling nan handling. With ieee=0, like llvm.minimumnum except a
+    // returned signaling nan will not be quieted.
+
+    // ieee=1
+    // s0 snan: s2
+    // s1 snan: s2
+    // s2 snan: qnan
+
+    // s0 qnan: min(s1, s2)
+    // s1 qnan: min(s0, s2)
+    // s2 qnan: min(s0, s1)
+
+    // ieee=0
+    // s0 _nan: min(s1, s2)
+    // s1 _nan: min(s0, s2)
+    // s2 _nan: min(s0, s1) -- NOTE(review): the fold below uses maximumnum
+
     // Checking for NaN before canonicalization provides better fidelity when
     // mapping other operations onto fmed3 since the order of operands is
     // unchanged.
     Value *V = nullptr;
-    if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
-      V = IC.Builder.CreateMinNum(Src1, Src2);
-    } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
-      V = IC.Builder.CreateMinNum(Src0, Src2);
-    } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
-      V = IC.Builder.CreateMinNum(Src0, Src1);
+    const APFloat *ConstSrc0 = nullptr;
+    const APFloat *ConstSrc1 = nullptr;
+    const APFloat *ConstSrc2 = nullptr;
+
+    // TODO: Also can fold to 2 operands with infinities.
+    if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
+        isa<UndefValue>(Src0)) {
+      switch (fpenvIEEEMode(II, *ST)) {
+      case KnownIEEEMode::On:
+        // TODO: If Src2 is snan, does it need quieting?
+        if (ConstSrc0 && ConstSrc0->isSignaling())
+          return IC.replaceInstUsesWith(II, Src2);
+        V = IC.Builder.CreateMinNum(Src1, Src2);
+        break;
+      case KnownIEEEMode::Off:
+        V = IC.Builder.CreateMinimumNum(Src1, Src2);
+        break;
+      case KnownIEEEMode::Unknown:
+        break;
+      }
+    } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
+               isa<UndefValue>(Src1)) {
+      switch (fpenvIEEEMode(II, *ST)) {
+      case KnownIEEEMode::On:
+        // TODO: If Src2 is snan, does it need quieting?
+        if (ConstSrc1 && ConstSrc1->isSignaling())
+          return IC.replaceInstUsesWith(II, Src2);
+
+        V = IC.Builder.CreateMinNum(Src0, Src2);
+        break;
+      case KnownIEEEMode::Off:
+        V = IC.Builder.CreateMinimumNum(Src0, Src2);
+        break;
+      case KnownIEEEMode::Unknown:
+        break;
+      }
+    } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
+               isa<UndefValue>(Src2)) {
+      switch (fpenvIEEEMode(II, *ST)) {
+      case KnownIEEEMode::On:
+        if (ConstSrc2 && ConstSrc2->isSignaling()) {
+          auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
+          return IC.replaceInstUsesWith(II, Quieted);
+        }
+
+        V = IC.Builder.CreateMinNum(Src0, Src1);
+        break;
+      case KnownIEEEMode::Off:
+        V = IC.Builder.CreateMaximumNum(Src0, Src1);
+        break;
+      case KnownIEEEMode::Unknown:
+        break;
+      }
     }
 
     if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 972862d8e327e..d9311008bd680 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 
 ; Test with "amdgpu-ieee" set to true and false
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1,HAS-IEEE-BIT1 %s
 ; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
 
-; Test with gfx12 since there is no ieee bit anymore.
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s -; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s +; Test with gfx12 since there is no ieee bit anymore and the attribute is ignored. +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1,NO-IEEE-BIT %s +; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE1,NO-IEEE-BIT %s ; -------------------------------------------------------------------- ; llvm.amdgcn.fmed3 @@ -85,80 +85,120 @@ define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) #1 { } define float @fmed3_undef_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_undef_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_undef_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_undef_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) ret float %med3 } define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_fmf_undef_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float 
[[MED3]] +; IEEE1-LABEL: define float @fmed3_fmf_undef_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_fmf_undef_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call nnan float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) ret float %med3 } define float @fmed3_x_undef_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_undef_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_undef_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_x_undef_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y) ret float %med3 } define float @fmed3_x_y_undef_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_y_undef_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_y_undef_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define 
float @fmed3_x_y_undef_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) ret float %med3 } define float @fmed3_qnan0_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_qnan0_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_qnan0_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) ret float %med3 } define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_qnan0_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_x_qnan0_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 
0x7FF8000000000000, float %y) ret float %med3 } define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_y_qnan0_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_x_y_qnan0_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) ret float %med3 } define float @fmed3_qnan1_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_qnan1_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_qnan1_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_qnan1_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y) ret float %med3 @@ -229,27 +269,42 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 { } define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32( -; CHECK-SAME: float 
[[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float [[X]] +; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[X]] +; +; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) ret float %med3 } define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32( -; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float [[X]] +; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[X]] +; +; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) ret float %med3 } define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32( -; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float [[X]] +; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[X]] +; +; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) ret float %med3 @@ -274,9 +329,13 @@ define float @fmed3_0_nan_1_f32() #1 { } define float @fmed3_0_1_nan_f32() #1 
{ -; CHECK-LABEL: define float @fmed3_0_1_nan_f32( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: ret float 0.000000e+00 +; IEEE1-LABEL: define float @fmed3_0_1_nan_f32( +; IEEE1-SAME: ) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0.000000e+00 +; +; IEEE0-LABEL: define float @fmed3_0_1_nan_f32( +; IEEE0-SAME: ) #[[ATTR1]] { +; IEEE0-NEXT: ret float 1.000000e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000) ret float %med @@ -301,9 +360,13 @@ define float @fmed3_0_undef_1_f32() #1 { } define float @fmed3_0_1_undef_f32() #1 { -; CHECK-LABEL: define float @fmed3_0_1_undef_f32( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: ret float 0.000000e+00 +; IEEE1-LABEL: define float @fmed3_0_1_undef_f32( +; IEEE1-SAME: ) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0.000000e+00 +; +; IEEE0-LABEL: define float @fmed3_0_1_undef_f32( +; IEEE0-SAME: ) #[[ATTR1]] { +; IEEE0-NEXT: ret float 1.000000e+00 ; %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef) ret float %med @@ -337,84 +400,122 @@ define float @fmed3_x_y_poison_f32(float %x, float %y) #1 { } define float @fmed3_snan1_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_snan1_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_snan1_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[Y]] +; +; IEEE0-LABEL: define float @fmed3_snan1_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) ret float %med3 } define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_snan1_y_f32( -; 
CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_snan1_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[Y]] +; +; IEEE0-LABEL: define float @fmed3_x_snan1_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) ret float %med3 } define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_x_y_snan1_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define float @fmed3_x_y_snan1_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FFC000000000000 +; +; IEEE0-LABEL: define float @fmed3_x_y_snan1_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000) ret float %med3 } define float @fmed3_snan1_x_snan2_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_snan1_x_snan2_f32( -; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float [[X]] +; IEEE1-LABEL: define float @fmed3_snan1_x_snan2_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FF0000040000000 +; +; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000) +; 
IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000) ret float %med3 } define float @fmed3_x_snan1_snan2_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_x_snan1_snan2_f32( -; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float [[X]] +; IEEE1-LABEL: define float @fmed3_x_snan1_snan2_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FF0000040000000 +; +; IEEE0-LABEL: define float @fmed3_x_snan1_snan2_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000) ret float %med3 } define float @fmed3_snan1_snan2_snan3_f32(float %x) #1 { -; CHECK-LABEL: define float @fmed3_snan1_snan2_snan3_f32( -; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float 0x7FF8000020000000 +; IEEE1-LABEL: define float @fmed3_snan1_snan2_snan3_f32( +; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FF0000040000000 +; +; IEEE0-LABEL: define float @fmed3_snan1_snan2_snan3_f32( +; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: ret float 0x7FF8000040000000 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 0x7FF0000020000000, float 0x7FF0000040000000) ret float %med3 } define float @fmed3_snan1_1_2_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_snan1_1_2_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float 1.000000e+00 +; IEEE1-LABEL: define float @fmed3_snan1_1_2_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 2.000000e+00 +; +; IEEE0-LABEL: define float @fmed3_snan1_1_2_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: ret float 
1.000000e+00 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 1.0, float 2.0) ret float %med3 } define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_snan1_neg1_2_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float -1.000000e+00 +; IEEE1-LABEL: define float @fmed3_snan1_neg1_2_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 2.000000e+00 +; +; IEEE0-LABEL: define float @fmed3_snan1_neg1_2_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: ret float -1.000000e+00 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0x7FF4000000000000, float 2.0) ret float %med3 } define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 { -; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret float -2.000000e+00 +; IEEE1-LABEL: define float @fmed3_neg2_3_snan1_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FFC000000000000 +; +; IEEE0-LABEL: define float @fmed3_neg2_3_snan1_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: ret float 3.000000e+00 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000) ret float %med3 @@ -425,30 +526,57 @@ define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 { ; -------------------------------------------------------------------- define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(float %x, float %y) { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; HAS-IEEE-BIT1-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32( 
+; HAS-IEEE-BIT1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; HAS-IEEE-BIT1-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; HAS-IEEE-BIT1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; +; NO-IEEE-BIT-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32( +; NO-IEEE-BIT-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; NO-IEEE-BIT-NEXT: ret float [[Y]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) ret float %med3 } define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y) { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; HAS-IEEE-BIT1-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32( +; HAS-IEEE-BIT1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; HAS-IEEE-BIT1-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; HAS-IEEE-BIT1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; +; NO-IEEE-BIT-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32( +; NO-IEEE-BIT-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; NO-IEEE-BIT-NEXT: ret float [[Y]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) ret float %med3 
} define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; HAS-IEEE-BIT1-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( +; HAS-IEEE-BIT1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; HAS-IEEE-BIT1-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; HAS-IEEE-BIT1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; +; NO-IEEE-BIT-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32( +; NO-IEEE-BIT-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; NO-IEEE-BIT-NEXT: ret float 0x7FFC000000000000 ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000) ret float %med3 @@ -458,30 +586,42 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y ; -------------------------------------------------------------------- define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[Y]] +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32( +; IEEE0-SAME: 
float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) ret float %med3 } define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #1 { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float [[Y]] +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) ret float %med3 } define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 { -; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32( -; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]]) -; CHECK-NEXT: ret float [[MED3]] +; IEEE1-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: ret float 0x7FFC000000000000 +; +; IEEE0-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, 
float %y, float 0x7FF4000000000000) ret float %med3 @@ -578,10 +718,55 @@ define float @fmed3_x_y_poison_f32_strictfp(float %x, float %y) #2 { ret float %med3 } +; -------------------------------------------------------------------- +; llvm.amdgcn.fmed3 with flags +; -------------------------------------------------------------------- + +define float @fmed3_qnan0_x_y_f32_flags(float %x, float %y) #1 { +; IEEE1-LABEL: define float @fmed3_qnan0_x_y_f32_flags( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_qnan0_x_y_f32_flags( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; + %med3 = call nsz ninf float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) + ret float %med3 +} + +define float @fmed3_x_qnan0_y_f32_flags(float %x, float %y) #1 { +; IEEE1-LABEL: define float @fmed3_x_qnan0_y_f32_flags( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.minnum.f32(float [[X]], float [[Y]]) +; IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_x_qnan0_y_f32_flags( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.minimumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; + %med3 = call nsz ninf float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) + ret float %med3 +} + +define float @fmed3_x_y_qnan0_f32_flags(float %x, float %y) #1 { +; IEEE1-LABEL: define float @fmed3_x_y_qnan0_f32_flags( +; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE1-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.minnum.f32(float [[X]], float [[Y]]) +; 
IEEE1-NEXT: ret float [[MED3]] +; +; IEEE0-LABEL: define float @fmed3_x_y_qnan0_f32_flags( +; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] { +; IEEE0-NEXT: [[MED3:%.*]] = call ninf nsz float @llvm.maximumnum.f32(float [[X]], float [[Y]]) +; IEEE0-NEXT: ret float [[MED3]] +; + %med3 = call nsz ninf float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) + ret float %med3 +} + attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { nounwind "amdgpu-ieee"="true" } attributes #2 = { nounwind strictfp "amdgpu-ieee"="true" } - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; IEEE0: {{.*}} -; IEEE1: {{.*}}