From a4098bc544e6f80042deeb354b1c5e5128d3dc08 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 7 Jan 2024 21:00:23 +0800 Subject: [PATCH 1/4] [ValueTracking] Add additional tests for `llvm.powi`. NFC. --- llvm/test/Analysis/ValueTracking/powi-nneg.ll | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 llvm/test/Analysis/ValueTracking/powi-nneg.ll diff --git a/llvm/test/Analysis/ValueTracking/powi-nneg.ll b/llvm/test/Analysis/ValueTracking/powi-nneg.ll new file mode 100644 index 0000000000000..8a46613a0c9ee --- /dev/null +++ b/llvm/test/Analysis/ValueTracking/powi-nneg.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=instsimplify -S < %s | FileCheck %s + +define i1 @test_powi_even_exp(float %x) { +; CHECK-LABEL: define i1 @test_powi_even_exp( +; CHECK-SAME: float [[X:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %y = call float @llvm.powi.f32(float %x, i32 8) + %cmp = fcmp olt float %y, 0.0 + ret i1 %cmp +} + +define i1 @test_powi_base(float nofpclass(ninf nnorm nsub nzero) %x, i32 %e) { +; CHECK-LABEL: define i1 @test_powi_base( +; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[X:%.*]], i32 [[E:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %y = call float @llvm.powi.f32(float %x, i32 %e) + %cmp = fcmp olt float %y, 0.0 + ret i1 %cmp +} + +define i1 @test_powi_base_without_nsz(float nofpclass(ninf nnorm nsub) %x, i32 %e) { +; CHECK-LABEL: define i1 @test_powi_base_without_nsz( +; CHECK-SAME: float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[E:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 [[E]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[Y]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %y = call float @llvm.powi.f32(float %x, i32 %e) + %cmp = fcmp olt float %y, 0.0 + ret i1 %cmp +} + +define float @pr31702(float %x, i32 %n) { +; CHECK-LABEL: define float @pr31702( +; CHECK-SAME: float [[X:%.*]], i32 
[[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL1:%.*]] = call float @llvm.powi.f32.i32(float -0.000000e+00, i32 [[N]]) +; CHECK-NEXT: [[CALL2:%.*]] = call float @llvm.fabs.f32(float [[CALL1]]) +; CHECK-NEXT: ret float [[CALL2]] +; +entry: + %call1 = call float @llvm.powi.f32(float -0.0, i32 %n) + %call2 = call float @llvm.fabs.f32(float %call1) + ret float %call2 +} + +declare float @llvm.powi.f32(float, i32) +declare float @llvm.fabs.f32(float) From 82d1d5005236ff229defccd76fb564e009453ec2 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 7 Jan 2024 21:06:54 +0800 Subject: [PATCH 2/4] [ValueTracking] Merge `cannotBeOrderedLessThanZeroImpl` into `computeKnownFPClass` --- llvm/include/llvm/Analysis/ValueTracking.h | 33 ++- llvm/lib/Analysis/InstructionSimplify.cpp | 9 +- llvm/lib/Analysis/ValueTracking.cpp | 246 +++--------------- .../InstCombine/InstCombineCalls.cpp | 18 +- .../CodeGen/Thumb2/mve-vmaxnma-commute.ll | 12 +- .../InstCombine/copysign-fneg-fabs.ll | 5 +- .../floating-point-arithmetic-strictfp.ll | 8 +- 7 files changed, 89 insertions(+), 242 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 7360edfce1f39..90075cb88c3a0 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -332,6 +332,12 @@ struct KnownFPClass { void knownNot(FPClassTest RuleOut) { KnownFPClasses = KnownFPClasses & ~RuleOut; + if (isKnownNever(fcNan) && !SignBit) { + if (isKnownNever(OrderedLessThanZeroMask | fcNegZero)) + SignBit = false; + else if (isKnownNever(OrderedGreaterThanZeroMask | fcPosZero)) + SignBit = true; + } } void fneg() { @@ -367,6 +373,12 @@ struct KnownFPClass { SignBit = false; } + /// Assume the sign bit is one. + void signBitMustBeOne() { + KnownFPClasses &= (fcNegative | fcNan); + SignBit = true; + } + void copysign(const KnownFPClass &Sign) { // Don't know anything about the sign of the source. 
Expand the possible set // to its opposite sign pair. @@ -553,15 +565,18 @@ inline bool isKnownNeverNaN(const Value *V, const DataLayout &DL, return Known.isKnownNeverNaN(); } -/// Return true if we can prove that the specified FP value's sign bit is 0. -/// -/// NaN --> true/false (depending on the NaN's sign bit) -/// +0 --> true -/// -0 --> false -/// x > +0 --> true -/// x < -0 --> false -bool SignBitMustBeZero(const Value *V, const DataLayout &DL, - const TargetLibraryInfo *TLI); +/// Return false if we can prove that the specified FP value's sign bit is 0. +/// Return true if we can prove that the specified FP value's sign bit is 1. +/// Otherwise return std::nullopt. +inline std::optional<bool> computeKnownFPSignBit( + const Value *V, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr, unsigned Depth = 0, + AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, bool UseInstrInfo = true) { + KnownFPClass Known = computeKnownFPClass(V, DL, fcAllFlags, Depth, TLI, AC, + CtxI, DT, UseInstrInfo); + return Known.SignBit; +} /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is true for all i8 diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 241bdd81b75a9..02eef4b5ee240 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5762,9 +5762,9 @@ static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, return ConstantFP::getZero(Op0->getType()); // +normal number * (-)0.0 --> (-)0.0 - if (isKnownNeverInfOrNaN(Op0, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) && - // TODO: Check SignBit from computeKnownFPClass when it's more complete. 
- SignBitMustBeZero(Op0, Q.DL, Q.TLI)) + KnownFPClass Known = computeKnownFPClass( + Op0, FMF, Q.DL, fcInf | fcNan, /*Depth=*/0, Q.TLI, Q.AC, Q.CxtI, Q.DT); + if (Known.SignBit == false && Known.isKnownNever(fcInf | fcNan)) return Op1; } @@ -6217,7 +6217,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, Value *X; switch (IID) { case Intrinsic::fabs: - if (SignBitMustBeZero(Op0, Q.DL, Q.TLI)) + if (computeKnownFPSignBit(Op0, Q.DL, Q.TLI, /*Depth=*/0, Q.AC, Q.CxtI, + Q.DT) == false) return Op0; break; case Intrinsic::bswap: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 940ae9eb7ee29..7c49ddd175455 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3704,205 +3704,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, return Intrinsic::not_intrinsic; } -/// Deprecated, use computeKnownFPClass instead. -/// -/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a -/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign -/// bit despite comparing equal. -static bool cannotBeOrderedLessThanZeroImpl(const Value *V, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool SignBitOnly, unsigned Depth) { - // TODO: This function does not do the right thing when SignBitOnly is true - // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform - // which flips the sign bits of NaNs. See - // https://llvm.org/bugs/show_bug.cgi?id=31702. - - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { - return !CFP->getValueAPF().isNegative() || - (!SignBitOnly && CFP->getValueAPF().isZero()); - } - - // Handle vector of constants. 
- if (auto *CV = dyn_cast<Constant>(V)) { - if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { - unsigned NumElts = CVFVTy->getNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); - if (!CFP) - return false; - if (CFP->getValueAPF().isNegative() && - (SignBitOnly || !CFP->getValueAPF().isZero())) - return false; - } - - // All non-negative ConstantFPs. - return true; - } - } - - if (Depth == MaxAnalysisRecursionDepth) - return false; - - const Operator *I = dyn_cast<Operator>(V); - if (!I) - return false; - - switch (I->getOpcode()) { - default: - break; - // Unsigned integers are always nonnegative. - case Instruction::UIToFP: - return true; - case Instruction::FDiv: - // X / X is always exactly 1.0 or a NaN. - if (I->getOperand(0) == I->getOperand(1) && - (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) - return true; - - // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN). - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, - /*SignBitOnly*/ true, Depth + 1); - case Instruction::FMul: - // X * X is always non-negative or a NaN. - if (I->getOperand(0) == I->getOperand(1) && - (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) - return true; - - [[fallthrough]]; - case Instruction::FAdd: - case Instruction::FRem: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, - SignBitOnly, Depth + 1); - case Instruction::Select: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, - SignBitOnly, Depth + 1) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, - SignBitOnly, Depth + 1); - case Instruction::FPExt: - case Instruction::FPTrunc: - // Widening/narrowing never change sign. 
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1); - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. But at least we might find information - // valid for all elements of the vector. - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1); - case Instruction::Call: - const auto *CI = cast<CallInst>(I); - Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); - switch (IID) { - default: - break; - case Intrinsic::canonicalize: - case Intrinsic::arithmetic_fence: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::round: - case Intrinsic::roundeven: - case Intrinsic::fptrunc_round: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1); - case Intrinsic::maxnum: { - Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); - auto isPositiveNum = [&](Value *V) { - if (SignBitOnly) { - // With SignBitOnly, this is tricky because the result of - // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is - // a constant strictly greater than 0.0. - const APFloat *C; - return match(V, m_APFloat(C)) && - *C > APFloat::getZero(C->getSemantics()); - } - - // -0.0 compares equal to 0.0, so if this operand is at least -0.0, - // maxnum can't be ordered-less-than-zero. - return isKnownNeverNaN(V, DL, TLI) && - cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1); - }; - - // TODO: This could be improved. We could also check that neither operand - // has its sign bit set (and at least 1 is not-NAN?). 
- return isPositiveNum(V0) || isPositiveNum(V1); - } - - case Intrinsic::maximum: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1) || - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, - SignBitOnly, Depth + 1); - case Intrinsic::minnum: - case Intrinsic::minimum: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, - SignBitOnly, Depth + 1); - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::fabs: - return true; - case Intrinsic::copysign: - // Only the sign operand matters. - return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true, - Depth + 1); - case Intrinsic::sqrt: - // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. - if (!SignBitOnly) - return true; - return CI->hasNoNaNs() && - (CI->hasNoSignedZeros() || - cannotBeNegativeZero(CI->getOperand(0), DL, TLI)); - - case Intrinsic::powi: - if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { - // powi(x,n) is non-negative if n is even. - if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) - return true; - } - // TODO: This is not correct. Given that exp is an integer, here are the - // ways that pow can return a negative value: - // - // pow(x, exp) --> negative if exp is odd and x is negative. - // pow(-0, exp) --> -inf if exp is negative odd. - // pow(-0, exp) --> -0 if exp is positive odd. - // pow(-inf, exp) --> -0 if exp is negative odd. - // pow(-inf, exp) --> -inf if exp is positive odd. - // - // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, - // but we must return false if x == -0. Unfortunately we do not currently - // have a way of expressing this constraint. See details in - // https://llvm.org/bugs/show_bug.cgi?id=31702. 
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, - SignBitOnly, Depth + 1); - - case Intrinsic::fma: - case Intrinsic::fmuladd: - // x*x+y is non-negative if y is non-negative. - return I->getOperand(0) == I->getOperand(1) && - (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, - SignBitOnly, Depth + 1); - } - break; - } - return false; -} - -bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - // FIXME: Use computeKnownFPClass and pass all arguments - return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); -} - /// Return true if it's possible to assume IEEE treatment of input denormals in /// \p F for \p Val. static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { @@ -4307,7 +4108,6 @@ static void computeKnownFPClassForFPTrunc(const Operator *Op, // Infinity needs a range check. } -// TODO: Merge implementation of cannotBeOrderedLessThanZero into here. void computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth, const SimplifyQuery &Q) { @@ -4332,6 +4132,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, const Constant *CV = dyn_cast<Constant>(V); if (VFVTy && CV) { Known.KnownFPClasses = fcNone; + bool SignBitAllZero = true; + bool SignBitAllOne = true; // For vectors, verify that each element is not NaN. 
unsigned NumElts = VFVTy->getNumElements(); @@ -4349,10 +4151,15 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, return; } - KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()}; - Known |= KnownElt; + const APFloat &C = CElt->getValueAPF(); + Known.KnownFPClasses |= C.classify(); + if (C.isNegative()) + SignBitAllZero = false; + else + SignBitAllOne = false; } - + if (SignBitAllOne != SignBitAllZero) + Known.SignBit = SignBitAllOne; return; } @@ -4488,7 +4295,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, KnownAddend, Depth + 1, Q); - // TODO: Known sign bit with no nans if (KnownAddend.cannotBeOrderedLessThanZero()) Known.knownNot(fcNegative); break; @@ -4522,7 +4328,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { Known.knownNot(fcNegZero); if (KnownSrc.isKnownNeverNaN()) - Known.SignBit = false; + Known.signBitMustBeZero(); } break; @@ -4592,7 +4398,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, // subtargets on AMDGPU the min/max instructions would not flush the // output and return the original value. 
// - // TODO: This could be refined based on the sign if ((Known.KnownFPClasses & fcZero) != fcNone && !Known.isKnownNeverSubnormal()) { const Function *Parent = II->getFunction(); @@ -4605,6 +4410,26 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Known.KnownFPClasses |= fcZero; } + if (Known.isKnownNeverNaN()) { + if (KnownLHS.SignBit && KnownRHS.SignBit && + *KnownLHS.SignBit == *KnownRHS.SignBit) { + if (*KnownLHS.SignBit) + Known.signBitMustBeOne(); + else + Known.signBitMustBeZero(); + } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) || + ((KnownLHS.isKnownNeverNegZero() || + KnownRHS.isKnownNeverPosZero()) && + (KnownLHS.isKnownNeverPosZero() || + KnownRHS.isKnownNeverNegZero()))) { + if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) && + (KnownLHS.SignBit == false || KnownRHS.SignBit == false)) + Known.signBitMustBeZero(); + else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) && + (KnownLHS.SignBit == true || KnownRHS.SignBit == true)) + Known.signBitMustBeOne(); + } + } break; } case Intrinsic::canonicalize: { @@ -4704,7 +4529,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, KnownSrc, Depth + 1, Q); if (KnownSrc.isKnownNeverNaN()) { Known.knownNot(fcNan); - Known.SignBit = false; + Known.signBitMustBeZero(); } break; @@ -4954,6 +4779,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, if (!KnownLHS.isKnownNeverNaN()) break; + if (KnownLHS.SignBit && KnownRHS.SignBit) { + if (*KnownLHS.SignBit == *KnownRHS.SignBit) + Known.signBitMustBeZero(); + else + Known.signBitMustBeOne(); + } + // If 0 * +/-inf produces NaN. 
if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { Known.knownNot(fcNan); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 40b48699f7585..0174b5237de83 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2406,20 +2406,20 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } case Intrinsic::copysign: { Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1); - if (SignBitMustBeZero(Sign, DL, &TLI)) { + if (std::optional<bool> KnownSignBit = computeKnownFPSignBit( + Sign, getDataLayout(), &TLI, /*Depth=*/0, &AC, II, &DT)) { + if (*KnownSignBit) { + // If we know that the sign argument is negative, reduce to FNABS: + // copysign Mag, -Sign --> fneg (fabs Mag) + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); + return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II)); + } + // If we know that the sign argument is positive, reduce to FABS: // copysign Mag, +Sign --> fabs Mag Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); return replaceInstUsesWith(*II, Fabs); } - // TODO: There should be a ValueTracking sibling like SignBitMustBeOne. 
- const APFloat *C; - if (match(Sign, m_APFloat(C)) && C->isNegative()) { - // If we know that the sign argument is negative, reduce to FNABS: - // copysign Mag, -Sign --> fneg (fabs Mag) - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II); - return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II)); - } // Propagate sign argument through nested calls: // copysign Mag, (copysign ?, X) --> copysign Mag, X diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll index 0a18279a57ef1..e6cb00273a273 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll @@ -203,7 +203,8 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB16_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: vabs.f32 q1, q1 +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB16_2 ; CHECK-NEXT: .LBB16_3: ; CHECK-NEXT: vldr s4, .LCPI16_0 @@ -253,7 +254,8 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB17_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: vabs.f32 q1, q1 +; CHECK-NEXT: vmaxnm.f32 q0, q1, q0 ; CHECK-NEXT: le lr, .LBB17_2 ; CHECK-NEXT: .LBB17_3: ; CHECK-NEXT: vldr s4, .LCPI17_0 @@ -395,7 +397,8 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #8 -; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: vabs.f16 q1, q1 +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB20_2 ; CHECK-NEXT: .LBB20_3: ; CHECK-NEXT: vldr.16 s4, .LCPI20_0 @@ -445,7 +448,8 @@ define void 
@loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #8 -; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: vabs.f16 q1, q1 +; CHECK-NEXT: vmaxnm.f16 q0, q1, q0 ; CHECK-NEXT: le lr, .LBB21_2 ; CHECK-NEXT: .LBB21_3: ; CHECK-NEXT: vldr.16 s4, .LCPI21_0 diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll index db839e86f2532..af939cf74399a 100644 --- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll +++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll @@ -55,9 +55,8 @@ define half @copysign_fabs_y(half %x, half %y) { define half @copysign_fneg_fabs_y(half %x, half %y) { ; CHECK-LABEL: @copysign_fneg_fabs_y( -; CHECK-NEXT: [[FABS_Y:%.*]] = call half @llvm.fabs.f16(half [[Y:%.*]]) -; CHECK-NEXT: [[FNEG_FABS_Y:%.*]] = fneg half [[FABS_Y]] -; CHECK-NEXT: [[COPYSIGN:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[FNEG_FABS_Y]]) +; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]]) +; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg half [[TMP1]] ; CHECK-NEXT: ret half [[COPYSIGN]] ; %fabs.y = call half @llvm.fabs.f16(half %y) diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll index 7ee8a82ab5a2d..e4748a2402923 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll @@ -249,25 +249,21 @@ define float @fabs_sqrt_nsz(float %a) #0 { ret float %fabs } -; The fabs can be eliminated because we're nsz and nnan. 
define float @fabs_sqrt_nnan_nsz(float %a) #0 { ; CHECK-LABEL: @fabs_sqrt_nnan_nsz( ; CHECK-NEXT: [[SQRT:%.*]] = call nnan nsz float @llvm.experimental.constrained.sqrt.f32(float [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SQRT]]) #[[ATTR0]] -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: ret float [[SQRT]] ; %sqrt = call nnan nsz float @llvm.experimental.constrained.sqrt.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") %fabs = call float @llvm.fabs.f32(float %sqrt) #0 ret float %fabs } -; The second fabs can be eliminated because the operand to sqrt cannot be -0. define float @fabs_sqrt_nnan_fabs(float %a) #0 { ; CHECK-LABEL: @fabs_sqrt_nnan_fabs( ; CHECK-NEXT: [[B:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]]) #[[ATTR0]] ; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @llvm.experimental.constrained.sqrt.f32(float [[B]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SQRT]]) #[[ATTR0]] -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: ret float [[SQRT]] ; %b = call float @llvm.fabs.f32(float %a) #0 %sqrt = call nnan float @llvm.experimental.constrained.sqrt.f32(float %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") From 6bb0fd6c8bb720549e43965a7693b88015c9984c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 31 Jan 2024 12:54:38 +0530 Subject: [PATCH 3/4] Use fcNegative --- llvm/include/llvm/Analysis/ValueTracking.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 90075cb88c3a0..4080f783b8bd0 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -333,7 +333,7 @@ struct KnownFPClass { void knownNot(FPClassTest RuleOut) { KnownFPClasses = KnownFPClasses & ~RuleOut; if (isKnownNever(fcNan) && 
!SignBit) { - if (isKnownNever(OrderedLessThanZeroMask | fcNegZero)) + if (isKnownNever(fcNegative)) SignBit = false; else if (isKnownNever(OrderedGreaterThanZeroMask | fcPosZero)) SignBit = true; From b747f0b10a17c34def4942fe89fe60bd9c951b1e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 31 Jan 2024 12:54:48 +0530 Subject: [PATCH 4/4] Use fcPositive --- llvm/include/llvm/Analysis/ValueTracking.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 4080f783b8bd0..2ebc17e3636a9 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -335,7 +335,7 @@ struct KnownFPClass { if (isKnownNever(fcNan) && !SignBit) { if (isKnownNever(fcNegative)) SignBit = false; - else if (isKnownNever(OrderedGreaterThanZeroMask | fcPosZero)) + else if (isKnownNever(fcPositive)) SignBit = true; } }