13
13
14
14
#include " InstCombineInternal.h"
15
15
#include " llvm/ADT/APInt.h"
16
+ #include " llvm/ADT/SmallPtrSet.h"
16
17
#include " llvm/ADT/SmallVector.h"
17
18
#include " llvm/Analysis/InstructionSimplify.h"
18
19
#include " llvm/Analysis/ValueTracking.h"
@@ -666,6 +667,90 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
666
667
return nullptr ;
667
668
}
668
669
670
+ // Check legality for transforming
671
+ // x = 1.0/sqrt(a)
672
+ // r1 = x * x;
673
+ // r2 = a/sqrt(a);
674
+ //
675
+ // TO
676
+ //
677
+ // r1 = 1/a
678
+ // r2 = sqrt(a)
679
+ // x = r1 * r2
680
+ // This transform works only when 'a' is known positive.
681
+ static bool isFSqrtDivToFMulLegal (Instruction *X,
682
+ SmallPtrSetImpl<Instruction *> &R1,
683
+ SmallPtrSetImpl<Instruction *> &R2) {
684
+ BasicBlock *BBx = X->getParent ();
685
+ BasicBlock *BBr1 = (*R1.begin ())->getParent ();
686
+ BasicBlock *BBr2 = (*R2.begin ())->getParent ();
687
+
688
+ CallInst *FSqrt = cast<CallInst>(X->getOperand (1 ));
689
+ if (!FSqrt->hasAllowReassoc () || !FSqrt->hasNoNaNs () ||
690
+ !FSqrt->hasNoSignedZeros () || !FSqrt->hasNoInfs ())
691
+ return false ;
692
+
693
+ // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
694
+ // by recip fp as it is strictly meant to transform ops of type a/b to
695
+ // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
696
+ // has been used(rather abused)in the past for algebraic rewrites.
697
+ if (!X->hasAllowReassoc () || !X->hasAllowReciprocal () || !X->hasNoInfs ())
698
+ return false ;
699
+
700
+ // Check the constraints on X, R1 and R2 combined.
701
+ // fdiv instruction and one of the multiplications must reside in the same
702
+ // block. If not, the optimized code may execute more ops than before and
703
+ // this may hamper the performance.
704
+ if (BBx != BBr1 && BBx != BBr2)
705
+ return false ;
706
+
707
+ // Check the constraints on instructions in R1.
708
+ if (any_of (R1, [BBr1](Instruction *I) {
709
+ // When you have multiple instructions residing in R1 and R2
710
+ // respectively, it's difficult to generate combinations of (R1,R2) and
711
+ // then check if we have the required pattern. So, for now, just be
712
+ // conservative.
713
+ return (I->getParent () != BBr1 || !I->hasAllowReassoc ());
714
+ }))
715
+ return false ;
716
+
717
+ // Check the constraints on instructions in R2.
718
+ return all_of (R2, [BBr2](Instruction *I) {
719
+ // When you have multiple instructions residing in R1 and R2
720
+ // respectively, it's difficult to generate combination of (R1,R2) and
721
+ // then check if we have the required pattern. So, for now, just be
722
+ // conservative.
723
+ return (I->getParent () == BBr2 && I->hasAllowReassoc ());
724
+ });
725
+ }
726
+
727
+ // If we have the following pattern,
728
+ // X = 1.0/sqrt(a)
729
+ // R1 = X * X
730
+ // R2 = a/sqrt(a)
731
+ // then this method collects all the instructions that match R1 and R2.
732
+ static bool getFSqrtDivOptPattern (Instruction *Div,
733
+ SmallPtrSetImpl<Instruction *> &R1,
734
+ SmallPtrSetImpl<Instruction *> &R2) {
735
+ Value *A;
736
+ if (match (Div, m_FDiv (m_FPOne (), m_Sqrt (m_Value (A)))) ||
737
+ match (Div, m_FDiv (m_SpecificFP (-1.0 ), m_Sqrt (m_Value (A))))) {
738
+ for (User *U : Div->users ()) {
739
+ Instruction *I = cast<Instruction>(U);
740
+ if (match (I, m_FMul (m_Specific (Div), m_Specific (Div))))
741
+ R1.insert (I);
742
+ }
743
+
744
+ CallInst *CI = cast<CallInst>(Div->getOperand (1 ));
745
+ for (User *U : CI->users ()) {
746
+ Instruction *I = cast<Instruction>(U);
747
+ if (match (I, m_FDiv (m_Specific (A), m_Sqrt (m_Specific (A)))))
748
+ R2.insert (I);
749
+ }
750
+ }
751
+ return !R1.empty () && !R2.empty ();
752
+ }
753
+
669
754
Instruction *InstCombinerImpl::foldFMulReassoc (BinaryOperator &I) {
670
755
Value *Op0 = I.getOperand (0 );
671
756
Value *Op1 = I.getOperand (1 );
@@ -1864,6 +1949,68 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
1864
1949
return BinaryOperator::CreateFMulFMF (Op0, NewSqrt, &I);
1865
1950
}
1866
1951
1952
+ // Change
1953
+ // X = 1/sqrt(a)
1954
+ // R1 = X * X
1955
+ // R2 = a * X
1956
+ //
1957
+ // TO
1958
+ //
1959
+ // FDiv = 1/a
1960
+ // FSqrt = sqrt(a)
1961
+ // FMul = FDiv * FSqrt
1962
+ // Replace Uses Of R1 With FDiv
1963
+ // Replace Uses Of R2 With FSqrt
1964
+ // Replace Uses Of X With FMul
1965
+ static Instruction *
1966
+ convertFSqrtDivIntoFMul (CallInst *CI, Instruction *X,
1967
+ const SmallPtrSetImpl<Instruction *> &R1,
1968
+ const SmallPtrSetImpl<Instruction *> &R2,
1969
+ InstCombiner::BuilderTy &B, InstCombinerImpl *IC) {
1970
+
1971
+ B.SetInsertPoint (X);
1972
+
1973
+ // Every instance of R1 may have different fpmath metadata and fpmath flags.
1974
+ // We try to preserve them by having separate fdiv instruction per R1
1975
+ // instance.
1976
+ Value *SqrtOp = CI->getArgOperand (0 );
1977
+ Instruction *FDiv;
1978
+
1979
+ for (Instruction *I : R1) {
1980
+ FDiv = cast<Instruction>(
1981
+ B.CreateFDiv (ConstantFP::get (X->getType (), 1.0 ), SqrtOp));
1982
+ FDiv->copyMetadata (*I);
1983
+ FDiv->copyFastMathFlags (I);
1984
+ IC->replaceInstUsesWith (*I, FDiv);
1985
+ IC->eraseInstFromFunction (*I);
1986
+ }
1987
+
1988
+ // Although, by value, FSqrt = CI , every instance of R2 may have different
1989
+ // fpmath metadata and fpmath flags. We try to preserve them by cloning the
1990
+ // call instruction per R2 instance.
1991
+ CallInst *FSqrt;
1992
+ for (Instruction *I : R2) {
1993
+ FSqrt = cast<CallInst>(CI->clone ());
1994
+ FSqrt->insertBefore (CI);
1995
+ FSqrt->copyFastMathFlags (I);
1996
+ FSqrt->copyMetadata (*I);
1997
+ IC->replaceInstUsesWith (*I, FSqrt);
1998
+ IC->eraseInstFromFunction (*I);
1999
+ }
2000
+
2001
+ Instruction *FMul;
2002
+ // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt)
2003
+ if (match (X, m_FDiv (m_SpecificFP (-1.0 ), m_Specific (CI)))) {
2004
+ Value *Mul = B.CreateFMul (FDiv, FSqrt);
2005
+ FMul = cast<Instruction>(B.CreateFNegFMF (Mul, X));
2006
+ } else
2007
+ FMul = cast<Instruction>(B.CreateFMulFMF (FDiv, FSqrt, X));
2008
+ FMul->copyMetadata (*X);
2009
+
2010
+ IC->replaceInstUsesWith (*X, FMul);
2011
+ return IC->eraseInstFromFunction (*X);
2012
+ }
2013
+
1867
2014
Instruction *InstCombinerImpl::visitFDiv (BinaryOperator &I) {
1868
2015
Module *M = I.getModule ();
1869
2016
@@ -1888,6 +2035,24 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
1888
2035
return R;
1889
2036
1890
2037
Value *Op0 = I.getOperand (0 ), *Op1 = I.getOperand (1 );
2038
+
2039
+ // Convert
2040
+ // x = 1.0/sqrt(a)
2041
+ // r1 = x * x;
2042
+ // r2 = a/sqrt(a);
2043
+ //
2044
+ // TO
2045
+ //
2046
+ // r1 = 1/a
2047
+ // r2 = sqrt(a)
2048
+ // x = r1 * r2
2049
+ SmallPtrSet<Instruction *, 2 > R1, R2;
2050
+ if (getFSqrtDivOptPattern (&I, R1, R2) && isFSqrtDivToFMulLegal (&I, R1, R2)) {
2051
+ CallInst *CI = cast<CallInst>(I.getOperand (1 ));
2052
+ if (Instruction *D = convertFSqrtDivIntoFMul (CI, &I, R1, R2, Builder, this ))
2053
+ return D;
2054
+ }
2055
+
1891
2056
if (isa<Constant>(Op0))
1892
2057
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1893
2058
if (Instruction *R = FoldOpIntoSelect (I, SI))
0 commit comments