@@ -3777,3 +3777,201 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3777
3777
return lowerVAARG (Op, DAG);
3778
3778
}
3779
3779
}
3780
+
3781
+ static bool isPackableElemVT (EVT VT) {
3782
+ if (VT.isVector ())
3783
+ return false ;
3784
+ return VT.getScalarSizeInBits () <= 32 ;
3785
+ }
3786
+
3787
+ static bool isVectorRegisterVT (EVT VT) {
3788
+ if (!VT.isVector () || VT.isScalableVector ())
3789
+ return false ;
3790
+ unsigned NumElems = VT.getVectorNumElements ();
3791
+ EVT ElemVT = VT.getVectorElementType ();
3792
+
3793
+ // Not a legal element count.
3794
+ if ((NumElems != 256 ) && (NumElems != 512 ))
3795
+ return false ;
3796
+
3797
+ // Legal as both regular and packed vectors.
3798
+ if (ElemVT == MVT::i1 || ElemVT == MVT::i32 || ElemVT == MVT::f32)
3799
+ return true ;
3800
+
3801
+ // Only legal in regular mode.
3802
+ return NumElems == 256 ;
3803
+ }
3804
+
3805
+ static TargetLoweringBase::LegalizeKind
3806
+ getPromoteElementConversion (LLVMContext &Context, EVT ElemVT,
3807
+ unsigned NumElems) {
3808
+ using LegalizeKind = TargetLoweringBase::LegalizeKind;
3809
+ using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;
3810
+
3811
+ LegalizeTypeAction LTA;
3812
+ MVT PromotedElemVT;
3813
+ if (ElemVT.isFloatingPoint ()) {
3814
+ PromotedElemVT = MVT::f32;
3815
+ LTA = LegalizeTypeAction::TypePromoteFloat;
3816
+ } else {
3817
+ assert (ElemVT.isInteger ());
3818
+ PromotedElemVT = MVT::i32;
3819
+ LTA = LegalizeTypeAction::TypePromoteInteger;
3820
+ }
3821
+ return LegalizeKind (LTA, EVT::getVectorVT (Context, PromotedElemVT, NumElems));
3822
+ }
3823
+
3824
+ static TargetLoweringBase::LegalizeKind
3825
+ getWidenVectorConversion (LLVMContext &Context, EVT ElemVT,
3826
+ unsigned LegalNumElems) {
3827
+ using LegalizeKind = TargetLoweringBase::LegalizeKind;
3828
+ using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;
3829
+
3830
+ return LegalizeKind (LegalizeTypeAction::TypeWidenVector,
3831
+ EVT::getVectorVT (Context, ElemVT, LegalNumElems));
3832
+ }
3833
+
3834
+ static TargetLoweringBase::LegalizeKind
3835
+ getSplitVectorConversion (LLVMContext &Context, EVT ElemVT, unsigned NumElems) {
3836
+ using LegalizeKind = TargetLoweringBase::LegalizeKind;
3837
+ using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;
3838
+
3839
+ return LegalizeKind (LegalizeTypeAction::TypeSplitVector,
3840
+ EVT::getVectorVT (Context, ElemVT, (NumElems + 1 ) / 2 ));
3841
+ }
3842
+
3843
+ Optional<TargetLoweringBase::LegalizeKind>
3844
+ VETargetLowering::getCustomTypeConversion (LLVMContext &Context, EVT VT) const {
3845
+ // Do not interfere with SPU legalization.
3846
+ if (!VT.isVector () || !Subtarget->enableVPU () ||
3847
+ VT.getVectorNumElements () == 1 )
3848
+ return None;
3849
+
3850
+ EVT ElemVT = VT.getVectorElementType ();
3851
+ unsigned NumElems = VT.getVectorNumElements ();
3852
+ auto ElemBits = ElemVT.getScalarSizeInBits ();
3853
+
3854
+ // Only use packed mode when surpassing the regular (256 elements) vector
3855
+ // size.
3856
+ const bool RequiresPackedRegister =
3857
+ isOverPackedType (VT) || (isPackableElemVT (ElemVT) && NumElems > 256 );
3858
+
3859
+ // Already a legal type.
3860
+ if (isVectorRegisterVT (VT) &&
3861
+ (!RequiresPackedRegister || Subtarget->hasPackedMode ()))
3862
+ return None;
3863
+
3864
+ // Promote small elements to i/f32.
3865
+ if (1 < ElemBits && ElemBits < 32 )
3866
+ return getPromoteElementConversion (Context, ElemVT, NumElems);
3867
+
3868
+ // Excessive element size.
3869
+ if (ElemBits > 64 )
3870
+ return None; // Defer to builtin expansion for oversized vectors.
3871
+
3872
+ // Only use packed mode when surpassing the regular (256 elements) vector
3873
+ // size.
3874
+ const bool UsePackedRegister =
3875
+ Subtarget->hasPackedMode () && RequiresPackedRegister;
3876
+
3877
+ // Widen to register width.
3878
+ const unsigned RegisterNumElems = UsePackedRegister ? 512 : 256 ;
3879
+ if (NumElems < RegisterNumElems)
3880
+ return getWidenVectorConversion (Context, ElemVT, RegisterNumElems);
3881
+
3882
+ // Split to register width.
3883
+ // TODO: Teach isel to split non-power-of-two vectors.
3884
+ if (NumElems > RegisterNumElems && (NumElems % 2 == 0 ))
3885
+ return getSplitVectorConversion (Context, ElemVT, NumElems);
3886
+
3887
+ // Type is either legal or not custom converted.
3888
+ return None;
3889
+ }
3890
+
3891
+ Optional<VETargetLowering::RegisterCountPair>
3892
+ VETargetLowering::getRegistersForCallingConv (LLVMContext &Context,
3893
+ CallingConv::ID CC, EVT VT) const {
3894
+ using RegisterCount = VETargetLowering::RegisterCountPair;
3895
+ if (CC != CallingConv::Fast)
3896
+ return None;
3897
+ if (!VT.isVector () || VT.isScalableVector ())
3898
+ return None;
3899
+
3900
+ MVT RegisterVT;
3901
+ EVT IntermediateVT;
3902
+ unsigned NumIntermediates;
3903
+ unsigned NumRegs = getVectorTypeBreakdownForCallingConv (
3904
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
3905
+ return RegisterCount{RegisterVT, NumRegs};
3906
+ }
3907
+
3908
+ unsigned VETargetLowering::getVectorTypeBreakdownForCallingConv (
3909
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
3910
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
3911
+ auto DefaultImpl = [&]() {
3912
+ return TargetLoweringBase::getVectorTypeBreakdownForCallingConv (
3913
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
3914
+ };
3915
+
3916
+ auto ElemVT = VT.getVectorElementType ();
3917
+ unsigned NumElems = VT.isScalableVector () ? 0 : VT.getVectorNumElements ();
3918
+ const bool RequiresPackedRegister =
3919
+ !VT.isScalableVector () &&
3920
+ (isOverPackedType (VT) || (isPackableElemVT (ElemVT) && NumElems > 256 ));
3921
+
3922
+ if (CC != CallingConv::Fast || VT.isScalableVector () ||
3923
+ (isVectorRegisterVT (VT) &&
3924
+ !(Subtarget->hasPackedMode () && RequiresPackedRegister)))
3925
+ return DefaultImpl ();
3926
+
3927
+ // fastcc - map everything to vregs.
3928
+ auto LK = getCustomTypeConversion (Context, VT);
3929
+ // Non-custom converted type - back to builtin logic.
3930
+ if (!LK.hasValue ())
3931
+ return DefaultImpl ();
3932
+
3933
+ // Compute the fixed point of the custom type conversion rules.
3934
+ // We want to have the same vector layout inside functions as well as across
3935
+ // function boundaries.
3936
+
3937
+ // IntermediateVT : used to copy the parts.
3938
+ IntermediateVT = VT;
3939
+ NumIntermediates = 1 ;
3940
+
3941
+ EVT NextVT;
3942
+ do {
3943
+ NextVT = LK->second ;
3944
+ auto LTA = LK->first ;
3945
+
3946
+ switch (LTA) {
3947
+ default :
3948
+ return DefaultImpl ();
3949
+
3950
+ case LegalizeTypeAction::TypePromoteFloat:
3951
+ case LegalizeTypeAction::TypePromoteInteger:
3952
+ // Promote elements across call boundaries.
3953
+ IntermediateVT = NextVT;
3954
+ break ;
3955
+
3956
+ case LegalizeTypeAction::TypeWidenVector:
3957
+ // Retain all information about the original vector length.
3958
+ // That is, keep the IntermediateVT at the original vector length if
3959
+ // possible
3960
+ break ;
3961
+
3962
+ case LegalizeTypeAction::TypeSplitVector:
3963
+ // The last split results in the intermediate VT used for copying vectors
3964
+ // at calls.
3965
+ IntermediateVT = NextVT;
3966
+ NumIntermediates *= 2 ;
3967
+ break ;
3968
+ }
3969
+
3970
+ LK = getCustomTypeConversion (Context, NextVT);
3971
+ } while (LK.hasValue ());
3972
+
3973
+ RegisterVT = NextVT.getSimpleVT ();
3974
+
3975
+ // Must converge in a valid RegisterVT.
3976
+ return NumIntermediates;
3977
+ }
0 commit comments