@@ -11136,8 +11136,9 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11136
11136
if (!VL)
11137
11137
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11138
11138
11139
- SDValue Index;
11140
- if (!IsUnmasked && IsExpandingLoad) {
11139
+ SDValue Result;
11140
+ if (!IsUnmasked && IsExpandingLoad &&
11141
+ Subtarget.hasOptimizedIndexedLoadStore()) {
11141
11142
MVT IndexVT = ContainerVT;
11142
11143
if (ContainerVT.isFloatingPoint())
11143
11144
IndexVT = IndexVT.changeVectorElementTypeToInteger();
@@ -11147,47 +11148,98 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11147
11148
IndexVT = IndexVT.changeVectorElementType(XLenVT);
11148
11149
11149
11150
// If index vector is an i8 vector and the element count exceeds 256, we
11150
- // should change the element type of index vector to i16 to avoid overflow.
11151
+ // should change the element type of index vector to i16 to avoid
11152
+ // overflow.
11151
11153
if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11152
11154
// FIXME: We need to do vector splitting manually for LMUL=8 cases.
11153
11155
if (getLMUL(IndexVT) == RISCVII::LMUL_8)
11154
11156
return SDValue();
11155
11157
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11156
11158
}
11157
11159
11158
- Index =
11160
+ SDValue Index =
11159
11161
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11160
11162
DAG.getTargetConstant(Intrinsic::riscv_viota, DL, XLenVT),
11161
11163
DAG.getUNDEF(IndexVT), Mask, VL);
11162
11164
if (uint64_t EltSize = ContainerVT.getScalarSizeInBits(); EltSize > 8)
11163
11165
Index = DAG.getNode(RISCVISD::SHL_VL, DL, IndexVT, Index,
11164
11166
DAG.getConstant(Log2_64(EltSize / 8), DL, IndexVT),
11165
11167
DAG.getUNDEF(IndexVT), Mask, VL);
11166
- }
11167
-
11168
- unsigned IntID = IsUnmasked ? Intrinsic::riscv_vle
11169
- : IsExpandingLoad ? Intrinsic::riscv_vluxei_mask
11170
- : Intrinsic::riscv_vle_mask;
11171
- SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11172
- if (IsUnmasked)
11173
- Ops.push_back(DAG.getUNDEF(ContainerVT));
11174
- else
11168
+ unsigned IntID = Intrinsic::riscv_vluxei_mask;
11169
+ SmallVector<SDValue, 8> Ops{Chain,
11170
+ DAG.getTargetConstant(IntID, DL, XLenVT)};
11175
11171
Ops.push_back(PassThru);
11176
- Ops.push_back(BasePtr);
11177
- if (!IsUnmasked) {
11178
- if (IsExpandingLoad)
11179
- Ops.push_back(Index);
11172
+ Ops.push_back(BasePtr);
11173
+ Ops.push_back(Index);
11180
11174
Ops.push_back(Mask);
11181
- }
11182
- Ops.push_back(VL);
11183
- if (!IsUnmasked)
11175
+ Ops.push_back(VL);
11184
11176
Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11185
11177
11186
- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11178
+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11187
11179
11188
- SDValue Result =
11189
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11190
- Chain = Result.getValue(1);
11180
+ Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11181
+ MemVT, MMO);
11182
+ Chain = Result.getValue(1);
11183
+ } else {
11184
+ SDValue ExpandingVL;
11185
+ if (!IsUnmasked && IsExpandingLoad &&
11186
+ !Subtarget.hasOptimizedIndexedLoadStore()) {
11187
+ ExpandingVL = VL;
11188
+ VL = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11189
+ getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG),
11190
+ VL);
11191
+ }
11192
+
11193
+ unsigned IntID = IsUnmasked || (IsExpandingLoad &&
11194
+ !Subtarget.hasOptimizedIndexedLoadStore())
11195
+ ? Intrinsic::riscv_vle
11196
+ : Intrinsic::riscv_vle_mask;
11197
+ SmallVector<SDValue, 8> Ops{Chain,
11198
+ DAG.getTargetConstant(IntID, DL, XLenVT)};
11199
+ if (IntID == Intrinsic::riscv_vle)
11200
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
11201
+ else
11202
+ Ops.push_back(PassThru);
11203
+ Ops.push_back(BasePtr);
11204
+ if (IntID == Intrinsic::riscv_vle_mask)
11205
+ Ops.push_back(Mask);
11206
+ Ops.push_back(VL);
11207
+ if (IntID == Intrinsic::riscv_vle_mask)
11208
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11209
+
11210
+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11211
+
11212
+ Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11213
+ MemVT, MMO);
11214
+ Chain = Result.getValue(1);
11215
+ if (ExpandingVL) {
11216
+ MVT IndexVT = ContainerVT;
11217
+ if (ContainerVT.isFloatingPoint())
11218
+ IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11219
+
11220
+ MVT IndexEltVT = IndexVT.getVectorElementType();
11221
+ bool UseVRGATHEREI16 = false;
11222
+ // If index vector is an i8 vector and the element count exceeds 256, we
11223
+ // should change the element type of index vector to i16 to avoid
11224
+ // overflow.
11225
+ if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11226
+ // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11227
+ if (getLMUL(IndexVT) == RISCVII::LMUL_8)
11228
+ return SDValue();
11229
+ IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11230
+ UseVRGATHEREI16 = true;
11231
+ }
11232
+
11233
+ SDValue Iota =
11234
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11235
+ DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11236
+ DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11237
+ Result = DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11238
+ : RISCVISD::VRGATHER_VV_VL,
11239
+ DL, ContainerVT, Result, Iota, PassThru, Mask,
11240
+ ExpandingVL);
11241
+ }
11242
+ }
11191
11243
11192
11244
if (VT.isFixedLengthVector())
11193
11245
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
0 commit comments