@@ -2247,6 +2247,36 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
2247
2247
}
2248
2248
}
2249
2249
2250
+ // / Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
2251
+ // / converted to a narrower recipe. \p V is used by a wide recipe \p WideMember
2252
+ // / that feeds a store interleave group at index \p Idx, \p WideMember0 is the
2253
+ // / recipe feeding the same interleave group at index 0. A VPWidenLoadRecipe can
2254
+ // / be narrowed to an index-independent load if it feeds all wide ops at all
2255
+ // / indices (checked by via the operands of the wide recipe at lane0, \p
2256
+ // / WideMember0). A VPInterleaveRecipe can be narrowed to a wide load, if \p V
2257
+ // / is defined at \p Idx of a load interleave group.
2258
+ static bool canNarrowLoad (VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember,
2259
+ VPValue *V, unsigned Idx) {
2260
+ auto *DefR = V->getDefiningRecipe ();
2261
+ if (!DefR)
2262
+ return false ;
2263
+ if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
2264
+ return !W->getMask () &&
2265
+ all_of (zip (WideMember0->operands (), WideMember->operands ()),
2266
+ [V](const auto P) {
2267
+ // V must be as at the same places in both WideMember0 and
2268
+ // WideMember.
2269
+ const auto &[WideMember0Op, WideMemberOp] = P;
2270
+ return (WideMember0Op == V) == (WideMemberOp == V);
2271
+ });
2272
+
2273
+ if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
2274
+ return IR->getInterleaveGroup ()->getFactor () ==
2275
+ IR->getInterleaveGroup ()->getNumMembers () &&
2276
+ IR->getVPValue (Idx) == V;
2277
+ return false ;
2278
+ }
2279
+
2250
2280
// / Returns true if \p IR is a full interleave group with factor and number of
2251
2281
// / members both equal to \p VF. The interleave group must also access the full
2252
2282
// / vector width \p VectorRegWidth.
@@ -2284,7 +2314,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
2284
2314
unsigned VectorRegWidth) {
2285
2315
using namespace llvm ::VPlanPatternMatch;
2286
2316
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion ();
2287
- if (VF.isScalable () || !VectorLoop)
2317
+ if (VF.isScalable () || !VectorLoop || Plan. getUF () != 1 )
2288
2318
return ;
2289
2319
2290
2320
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV ();
@@ -2309,6 +2339,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
2309
2339
if (R.mayWriteToMemory () && !InterleaveR)
2310
2340
return ;
2311
2341
2342
+ // All other ops are allowed, but we reject uses that cannot be converted
2343
+ // when checking all allowed consumers (store interleave groups) below.
2312
2344
if (!InterleaveR)
2313
2345
continue ;
2314
2346
@@ -2323,7 +2355,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
2323
2355
2324
2356
// For now, we only support full interleave groups storing load interleave
2325
2357
// groups.
2326
- if (! all_of (enumerate(InterleaveR->getStoredValues ()), [](auto Op) {
2358
+ if (all_of (enumerate(InterleaveR->getStoredValues ()), [](auto Op) {
2327
2359
VPRecipeBase *DefR = Op.value ()->getDefiningRecipe ();
2328
2360
if (!DefR)
2329
2361
return false ;
@@ -2333,31 +2365,67 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
2333
2365
IR->getInterleaveGroup ()->getNumMembers () &&
2334
2366
IR->getVPValue (Op.index ()) == Op.value ();
2335
2367
})) {
2368
+ StoreGroups.push_back (InterleaveR);
2369
+ continue ;
2370
+ }
2371
+
2372
+ // Check if all values feeding InterleaveR are matching wide recipes, whose
2373
+ // operands can be narrowed.
2374
+ auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
2375
+ InterleaveR->getStoredValues ()[0 ]->getDefiningRecipe ());
2376
+ if (!WideMember0)
2336
2377
return ;
2378
+ for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues ())) {
2379
+ auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe ());
2380
+ if (!R || R->getOpcode () != WideMember0->getOpcode () ||
2381
+ R->getNumOperands () > 2 )
2382
+ return ;
2383
+ if (any_of (R->operands (), [WideMember0, Idx = I, R](VPValue *V) {
2384
+ return !canNarrowLoad (WideMember0, R, V, Idx);
2385
+ }))
2386
+ return ;
2337
2387
}
2338
2388
StoreGroups.push_back (InterleaveR);
2339
2389
}
2340
2390
2341
2391
if (StoreGroups.empty ())
2342
2392
return ;
2343
2393
2344
- // Convert InterleaveGroup R to a single VPWidenLoadRecipe.
2394
+ // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
2345
2395
auto NarrowOp = [](VPRecipeBase *R) -> VPValue * {
2346
- auto *LoadGroup = cast<VPInterleaveRecipe>(R);
2347
- // Narrow interleave group to wide load, as transformed VPlan will only
2396
+ if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
2397
+ // Narrow interleave group to wide load, as transformed VPlan will only
2398
+ // process one original iteration.
2399
+ auto *L = new VPWidenLoadRecipe (
2400
+ *cast<LoadInst>(LoadGroup->getInterleaveGroup ()->getInsertPos ()),
2401
+ LoadGroup->getAddr (), LoadGroup->getMask (), /* Consecutive=*/ true ,
2402
+ /* Reverse=*/ false , LoadGroup->getDebugLoc ());
2403
+ L->insertBefore (LoadGroup);
2404
+ return L;
2405
+ }
2406
+
2407
+ auto *WideLoad = cast<VPWidenLoadRecipe>(R);
2408
+
2409
+ // Narrow wide load to uniform scalar load, as transformed VPlan will only
2348
2410
// process one original iteration.
2349
- auto *L = new VPWidenLoadRecipe (
2350
- *cast<LoadInst>(LoadGroup->getInterleaveGroup ()->getInsertPos ()),
2351
- LoadGroup->getAddr (), LoadGroup->getMask (), /* Consecutive=*/ true ,
2352
- /* Reverse=*/ false , LoadGroup->getDebugLoc ());
2353
- L->insertBefore (LoadGroup);
2354
- return L;
2411
+ auto *N = new VPReplicateRecipe (&WideLoad->getIngredient (),
2412
+ WideLoad->operands (), /* IsUniform*/ true );
2413
+ N->insertBefore (WideLoad);
2414
+ return N;
2355
2415
};
2356
2416
2357
2417
// Narrow operation tree rooted at store groups.
2358
2418
for (auto *StoreGroup : StoreGroups) {
2359
- VPValue *Res =
2360
- NarrowOp (StoreGroup->getStoredValues ()[0 ]->getDefiningRecipe ());
2419
+ VPValue *Res = nullptr ;
2420
+ if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(
2421
+ StoreGroup->getStoredValues ()[0 ]->getDefiningRecipe ())) {
2422
+ for (unsigned Idx = 0 , E = WideMember0->getNumOperands (); Idx != E; ++Idx)
2423
+ WideMember0->setOperand (
2424
+ Idx, NarrowOp (WideMember0->getOperand (Idx)->getDefiningRecipe ()));
2425
+ Res = WideMember0;
2426
+ } else {
2427
+ Res = NarrowOp (StoreGroup->getStoredValues ()[0 ]->getDefiningRecipe ());
2428
+ }
2361
2429
2362
2430
auto *S = new VPWidenStoreRecipe (
2363
2431
*cast<StoreInst>(StoreGroup->getInterleaveGroup ()->getInsertPos ()),
0 commit comments