Skip to content

[SLP][NFC]Extract a check for strided loads into separate function, NFC #134876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

alexey-bataev
Copy link
Member

No description provided.

Created using spr 1.3.5
@llvmbot
Copy link
Member

llvmbot commented Apr 8, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/134876.diff

1 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+69-46)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0e6f7e8435e3a..06c1840529eb0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5597,6 +5597,71 @@ static bool isMaskedLoadCompress(
   return TotalVecCost < GatherCost;
 }
 
+/// Checks if strided loads can be generated out of \p VL loads with pointers \p
+/// PointerOps:
+/// 1. Target with strided load support is detected.
+/// 2. The number of loads is greater than MinProfitableStridedLoads, or the
+/// potential stride <= MaxProfitableLoadStride and the potential stride is
+/// power-of-2 (to avoid perf regressions for the very small number of loads)
+/// and max distance > number of loads, or potential stride is -1.
+/// 3. The loads are ordered, or number of unordered loads <=
+/// MaxProfitableUnorderedLoads, or loads are in reversed order. (this check is
+/// to avoid extra costs for very expensive shuffles).
+/// 4. Any pointer operand is an instruction with the users outside of the
+/// current graph (for masked gathers extra extractelement instructions
+/// might be required).
+static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
+                          ArrayRef<unsigned> Order,
+                          const TargetTransformInfo &TTI, const DataLayout &DL,
+                          ScalarEvolution &SE,
+                          const bool IsAnyPointerUsedOutGraph, const int Diff) {
+  const unsigned Sz = VL.size();
+  const unsigned AbsoluteDiff = std::abs(Diff);
+  Type *ScalarTy = VL.front()->getType();
+  auto *VecTy = getWidenedType(ScalarTy, Sz);
+  if (IsAnyPointerUsedOutGraph ||
+      (AbsoluteDiff > Sz &&
+       (Sz > MinProfitableStridedLoads ||
+        (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+         AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
+      Diff == -(static_cast<int>(Sz) - 1)) {
+    int Stride = Diff / static_cast<int>(Sz - 1);
+    if (Diff != Stride * static_cast<int>(Sz - 1))
+      return false;
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+      return false;
+    Value *Ptr0;
+    Value *PtrN;
+    if (Order.empty()) {
+      Ptr0 = PointerOps.front();
+      PtrN = PointerOps.back();
+    } else {
+      Ptr0 = PointerOps[Order.front()];
+      PtrN = PointerOps[Order.back()];
+    }
+    // Iterate through all pointers and check if all distances are
+    // unique multiple of Dist.
+    SmallSet<int, 4> Dists;
+    for (Value *Ptr : PointerOps) {
+      int Dist = 0;
+      if (Ptr == PtrN)
+        Dist = Diff;
+      else if (Ptr != Ptr0)
+        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      // If the strides are not the same or repeated, we can't
+      // vectorize.
+      if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+        break;
+    }
+    if (Dists.size() == Sz)
+      return true;
+  }
+  return false;
+}
+
 BoUpSLP::LoadsState
 BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                            SmallVectorImpl<unsigned> &Order,
@@ -5670,59 +5735,17 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
       return LoadsState::Vectorize;
     // Simple check if not a strided access - clear order.
     bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
-    // Try to generate strided load node if:
-    // 1. Target with strided load support is detected.
-    // 2. The number of loads is greater than MinProfitableStridedLoads,
-    // or the potential stride <= MaxProfitableLoadStride and the
-    // potential stride is power-of-2 (to avoid perf regressions for the very
-    // small number of loads) and max distance > number of loads, or potential
-    // stride is -1.
-    // 3. The loads are ordered, or number of unordered loads <=
-    // MaxProfitableUnorderedLoads, or loads are in reversed order.
-    // (this check is to avoid extra costs for very expensive shuffles).
-    // 4. Any pointer operand is an instruction with the users outside of the
-    // current graph (for masked gathers extra extractelement instructions
-    // might be required).
+    // Try to generate strided load node.
     auto IsAnyPointerUsedOutGraph =
         IsPossibleStrided && any_of(PointerOps, [&](Value *V) {
           return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
                    return !isVectorized(U) && !MustGather.contains(U);
                  });
         });
-    const unsigned AbsoluteDiff = std::abs(*Diff);
     if (IsPossibleStrided &&
-        (IsAnyPointerUsedOutGraph ||
-         (AbsoluteDiff > Sz &&
-          (Sz > MinProfitableStridedLoads ||
-           (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-            AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
-         *Diff == -(static_cast<int>(Sz) - 1))) {
-      int Stride = *Diff / static_cast<int>(Sz - 1);
-      if (*Diff == Stride * static_cast<int>(Sz - 1)) {
-        Align Alignment =
-            cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-                ->getAlign();
-        if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
-          // Iterate through all pointers and check if all distances are
-          // unique multiple of Dist.
-          SmallSet<int, 4> Dists;
-          for (Value *Ptr : PointerOps) {
-            int Dist = 0;
-            if (Ptr == PtrN)
-              Dist = *Diff;
-            else if (Ptr != Ptr0)
-              Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
-            // If the strides are not the same or repeated, we can't
-            // vectorize.
-            if (((Dist / Stride) * Stride) != Dist ||
-                !Dists.insert(Dist).second)
-              break;
-          }
-          if (Dists.size() == Sz)
-            return LoadsState::StridedVectorize;
-        }
-      }
-    }
+        isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE,
+                      IsAnyPointerUsedOutGraph, *Diff))
+      return LoadsState::StridedVectorize;
     bool IsMasked;
     unsigned InterleaveFactor;
     SmallVector<int> CompressMask;

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@alexey-bataev alexey-bataev merged commit edcbd4a into main Apr 8, 2025
10 of 13 checks passed
@alexey-bataev alexey-bataev deleted the users/alexey-bataev/spr/slpnfcextract-a-check-for-strided-loads-into-separate-function-nfc branch April 8, 2025 17:02
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Apr 8, 2025
…function, NFC

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#134876
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants