From 4a74a77f0ca0f3794f9ecb4a21c8b381556c91d3 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Dec 2024 16:25:04 +0000 Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../llvm/Analysis/LoopAccessAnalysis.h | 10 ++++ .../Vectorize/LoopVectorizationLegality.h | 6 +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp | 23 ++++---- .../Transforms/Vectorize/LoopVectorize.cpp | 53 +++++++++++++------ .../RISCV/riscv-vector-reverse.ll | 4 +- .../LoopVectorize/memdep-fold-tail.ll | 52 +----------------- llvm/test/Transforms/LoopVectorize/memdep.ll | 4 +- 7 files changed, 73 insertions(+), 79 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index a35bc7402d1a8..d5cf959fb04ec 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -216,6 +216,12 @@ class MemoryDepChecker { return MaxSafeVectorWidthInBits; } + /// Return the safe power-of-2 number of elements that do not prevent + /// store-load forwarding and are safe to operate on simultaneously. + std::optional getStoreLoadForwardSafeVF() const { + return MaxStoreLoadForwardSafeVF; + } + /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. bool shouldRetryWithRuntimeCheck() const { @@ -304,6 +310,10 @@ class MemoryDepChecker { /// restrictive. uint64_t MaxSafeVectorWidthInBits = -1U; + /// Maximum number of elements (power-of-2 and non-power-of-2) that do not + /// prevent store-load forwarding and are safe to operate on simultaneously. + std::optional MaxStoreLoadForwardSafeVF; + /// If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. 
bool FoundNonConstantDistanceDependence = false; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index fbe80eddbae07..462c11d841b84 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -412,6 +412,12 @@ class LoopVectorizationLegality { return getUncountableExitBlocks()[0]; } + /// Return the safe power-of-2 number of elements that do not prevent + /// store-load forwarding and are safe to operate on simultaneously. + std::optional getMaxStoreLoadForwardSafeVFPowerOf2() const { + return LAI->getDepChecker().getStoreLoadForwardSafeVF(); + } + /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) const { diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 2c75d5625cb66..764600c3adae7 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1752,31 +1752,34 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // cause any slowdowns. const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. - uint64_t MaxVFWithoutSLForwardIssues = std::min( - VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes); + uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = std::min( + VectorizerParams::MaxVectorWidth * TypeByteSize, + MaxStoreLoadForwardSafeVF.value_or(std::numeric_limits::max())); // Compute the smallest VF at which the store and load would be misaligned. - for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; - VF *= 2) { + for (uint64_t VF = 2 * TypeByteSize; + VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) { // If the number of vector iteration between the store and the load are // small we could incur conflicts. 
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { - MaxVFWithoutSLForwardIssues = (VF >> 1); + MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1); break; } } - if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } - if (MaxVFWithoutSLForwardIssues < MinDepDistBytes && - MaxVFWithoutSLForwardIssues != - VectorizerParams::MaxVectorWidth * TypeByteSize) - MinDepDistBytes = MaxVFWithoutSLForwardIssues; + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) + MaxStoreLoadForwardSafeVF = 1; + else if (MaxVFWithoutSLForwardIssuesPowerOf2 < MaxStoreLoadForwardSafeVF && + MaxVFWithoutSLForwardIssuesPowerOf2 != + VectorizerParams::MaxVectorWidth * TypeByteSize) + MaxStoreLoadForwardSafeVF = MaxVFWithoutSLForwardIssuesPowerOf2; return false; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cb828b738d310..367a011323b51 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1436,8 +1436,10 @@ class LoopVectorizationCostModel { /// Selects and saves TailFoldingStyle for 2 options - if IV update may /// overflow or not. /// \param IsScalableVF true if scalable vector factors enabled. + /// \param TailFoldPowOf2 true if tail folding with power-of-2 + /// safe distance can be enabled. /// \param UserIC User specific interleave count. 
- void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { + void setTailFoldingStyles(bool IsScalableVF, bool TailFoldPowOf2, unsigned UserIC) { assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); if (!Legal->canFoldTailByMasking()) { ChosenTailFoldingStyle = @@ -1446,24 +1448,37 @@ class LoopVectorizationCostModel { } if (!ForceTailFoldingStyle.getNumOccurrences()) { - ChosenTailFoldingStyle = std::make_pair( - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); + if (!TailFoldPowOf2) + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); + else + ChosenTailFoldingStyle = std::make_pair( + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); return; } // Set styles when forced. ChosenTailFoldingStyle = std::make_pair(ForceTailFoldingStyle.getValue(), ForceTailFoldingStyle.getValue()); - if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) + if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) { + if (!TailFoldPowOf2) + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); return; + } // Override forced styles if needed. // FIXME: use actual opcode/data type for analysis here. // FIXME: Investigate opportunity for fixed vector factor. - bool EVLIsLegal = UserIC <= 1 && + bool EVLIsLegal = UserIC <= 1 && IsScalableVF && TTI.hasActiveVectorLength(0, nullptr, Align()) && !EnableVPlanNativePath; if (!EVLIsLegal) { + if (!TailFoldPowOf2) { + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); + return; + } // If for some reason EVL mode is unsupported, fallback to // DataWithoutLaneMask to try to vectorize the loop with folded tail // in a generic way. 
@@ -4016,11 +4031,15 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF( // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). - unsigned MaxSafeElements = - llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + unsigned MaxSafeElements = Legal->getMaxSafeVectorWidthInBits() / WidestType; + if (Legal->isSafeForAnyVectorWidth()) + MaxSafeElements = bit_ceil(MaxSafeElements); + unsigned MaxSafeElementsPowerOf2 = bit_floor(std::gcd( + MaxSafeElements, Legal->getMaxStoreLoadForwardSafeVFPowerOf2().value_or( + 1ULL << countr_zero(MaxSafeElements)))); + auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2); + auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2); - auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements); - auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements); if (!Legal->isSafeForAnyVectorWidth()) this->MaxSafeElements = MaxSafeElements; @@ -4233,13 +4252,11 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); return MaxFactors; } + MaxPowerOf2RuntimeVF.reset(); } - // If we don't know the precise trip count, or if the trip count that we - // found modulo the vectorization factor is not zero, try to fold the tail - // by masking. - // FIXME: look for a smaller MaxVF that does divide TC rather than masking. 
- setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC); + setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), + !MaxPowerOf2RuntimeVF.has_value(), UserIC); if (foldTailByMasking()) { if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { LLVM_DEBUG( @@ -4258,6 +4275,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return MaxFactors; } + if (MaxPowerOf2RuntimeVF) { + // Accept MaxFixedVF if we do not have a tail. + LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); + return MaxFactors; + } + // If there was a tail-folding hint/switch, but we can't fold the tail by // masking, fallback to a vectorization with a scalar epilogue. if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index eb60c24393df9..cbdd9a0649765 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -21,7 +21,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Found trip count: 0 ; CHECK-NEXT: LV: Found maximum trip count: 4294967295 ; CHECK-NEXT: LV: Scalable vectorization is available -; CHECK-NEXT: LV: The max safe fixed VF is: 67108864. +; CHECK-NEXT: LV: The max safe fixed VF is: 134217728. ; CHECK-NEXT: LV: The max safe scalable VF is: vscale x 4294967295. 
; CHECK-NEXT: LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom @@ -268,7 +268,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Found trip count: 0 ; CHECK-NEXT: LV: Found maximum trip count: 4294967295 ; CHECK-NEXT: LV: Scalable vectorization is available -; CHECK-NEXT: LV: The max safe fixed VF is: 67108864. +; CHECK-NEXT: LV: The max safe fixed VF is: 134217728. ; CHECK-NEXT: LV: The max safe scalable VF is: vscale x 4294967295. ; CHECK-NEXT: LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index d1ad7e3f4fc0d..ea592c1e1063a 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -24,57 +24,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @maxvf3() { ; CHECK-LABEL: @maxvf3( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 14) -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]] -; CHECK-NEXT: store i8 69, ptr [[TMP3]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP5]] -; CHECK-NEXT: store i8 69, ptr [[TMP6]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]] -; CHECK-NEXT: store i8 7, ptr [[TMP10]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]] -; CHECK-NEXT: store i8 7, ptr [[TMP13]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[AJ:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[J]] ; CHECK-NEXT: store i8 69, ptr [[AJ]], align 8 ; CHECK-NEXT: [[JP3:%.*]] = add nuw nsw i32 3, [[J]] @@ -82,7 +34,7 @@ define void @maxvf3() { ; CHECK-NEXT: store i8 7, ptr [[AJP3]], align 8 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll index b891b4312f18d..28cf3b61b2554 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep.ll @@ -226,7 +226,7 @@ for.end: ;Check the new calculation of the maximum safe distance in bits which can be vectorized. ;The previous behavior did not take account that the stride was 2. -;Therefore the maxVF was computed as 8 instead of 4, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2. +;Therefore the maxVF was computed as 8 instead of 2, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2. 
;#define M 32 ;#define N 2 * M @@ -242,7 +242,7 @@ for.end: ;} ; RIGHTVF-LABEL: @pr34283 -; RIGHTVF: <4 x i64> +; RIGHTVF: <2 x i64> ; WRONGVF-LABLE: @pr34283 ; WRONGVF-NOT: <8 x i64> From 620177358a267cfc1c4d18710493403b644982dd Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Dec 2024 16:33:04 +0000 Subject: [PATCH 2/3] Fix formatting Created using spr 1.3.5 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 367a011323b51..32acdd3a58792 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1439,7 +1439,8 @@ class LoopVectorizationCostModel { /// \param TailFoldPowOf2 true if tail folding with power-of-2 /// safe distance can be enabled. /// \param UserIC User specific interleave count. - void setTailFoldingStyles(bool IsScalableVF, bool TailFoldPowOf2, unsigned UserIC) { + void setTailFoldingStyles(bool IsScalableVF, bool TailFoldPowOf2, + unsigned UserIC) { assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); if (!Legal->canFoldTailByMasking()) { ChosenTailFoldingStyle = From 85c7122e8ee72ffbc9c863a16874e7c110536f9a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 12 Feb 2025 13:49:13 +0000 Subject: [PATCH 3/3] Fix formatting Created using spr 1.3.5 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4cd92b40a5f15..4cb02012a961f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4149,8 +4149,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // by masking. 
// FIXME: look for a smaller MaxVF that does divide TC rather than masking. bool ContainsScalableVF = MaxFactors.ScalableVF.isNonZero(); - setTailFoldingStyles(ContainsScalableVF, - !MaxPowerOf2RuntimeVF.has_value(), UserIC); + setTailFoldingStyles(ContainsScalableVF, !MaxPowerOf2RuntimeVF.has_value(), + UserIC); if (foldTailByMasking()) { if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { LLVM_DEBUG(