Skip to content

Commit 1ce709c

Browse files
authored
[LV] Fix crash when building partial reductions using types that aren't known scale factors (#136680)
1 parent 8292e05 commit 1ce709c

File tree

2 files changed

+63
-3
lines changed

2 files changed

+63
-3
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9076,9 +9076,13 @@ bool VPRecipeBuilder::getScaledReductions(
90769076

90779077
PartialReductionChain Chain(RdxExitInstr, ExtA, ExtB, BinOp);
90789078

9079-
unsigned TargetScaleFactor =
9080-
PHI->getType()->getPrimitiveSizeInBits().getKnownScalarFactor(
9081-
A->getType()->getPrimitiveSizeInBits());
9079+
TypeSize PHISize = PHI->getType()->getPrimitiveSizeInBits();
9080+
TypeSize ASize = A->getType()->getPrimitiveSizeInBits();
9081+
9082+
if (!PHISize.hasKnownScalarFactor(ASize))
9083+
return false;
9084+
9085+
unsigned TargetScaleFactor = PHISize.getKnownScalarFactor(ASize);
90829086

90839087
if (LoopVectorizationPlanner::getDecisionAndClampRange(
90849088
[&](ElementCount VF) {

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,59 @@ for.body: ; preds = %for.body, %entry
6363
for.exit: ; preds = %for.body
6464
ret i32 %add
6565
}
66+
67+
; Test to ensure that we don't crash when evaluating an extend from a type
68+
; whose size is not a known integer factor of the target type's size.
69+
define i40 @partial_reduce_not_known_factor(i32 %a, i32 %b, i16 %N) {
70+
; CHECK-LABEL: define i40 @partial_reduce_not_known_factor(
71+
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i16 [[N:%.*]]) #[[ATTR0]] {
72+
; CHECK-NEXT: [[ENTRY:.*]]:
73+
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 0)
74+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32
75+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 1
76+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
77+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
78+
; CHECK: [[VECTOR_PH]]:
79+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4
80+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
81+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i64 0
82+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
83+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0
84+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
85+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[N_VEC]] to i16
86+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[BROADCAST_SPLAT2]] to <2 x i40>
87+
; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[BROADCAST_SPLAT]] to <2 x i40>
88+
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i40> [[TMP4]], [[TMP3]]
89+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
90+
; CHECK: [[VECTOR_BODY]]:
91+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
92+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i40> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
93+
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i40> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
94+
; CHECK-NEXT: [[TMP6]] = or <2 x i40> [[VEC_PHI]], [[TMP5]]
95+
; CHECK-NEXT: [[TMP8]] = or <2 x i40> [[VEC_PHI3]], [[TMP5]]
96+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
97+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
98+
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
99+
; CHECK: [[MIDDLE_BLOCK]]:
100+
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i40> [[TMP8]], [[TMP6]]
101+
; CHECK-NEXT: [[TMP9:%.*]] = call i40 @llvm.vector.reduce.or.v2i40(<2 x i40> [[BIN_RDX]])
102+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
103+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
104+
entry:
105+
br label %for.body
106+
107+
for.body:
108+
%red = phi i40 [ 0, %entry ], [ %1, %for.body ]
109+
%iv = phi i16 [ 0, %entry ], [ %iv.next, %for.body ]
110+
%resize = sext i32 %a to i40
111+
%resize4 = sext i32 %b to i40
112+
%0 = or i40 %resize4, %resize
113+
%1 = or i40 %red, %0
114+
%iv.next = add i16 %iv, 1
115+
%cmp = icmp slt i16 %iv, %N
116+
br i1 %cmp, label %for.body, label %exit
117+
118+
exit:
119+
%result.lcssa = phi i40 [ %1, %for.body ]
120+
ret i40 %result.lcssa
121+
}

0 commit comments

Comments
 (0)