@@ -859,6 +859,78 @@ exit:
859
859
ret i64 %1
860
860
}
861
861
862
+ ; Test case for https://github.com/llvm/llvm-project/issues/96294 with a stored
863
+ ; reduction which overwrites an earlier store. Every loop iteration first stores %l.and and then %red.next to the same pointer %dst. The FileCheck expectations below have no store in vector.body and a single store of the reduced value in middle.block; the scalar remainder loop keeps both stores.
864
+ define void @reduction_store (ptr noalias %src , ptr %dst , i1 %x ) #2 { ; NOTE(review): attribute group #2 is not defined in the visible part of the file
865
+ ; CHECK-LABEL: @reduction_store(
866
+ ; CHECK-NEXT: entry:
867
+ ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
868
+ ; CHECK: vector.ph:
869
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[X:%.*]], i64 0
870
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer
871
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
872
+ ; CHECK: vector.body:
873
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
874
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 0, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
875
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
876
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
877
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
878
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[BROADCAST_SPLAT]] to <8 x i64>
879
+ ; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i64> [[TMP3]], <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12>
880
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[TMP4]] to <8 x i32>
881
+ ; CHECK-NEXT: [[TMP6]] = and <8 x i32> [[VEC_PHI]], [[TMP5]]
882
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
883
+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
884
+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
885
+ ; CHECK: middle.block:
886
+ ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP6]])
887
+ ; CHECK-NEXT: store i32 [[TMP8]], ptr [[DST:%.*]], align 4
888
+ ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
889
+ ; CHECK: scalar.ph:
890
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
891
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
892
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
893
+ ; CHECK: loop:
894
+ ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ]
895
+ ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
896
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
897
+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
898
+ ; CHECK-NEXT: [[L_AND:%.*]] = and i32 [[L]], 3
899
+ ; CHECK-NEXT: store i32 [[L_AND]], ptr [[DST]], align 4
900
+ ; CHECK-NEXT: [[X_EXT:%.*]] = zext i1 [[X]] to i64
901
+ ; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[X_EXT]], 12
902
+ ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[LSHR]] to i32
903
+ ; CHECK-NEXT: [[RED_NEXT]] = and i32 [[RED]], [[T]]
904
+ ; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[DST]], align 4
905
+ ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
906
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 29
907
+ ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
908
+ ; CHECK: exit:
909
+ ; CHECK-NEXT: ret void
910
+ ;
911
+ entry:
912
+ br label %loop
913
+
914
+ loop:
915
+ %red = phi i32 [ 0 , %entry ], [ %red.next , %loop ] ; running AND reduction, seeded with 0
916
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ] ; induction variable, counts up from 0
917
+ %gep.src = getelementptr inbounds i32 , ptr %src , i32 %iv ; address of the i32 element %iv of %src
918
+ %l = load i32 , ptr %gep.src
919
+ %l.and = and i32 %l , 3 ; keep the low two bits of the loaded element
920
+ store i32 %l.and , ptr %dst , align 4 ; the earlier store: overwritten by the store of %red.next to %dst later in this iteration
921
+ %x.ext = zext i1 %x to i64 ; loop-invariant: depends only on the argument %x
922
+ %lshr = lshr i64 %x.ext , 12 ; a zero-extended i1 is 0 or 1, so shifting right by 12 leaves 0
923
+ %t = trunc i64 %lshr to i32
924
+ %red.next = and i32 %red , %t ; reduction update
925
+ store i32 %red.next , ptr %dst , align 4 ; the stored reduction: same address as the store of %l.and above
926
+ %iv.next = add i32 %iv , 1
927
+ %ec = icmp eq i32 %iv , 29 ; tests %iv, not %iv.next, so the body runs for iv = 0..29 (30 iterations)
928
+ br i1 %ec , label %exit , label %loop
929
+
930
+ exit:
931
+ ret void
932
+ }
933
+
862
934
declare void @llvm.assume (i1 noundef) #0
863
935
864
936
attributes #0 = { "target-cpu" ="penryn" }
0 commit comments