@@ -1328,19 +1328,19 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
13281328; CHECK: vector.body:
13291329; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
13301330; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
1331- ; CHECK-NEXT: [[TMP16 :%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332- ; CHECK-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP16 ]]
1333- ; CHECK-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP17 ]], i32 0
1334- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP18 ]], align 4, !alias.scope [[META60:![0-9]+]]
1335- ; CHECK-NEXT: [[TMP20 :%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336- ; CHECK-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP20 ]]
1337- ; CHECK-NEXT: [[TMP22 :%.*]] = getelementptr inbounds i32, ptr [[TMP21 ]], i32 0
1338- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP22 ]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339- ; CHECK-NEXT: [[TMP23 :%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340- ; CHECK-NEXT: store <4 x i32> [[TMP23 ]], ptr [[TMP22 ]], align 4, !alias.scope [[META63]], !noalias [[META60]]
1331+ ; CHECK-NEXT: [[TMP15 :%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
1332+ ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP15 ]]
1333+ ; CHECK-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP16 ]], i32 0
1334+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP17 ]], align 4, !alias.scope [[META60:![0-9]+]]
1335+ ; CHECK-NEXT: [[TMP18 :%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
1336+ ; CHECK-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP18 ]]
1337+ ; CHECK-NEXT: [[TMP20 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i32 0
1338+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP20 ]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]
1339+ ; CHECK-NEXT: [[TMP21 :%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
1340+ ; CHECK-NEXT: store <4 x i32> [[TMP21 ]], ptr [[TMP20 ]], align 4, !alias.scope [[META63]], !noalias [[META60]]
13411341; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1342- ; CHECK-NEXT: [[TMP24 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343- ; CHECK-NEXT: br i1 [[TMP24 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
1342+ ; CHECK-NEXT: [[TMP22 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1343+ ; CHECK-NEXT: br i1 [[TMP22 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP65:![0-9]+]]
13441344; CHECK: middle.block:
13451345; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
13461346; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]]
@@ -1349,15 +1349,15 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
13491349; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
13501350; CHECK: inner.loop:
13511351; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ]
1352- ; CHECK-NEXT: [[TMP25 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353- ; CHECK-NEXT: [[TMP26 :%.*]] = add nsw i64 [[TMP25 ]], [[TMP11]]
1354- ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP26 ]]
1355- ; CHECK-NEXT: [[TMP27 :%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356- ; CHECK-NEXT: [[TMP28 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357- ; CHECK-NEXT: [[TMP29 :%.*]] = add nsw i64 [[TMP28 ]], [[TMP12]]
1358- ; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP29 ]]
1359- ; CHECK-NEXT: [[TMP30 :%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360- ; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP30 ]], [[TMP27 ]]
1352+ ; CHECK-NEXT: [[TMP23 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP0]]
1353+ ; CHECK-NEXT: [[TMP24 :%.*]] = add nsw i64 [[TMP23 ]], [[TMP11]]
1354+ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24 ]]
1355+ ; CHECK-NEXT: [[TMP25 :%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
1356+ ; CHECK-NEXT: [[TMP26 :%.*]] = mul nsw i64 [[INNER_IV]], [[TMP1]]
1357+ ; CHECK-NEXT: [[TMP27 :%.*]] = add nsw i64 [[TMP26 ]], [[TMP12]]
1358+ ; CHECK-NEXT: [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP27 ]]
1359+ ; CHECK-NEXT: [[TMP28 :%.*]] = load i32, ptr [[ARRAYIDX11_US]], align 4
1360+ ; CHECK-NEXT: [[ADD12_US:%.*]] = add nsw i32 [[TMP28 ]], [[TMP25 ]]
13611361; CHECK-NEXT: store i32 [[ADD12_US]], ptr [[ARRAYIDX11_US]], align 4
13621362; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
13631363; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], [[WIDE_TRIP_COUNT]]
@@ -1508,3 +1508,94 @@ inner.exit:
15081508outer.exit:
15091509 ret void
15101510}
1511+
1512+ ; TODO: STRIDE_CHECK can be eliminated via loop guards. The entry guard branches to %exit whenever %Acols is ugt 0, so the loops are only reached with %Acols equal to 0, while STRIDE_CHECK tests whether sext(%Acols) << 3 is negative.
1513+ define void @stride_check_known_via_loop_guard (ptr %C , ptr %A , i32 %Acols ) {
1514+ ; CHECK-LABEL: define void @stride_check_known_via_loop_guard
1515+ ; CHECK-SAME: (ptr [[C:%.*]], ptr [[A:%.*]], i32 [[ACOLS:%.*]]) {
1516+ ; CHECK-NEXT: entry:
1517+ ; CHECK-NEXT: [[PRE_C:%.*]] = icmp ugt i32 [[ACOLS]], 0
1518+ ; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
1519+ ; CHECK: outer.header.preheader:
1520+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
1521+ ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ACOLS]] to i64
1522+ ; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 3
1523+ ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 34359738368
1524+ ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
1525+ ; CHECK: outer.header:
1526+ ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER]] ]
1527+ ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[OUTER_IV]], [[ACOLS]]
1528+ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr double, ptr [[A]], i32 [[MUL_US]]
1529+ ; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1530+ ; CHECK: vector.scevcheck:
1531+ ; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
1532+ ; CHECK: vector.memcheck:
1533+ ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
1534+ ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
1535+ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1536+ ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP1]], 0
1537+ ; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1538+ ; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1539+ ; CHECK: vector.ph:
1540+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1541+ ; CHECK: vector.body:
1542+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1543+ ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
1544+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP3]]
1545+ ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
1546+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP5]], i64 0
1547+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1548+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0
1549+ ; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8, !alias.scope [[META72]]
1550+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1551+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1552+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1553+ ; CHECK: middle.block:
1554+ ; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
1555+ ; CHECK: scalar.ph:
1556+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_HEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1557+ ; CHECK-NEXT: br label [[INNER:%.*]]
1558+ ; CHECK: inner:
1559+ ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
1560+ ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INNER_IV]]
1561+ ; CHECK-NEXT: [[L:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8
1562+ ; CHECK-NEXT: store double [[L]], ptr [[GEP_C]], align 8
1563+ ; CHECK-NEXT: [[INNER_IV_NEXT]] = add i32 [[INNER_IV]], 1
1564+ ; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 0
1565+ ; CHECK-NEXT: br i1 [[INNER_C]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP75:![0-9]+]]
1566+ ; CHECK: outer.latch:
1567+ ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
1568+ ; CHECK-NEXT: [[OUTER_C:%.*]] = icmp ult i32 [[OUTER_IV]], 128
1569+ ; CHECK-NEXT: br i1 [[OUTER_C]], label [[EXIT_LOOPEXIT:%.*]], label [[OUTER_HEADER]]
1570+ ; CHECK: exit.loopexit:
1571+ ; CHECK-NEXT: br label [[EXIT]]
1572+ ; CHECK: exit:
1573+ ; CHECK-NEXT: ret void
1574+ ;
1575+ entry:
1576+ %pre.c = icmp ugt i32 %Acols , 0 ; loop guard: true for every nonzero %Acols (unsigned compare against 0)
1577+ br i1 %pre.c , label %exit , label %outer.header ; the loop nest is entered only when %pre.c is false
1578+
1579+ outer.header:
1580+ %outer.iv = phi i32 [ 0 , %entry ], [ %outer.iv.next , %outer.latch ]
1581+ %mul.us = mul i32 %outer.iv , %Acols
1582+ %arrayidx.us = getelementptr double , ptr %A , i32 %mul.us ; address of the load in %inner; computed here, outside the inner loop
1583+ br label %inner
1584+
1585+ inner:
1586+ %inner.iv = phi i32 [ 0 , %outer.header ], [ %inner.iv.next , %inner ]
1587+ %gep.C = getelementptr inbounds double , ptr %C , i32 %inner.iv
1588+ %l = load double , ptr %arrayidx.us , align 8 ; reads the same address on every %inner iteration
1589+ store double %l , ptr %gep.C , align 8
1590+ %inner.iv.next = add i32 %inner.iv , 1 ; no nuw/nsw flags on this increment
1591+ %inner.c = icmp eq i32 %inner.iv.next , 0 ; inner loop exits once the incremented IV equals 0
1592+ br i1 %inner.c , label %outer.latch , label %inner
1593+
1594+ outer.latch:
1595+ %outer.iv.next = add i32 %outer.iv , 1
1596+ %outer.c = icmp ult i32 %outer.iv , 128 ; compares the pre-increment %outer.iv, not %outer.iv.next
1597+ br i1 %outer.c , label %exit , label %outer.header ; leaves the outer loop when %outer.c is true
1598+
1599+ exit:
1600+ ret void
1601+ }
0 commit comments