|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2 |
| -; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s |
| 2 | +; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck --check-prefixes=CHECK,POWEROF2 %s |
| 3 | +; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 -slp-vectorize-non-power-of-2 %s | FileCheck --check-prefixes=CHECK,NONPOWEROF2 %s |
3 | 4 |
|
4 | 5 | define i32 @test() {
|
5 | 6 | ; CHECK-LABEL: @test(
|
@@ -134,3 +135,99 @@ for.body:
|
134 | 135 | %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
|
135 | 136 | br label %for.cond.cleanup
|
136 | 137 | }
|
| 138 | + |
; test4: six lanes (0, 1, 2 and 4, 5, 6) of an <8 x float> fadd are extracted,
; carried through scalar phis, multiplied by 0.0, and summed in two three-term
; `reassoc nsz` fadd chains; each chain result is passed to llvm.sqrt.f32.
; The POWEROF2 assertions below expect <2 x float>/<4 x float> vectors with the
; fadd chains left scalar, while the NONPOWEROF2 assertions (the RUN line that
; adds -slp-vectorize-non-power-of-2) expect <3 x float>/<6 x float> vectors and
; llvm.vector.reduce.fadd.v3f32 calls.
| 139 | +define ptr @test4() { |
| 140 | +; POWEROF2-LABEL: @test4( |
| 141 | +; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer |
| 142 | +; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2> |
| 143 | +; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6> |
| 144 | +; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0> |
| 145 | +; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0) |
| 146 | +; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2) |
| 147 | +; POWEROF2-NEXT: br label [[TMP8:%.*]] |
| 148 | +; POWEROF2: 7: |
| 149 | +; POWEROF2-NEXT: br label [[TMP8]] |
| 150 | +; POWEROF2: 8: |
| 151 | +; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ] |
| 152 | +; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ] |
| 153 | +; POWEROF2-NEXT: br label [[TMP11:%.*]] |
| 154 | +; POWEROF2: 11: |
| 155 | +; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0) |
| 156 | +; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer |
| 157 | +; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2) |
| 158 | +; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]] |
| 159 | +; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1 |
| 160 | +; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]] |
| 161 | +; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0 |
| 162 | +; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00 |
| 163 | +; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0 |
| 164 | +; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]] |
| 165 | +; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 |
| 166 | +; POWEROF2-NEXT: [[TMP23:%.*]] = fadd reassoc nsz float [[TMP22]], [[TMP19]] |
| 167 | +; POWEROF2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[TMP13]], i32 1 |
| 168 | +; POWEROF2-NEXT: [[TMP25:%.*]] = fadd reassoc nsz float [[TMP21]], [[TMP24]] |
| 169 | +; POWEROF2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 |
| 170 | +; POWEROF2-NEXT: [[TMP27:%.*]] = fadd reassoc nsz float [[TMP23]], [[TMP26]] |
| 171 | +; POWEROF2-NEXT: [[TMP28:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP25]]) |
| 172 | +; POWEROF2-NEXT: [[TMP29:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP27]]) |
| 173 | +; POWEROF2-NEXT: ret ptr null |
| 174 | +; |
| 175 | +; NONPOWEROF2-LABEL: @test4( |
| 176 | +; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer |
| 177 | +; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| 178 | +; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6> |
| 179 | +; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0) |
| 180 | +; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3) |
| 181 | +; NONPOWEROF2-NEXT: br label [[TMP7:%.*]] |
| 182 | +; NONPOWEROF2: 6: |
| 183 | +; NONPOWEROF2-NEXT: br label [[TMP7]] |
| 184 | +; NONPOWEROF2: 7: |
| 185 | +; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ] |
| 186 | +; NONPOWEROF2-NEXT: br label [[TMP9:%.*]] |
| 187 | +; NONPOWEROF2: 9: |
| 188 | +; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0) |
| 189 | +; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]] |
| 190 | +; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3) |
| 191 | +; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]] |
| 192 | +; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]]) |
| 193 | +; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]]) |
| 194 | +; NONPOWEROF2-NEXT: [[TMP16:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP14]]) |
| 195 | +; NONPOWEROF2-NEXT: [[TMP17:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP15]]) |
| 196 | +; NONPOWEROF2-NEXT: ret ptr null |
| 197 | +; |
; Entry block: one vector fadd, then scalar extracts of lanes 0-2 and 4-6
; (lanes 3 and 7 are never read).
| 198 | + %1 = fadd <8 x float> zeroinitializer, zeroinitializer |
| 199 | + %2 = extractelement <8 x float> %1, i64 0 |
| 200 | + %3 = extractelement <8 x float> %1, i64 1 |
| 201 | + %4 = extractelement <8 x float> %1, i64 2 |
| 202 | + %5 = extractelement <8 x float> %1, i64 4 |
| 203 | + %6 = extractelement <8 x float> %1, i64 5 |
| 204 | + %7 = extractelement <8 x float> %1, i64 6 |
| 205 | + br label %9 |
| 206 | + |
; No branch in this function targets %8; it only supplies the second incoming
; edge (0.0 for every value) to the phis in %9.
| 207 | +8: |
| 208 | + br label %9 |
| 209 | + |
; One scalar phi per extracted lane, listed in reverse lane order
; (%10 <- lane 6 ... %15 <- lane 0).
| 210 | +9: |
| 211 | + %10 = phi float [ 0.000000e+00, %8 ], [ %7, %0 ] |
| 212 | + %11 = phi float [ 0.000000e+00, %8 ], [ %6, %0 ] |
| 213 | + %12 = phi float [ 0.000000e+00, %8 ], [ %5, %0 ] |
| 214 | + %13 = phi float [ 0.000000e+00, %8 ], [ %4, %0 ] |
| 215 | + %14 = phi float [ 0.000000e+00, %8 ], [ %3, %0 ] |
| 216 | + %15 = phi float [ 0.000000e+00, %8 ], [ %2, %0 ] |
| 217 | + br label %16 |
| 218 | + |
; Two interleaved reduction chains:
;   %25 = (%14*0 + 0*%15) + %13*0   -- phis fed by lanes 1, 0, 2
;   %26 = (0*%11 + %12*0) + %10*0   -- phis fed by lanes 5, 4, 6
; The fmul operand order (constant first vs. second) differs between terms.
| 219 | +16: |
| 220 | + %17 = fmul float %14, 0.000000e+00 |
| 221 | + %18 = fmul float 0.000000e+00, %11 |
| 222 | + %19 = fmul float 0.000000e+00, %15 |
| 223 | + %20 = fmul float %12, 0.000000e+00 |
| 224 | + %21 = fadd reassoc nsz float %17, %19 |
| 225 | + %22 = fadd reassoc nsz float %18, %20 |
| 226 | + %23 = fmul float %13, 0.000000e+00 |
| 227 | + %24 = fmul float %10, 0.000000e+00 |
| 228 | + %25 = fadd reassoc nsz float %21, %23 |
| 229 | + %26 = fadd reassoc nsz float %22, %24 |
; The sqrt results are unused; the function returns a null pointer.
| 230 | + %27 = tail call float @llvm.sqrt.f32(float %25) |
| 231 | + %28 = tail call float @llvm.sqrt.f32(float %26) |
| 232 | + ret ptr null |
| 233 | +} |
0 commit comments