You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
45
-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
46
-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[TMP5]] to <8 x i8>
47
-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP5]])
35
+
// CHECK-NEXT: [[__REINT_128:%.*]] = alloca <4 x bfloat>, align 8
36
+
// CHECK-NEXT: [[__REINT1_128:%.*]] = alloca <2 x float>, align 8
37
+
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_128]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
45
+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
46
+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
47
+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
66
-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
67
-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[TMP5]] to <16 x i8>
68
-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP5]])
56
+
// CHECK-NEXT: [[__REINT_130:%.*]] = alloca <8 x bfloat>, align 16
57
+
// CHECK-NEXT: [[__REINT1_130:%.*]] = alloca <4 x float>, align 16
58
+
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_130]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
66
+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
67
+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
68
+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
87
-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
88
-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[TMP5]] to <8 x i8>
89
-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP5]])
77
+
// CHECK-NEXT: [[__REINT_132:%.*]] = alloca <8 x bfloat>, align 16
78
+
// CHECK-NEXT: [[__REINT1_132:%.*]] = alloca <2 x float>, align 8
79
+
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_132]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
87
+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
88
+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
89
+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
108
-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
109
-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[TMP5]] to <16 x i8>
110
-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP5]])
98
+
// CHECK-NEXT: [[__REINT_126:%.*]] = alloca <4 x bfloat>, align 8
99
+
// CHECK-NEXT: [[__REINT1_126:%.*]] = alloca <4 x float>, align 16
100
+
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_126]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
108
+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
109
+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
110
+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
@@ -174,9 +174,9 @@ float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
174
174
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
175
175
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
176
176
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
177
-
// CHECK-NEXT: [[VBFMLALBQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
178
-
// CHECK-NEXT: [[VBFMLALBQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_V3_I]] to <16 x i8>
179
-
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_V3_I]]
177
+
// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
178
+
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
179
+
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
@@ -203,9 +203,9 @@ float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
203
203
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
204
204
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
205
205
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
206
-
// CHECK-NEXT: [[VBFMLALBQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
207
-
// CHECK-NEXT: [[VBFMLALBQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_V3_I]] to <16 x i8>
208
-
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_V3_I]]
206
+
// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
207
+
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
208
+
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
@@ -232,9 +232,9 @@ float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t
232
232
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
233
233
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
234
234
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
235
-
// CHECK-NEXT: [[VBFMLALTQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
236
-
// CHECK-NEXT: [[VBFMLALTQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_V3_I]] to <16 x i8>
237
-
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_V3_I]]
235
+
// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
236
+
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
237
+
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
@@ -261,9 +261,9 @@ float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
261
261
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
262
262
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
263
263
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
264
-
// CHECK-NEXT: [[VBFMLALTQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
265
-
// CHECK-NEXT: [[VBFMLALTQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_V3_I]] to <16 x i8>
266
-
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_V3_I]]
264
+
// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
265
+
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
266
+
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
0 commit comments