Skip to content

Commit 617ee25

Browse files
committed
[Clang] Regenerate test checks (NFC)
1 parent c52a46a commit 617ee25

File tree

3 files changed

+962
-962
lines changed

3 files changed

+962
-962
lines changed

clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c

Lines changed: 73 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,19 @@ float32x4_t test_vbfdotq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b){
3232

3333
// CHECK-LABEL: @test_vbfdot_lane_f32(
3434
// CHECK-NEXT: entry:
35-
// CHECK-NEXT: [[__REINT_144:%.*]] = alloca <4 x bfloat>, align 8
36-
// CHECK-NEXT: [[__REINT1_144:%.*]] = alloca <2 x float>, align 8
37-
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_144]], align 8
38-
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT_144]], align 8
39-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
40-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
41-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
42-
// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_144]], align 8
43-
// CHECK-NEXT: [[TMP5:%.*]] = load <4 x bfloat>, ptr [[__REINT1_144]], align 8
44-
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
45-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
46-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[TMP5]] to <8 x i8>
47-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP5]])
35+
// CHECK-NEXT: [[__REINT_128:%.*]] = alloca <4 x bfloat>, align 8
36+
// CHECK-NEXT: [[__REINT1_128:%.*]] = alloca <2 x float>, align 8
37+
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_128]], align 8
38+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_128]], align 8
39+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
40+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
41+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
42+
// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_128]], align 8
43+
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_128]], align 8
44+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
45+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
46+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
47+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
4848
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
4949
//
5050
float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
@@ -53,19 +53,19 @@ float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
5353

5454
// CHECK-LABEL: @test_vbfdotq_laneq_f32(
5555
// CHECK-NEXT: entry:
56-
// CHECK-NEXT: [[__REINT_146:%.*]] = alloca <8 x bfloat>, align 16
57-
// CHECK-NEXT: [[__REINT1_146:%.*]] = alloca <4 x float>, align 16
58-
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_146]], align 16
59-
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[__REINT_146]], align 16
60-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <16 x i8>
61-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
62-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
63-
// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_146]], align 16
64-
// CHECK-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[__REINT1_146]], align 16
65-
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
66-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
67-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[TMP5]] to <16 x i8>
68-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP5]])
56+
// CHECK-NEXT: [[__REINT_130:%.*]] = alloca <8 x bfloat>, align 16
57+
// CHECK-NEXT: [[__REINT1_130:%.*]] = alloca <4 x float>, align 16
58+
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_130]], align 16
59+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_130]], align 16
60+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
61+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
62+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
63+
// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_130]], align 16
64+
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_130]], align 16
65+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
66+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
67+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
68+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
6969
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
7070
//
7171
float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
@@ -74,19 +74,19 @@ float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b
7474

7575
// CHECK-LABEL: @test_vbfdot_laneq_f32(
7676
// CHECK-NEXT: entry:
77-
// CHECK-NEXT: [[__REINT_148:%.*]] = alloca <8 x bfloat>, align 16
78-
// CHECK-NEXT: [[__REINT1_148:%.*]] = alloca <2 x float>, align 8
79-
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_148]], align 16
80-
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[__REINT_148]], align 16
81-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <16 x i8>
82-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
83-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <2 x i32> <i32 3, i32 3>
84-
// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_148]], align 8
85-
// CHECK-NEXT: [[TMP5:%.*]] = load <4 x bfloat>, ptr [[__REINT1_148]], align 8
86-
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
87-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
88-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[TMP5]] to <8 x i8>
89-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP5]])
77+
// CHECK-NEXT: [[__REINT_132:%.*]] = alloca <8 x bfloat>, align 16
78+
// CHECK-NEXT: [[__REINT1_132:%.*]] = alloca <2 x float>, align 8
79+
// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_132]], align 16
80+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_132]], align 16
81+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
82+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
83+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
84+
// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_132]], align 8
85+
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_132]], align 8
86+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
87+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
88+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
89+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
9090
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
9191
//
9292
float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b) {
@@ -95,19 +95,19 @@ float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
9595

9696
// CHECK-LABEL: @test_vbfdotq_lane_f32(
9797
// CHECK-NEXT: entry:
98-
// CHECK-NEXT: [[__REINT_142:%.*]] = alloca <4 x bfloat>, align 8
99-
// CHECK-NEXT: [[__REINT1_142:%.*]] = alloca <4 x float>, align 16
100-
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_142]], align 8
101-
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT_142]], align 8
102-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
103-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
104-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
105-
// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_142]], align 16
106-
// CHECK-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[__REINT1_142]], align 16
107-
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
108-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
109-
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[TMP5]] to <16 x i8>
110-
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP5]])
98+
// CHECK-NEXT: [[__REINT_126:%.*]] = alloca <4 x bfloat>, align 8
99+
// CHECK-NEXT: [[__REINT1_126:%.*]] = alloca <4 x float>, align 16
100+
// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_126]], align 8
101+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_126]], align 8
102+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
103+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
104+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
105+
// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_126]], align 16
106+
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_126]], align 16
107+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
108+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
109+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
110+
// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
111111
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
112112
//
113113
float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
@@ -119,9 +119,9 @@ float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
119119
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
120120
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
121121
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
122-
// CHECK-NEXT: [[VBFMMLAQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmmla(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
123-
// CHECK-NEXT: [[VBFMMLAQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMMLAQ_V3_I]] to <16 x i8>
124-
// CHECK-NEXT: ret <4 x float> [[VBFMMLAQ_V3_I]]
122+
// CHECK-NEXT: [[VBFMMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmmla(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
123+
// CHECK-NEXT: [[VBFMMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMMLAQ_F323_I]] to <16 x i8>
124+
// CHECK-NEXT: ret <4 x float> [[VBFMMLAQ_F323_I]]
125125
//
126126
float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
127127
return vbfmmlaq_f32(r, a, b);
@@ -132,9 +132,9 @@ float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
132132
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
133133
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
134134
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
135-
// CHECK-NEXT: [[VBFMLALBQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
136-
// CHECK-NEXT: [[VBFMLALBQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_V3_I]] to <16 x i8>
137-
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_V3_I]]
135+
// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
136+
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
137+
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
138138
//
139139
float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
140140
return vbfmlalbq_f32(r, a, b);
@@ -145,9 +145,9 @@ float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
145145
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
146146
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
147147
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
148-
// CHECK-NEXT: [[VBFMLALTQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
149-
// CHECK-NEXT: [[VBFMLALTQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_V3_I]] to <16 x i8>
150-
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_V3_I]]
148+
// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
149+
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
150+
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
151151
//
152152
float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
153153
return vbfmlaltq_f32(r, a, b);
@@ -174,9 +174,9 @@ float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
174174
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
175175
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
176176
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
177-
// CHECK-NEXT: [[VBFMLALBQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
178-
// CHECK-NEXT: [[VBFMLALBQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_V3_I]] to <16 x i8>
179-
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_V3_I]]
177+
// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
178+
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
179+
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
180180
//
181181
float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
182182
return vbfmlalbq_lane_f32(r, a, b, 0);
@@ -203,9 +203,9 @@ float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
203203
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
204204
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
205205
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
206-
// CHECK-NEXT: [[VBFMLALBQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
207-
// CHECK-NEXT: [[VBFMLALBQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_V3_I]] to <16 x i8>
208-
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_V3_I]]
206+
// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
207+
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
208+
// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
209209
//
210210
float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
211211
return vbfmlalbq_laneq_f32(r, a, b, 3);
@@ -232,9 +232,9 @@ float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t
232232
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
233233
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
234234
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
235-
// CHECK-NEXT: [[VBFMLALTQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
236-
// CHECK-NEXT: [[VBFMLALTQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_V3_I]] to <16 x i8>
237-
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_V3_I]]
235+
// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
236+
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
237+
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
238238
//
239239
float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
240240
return vbfmlaltq_lane_f32(r, a, b, 0);
@@ -261,9 +261,9 @@ float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
261261
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
262262
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
263263
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
264-
// CHECK-NEXT: [[VBFMLALTQ_V3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
265-
// CHECK-NEXT: [[VBFMLALTQ_V4_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_V3_I]] to <16 x i8>
266-
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_V3_I]]
264+
// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
265+
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
266+
// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
267267
//
268268
float32x4_t test_vbfmlaltq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
269269
return vbfmlaltq_laneq_f32(r, a, b, 3);

0 commit comments

Comments
 (0)