1
1
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2
2
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
3
- // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
3
+ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,instcombine | FileCheck %s
4
4
5
5
// REQUIRES: aarch64-registered-target || arm-registered-target
6
6
@@ -17,10 +17,8 @@ bfloat16x4_t test_vcreate_bf16(uint64_t a) {
17
17
18
18
// CHECK-LABEL: @test_vdup_n_bf16(
19
19
// CHECK-NEXT: entry:
20
- // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0
21
- // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
22
- // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
23
- // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
20
+ // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i64 0
21
+ // CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x bfloat> [[VECINIT_I]], <4 x bfloat> poison, <4 x i32> zeroinitializer
24
22
// CHECK-NEXT: ret <4 x bfloat> [[VECINIT3_I]]
25
23
//
26
24
bfloat16x4_t test_vdup_n_bf16 (bfloat16_t v ) {
@@ -29,14 +27,8 @@ bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
29
27
30
28
// CHECK-LABEL: @test_vdupq_n_bf16(
31
29
// CHECK-NEXT: entry:
32
- // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0
33
- // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
34
- // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
35
- // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
36
- // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4
37
- // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5
38
- // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6
39
- // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7
30
+ // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i64 0
31
+ // CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x bfloat> [[VECINIT_I]], <8 x bfloat> poison, <8 x i32> zeroinitializer
40
32
// CHECK-NEXT: ret <8 x bfloat> [[VECINIT7_I]]
41
33
//
42
34
bfloat16x8_t test_vdupq_n_bf16 (bfloat16_t v ) {
@@ -45,9 +37,7 @@ bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
45
37
46
38
// CHECK-LABEL: @test_vdup_lane_bf16(
47
39
// CHECK-NEXT: entry:
48
- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
49
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
50
- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
40
+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[V:%.*]], <4 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
51
41
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
52
42
//
53
43
bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t v ) {
@@ -56,9 +46,7 @@ bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
56
46
57
47
// CHECK-LABEL: @test_vdupq_lane_bf16(
58
48
// CHECK-NEXT: entry:
59
- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
60
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
61
- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
49
+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[V:%.*]], <4 x bfloat> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
62
50
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
63
51
//
64
52
bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t v ) {
@@ -67,9 +55,7 @@ bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
67
55
68
56
// CHECK-LABEL: @test_vdup_laneq_bf16(
69
57
// CHECK-NEXT: entry:
70
- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
71
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
72
- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
58
+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[V:%.*]], <8 x bfloat> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
73
59
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
74
60
//
75
61
bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t v ) {
@@ -78,9 +64,7 @@ bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
78
64
79
65
// CHECK-LABEL: @test_vdupq_laneq_bf16(
80
66
// CHECK-NEXT: entry:
81
- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
82
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
83
- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
67
+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[V:%.*]], <8 x bfloat> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
84
68
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
85
69
//
86
70
bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t v ) {
@@ -98,7 +82,7 @@ bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) {
98
82
99
83
// CHECK-LABEL: @test_vget_high_bf16(
100
84
// CHECK-NEXT: entry:
101
- // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
85
+ // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
102
86
// CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]]
103
87
//
104
88
bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a ) {
@@ -107,7 +91,7 @@ bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) {
107
91
108
92
// CHECK-LABEL: @test_vget_low_bf16(
109
93
// CHECK-NEXT: entry:
110
- // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
94
+ // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
111
95
// CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]]
112
96
//
113
97
bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a ) {
@@ -116,7 +100,7 @@ bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) {
116
100
117
101
// CHECK-LABEL: @test_vget_lane_bf16(
118
102
// CHECK-NEXT: entry:
119
- // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
103
+ // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i64 1
120
104
// CHECK-NEXT: ret bfloat [[VGET_LANE]]
121
105
//
122
106
bfloat16_t test_vget_lane_bf16 (bfloat16x4_t v ) {
@@ -125,7 +109,7 @@ bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) {
125
109
126
110
// CHECK-LABEL: @test_vgetq_lane_bf16(
127
111
// CHECK-NEXT: entry:
128
- // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
112
+ // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i64 7
129
113
// CHECK-NEXT: ret bfloat [[VGETQ_LANE]]
130
114
//
131
115
bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t v ) {
@@ -134,7 +118,7 @@ bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
134
118
135
119
// CHECK-LABEL: @test_vset_lane_bf16(
136
120
// CHECK-NEXT: entry:
137
- // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1
121
+ // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i64 1
138
122
// CHECK-NEXT: ret <4 x bfloat> [[VSET_LANE]]
139
123
//
140
124
bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a , bfloat16x4_t v ) {
@@ -143,7 +127,7 @@ bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
143
127
144
128
// CHECK-LABEL: @test_vsetq_lane_bf16(
145
129
// CHECK-NEXT: entry:
146
- // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7
130
+ // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i64 7
147
131
// CHECK-NEXT: ret <8 x bfloat> [[VSET_LANE]]
148
132
//
149
133
bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a , bfloat16x8_t v ) {
@@ -152,7 +136,7 @@ bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
152
136
153
137
// CHECK-LABEL: @test_vduph_lane_bf16(
154
138
// CHECK-NEXT: entry:
155
- // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
139
+ // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i64 1
156
140
// CHECK-NEXT: ret bfloat [[VGET_LANE]]
157
141
//
158
142
bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t v ) {
@@ -161,7 +145,7 @@ bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
161
145
162
146
// CHECK-LABEL: @test_vduph_laneq_bf16(
163
147
// CHECK-NEXT: entry:
164
- // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
148
+ // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i64 7
165
149
// CHECK-NEXT: ret bfloat [[VGETQ_LANE]]
166
150
//
167
151
bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t v ) {
0 commit comments