@@ -50,4 +50,123 @@ define void @histogram_i32_literal_noscale(ptr %base, <vscale x 4 x i32> %indice
50
50
ret void
51
51
}
52
52
53
+ ; i32 buckets with a runtime i32 increment, indexed by <vscale x 2 x i64> under a
+ ; two-lane mask. The CHECK lines expect the whole sequence on 64-bit (.d) lanes:
+ ; ld1w/st1w move the 32-bit buckets and %inc is broadcast from x1.
+ ; NOTE(review): "promote" presumably refers to widening i32 to the .d lane size - confirm.
+ define void @histogram_i32_promote (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i32 %inc ) #0 {
54
+ ; CHECK-LABEL: histogram_i32_promote:
55
+ ; CHECK: // %bb.0:
56
+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
57
+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
58
+ ; CHECK-NEXT: mov z3.d, x1
59
+ ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x0, z0.d, lsl #2]
60
+ ; CHECK-NEXT: ptrue p1.d
61
+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
62
+ ; CHECK-NEXT: st1w { z1.d }, p0, [x0, z0.d, lsl #2]
63
+ ; CHECK-NEXT: ret
64
+ %buckets = getelementptr i32 , ptr %base , <vscale x 2 x i64 > %indices
65
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i32 (<vscale x 2 x ptr > %buckets , i32 %inc , <vscale x 2 x i1 > %mask )
66
+ ret void
67
+ }
68
+
69
+ ; i16 buckets with a runtime i16 increment, indexed by <vscale x 4 x i32>.
+ ; The CHECK lines expect 32-bit (.s) lanes, ld1h/st1h with sxtw #1 addressing,
+ ; and a mad that folds in the increment broadcast from w1.
+ define void @histogram_i16 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i16 %inc ) #0 {
70
+ ; CHECK-LABEL: histogram_i16:
71
+ ; CHECK: // %bb.0:
72
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
73
+ ; CHECK-NEXT: mov z3.s, w1
74
+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
75
+ ; CHECK-NEXT: ptrue p1.s
76
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
77
+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
78
+ ; CHECK-NEXT: ret
79
+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
80
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 %inc , <vscale x 4 x i1 > %mask )
81
+ ret void
82
+ }
83
+
84
+ ; i8 buckets with a runtime i8 increment, indexed by <vscale x 4 x i32>.
+ ; The CHECK lines expect 32-bit (.s) lanes and ld1b/st1b with sxtw addressing
+ ; and no shift amount, plus a mad that folds in the increment broadcast from w1.
+ define void @histogram_i8 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask , i8 %inc ) #0 {
85
+ ; CHECK-LABEL: histogram_i8:
86
+ ; CHECK: // %bb.0:
87
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
88
+ ; CHECK-NEXT: mov z3.s, w1
89
+ ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x0, z0.s, sxtw]
90
+ ; CHECK-NEXT: ptrue p1.s
91
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
92
+ ; CHECK-NEXT: st1b { z1.s }, p0, [x0, z0.s, sxtw]
93
+ ; CHECK-NEXT: ret
94
+ %buckets = getelementptr i8 , ptr %base , <vscale x 4 x i32 > %indices
95
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i8 (<vscale x 4 x ptr > %buckets , i8 %inc , <vscale x 4 x i1 > %mask )
96
+ ret void
97
+ }
98
+
99
+ ; i16 buckets with a runtime i16 increment, indexed by <vscale x 2 x i64> under a
+ ; two-lane mask. The CHECK lines expect 64-bit (.d) lanes, ld1h/st1h with
+ ; lsl #1 addressing, and %inc broadcast from x1.
+ define void @histogram_i16_2_lane (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i16 %inc ) #0 {
100
+ ; CHECK-LABEL: histogram_i16_2_lane:
101
+ ; CHECK: // %bb.0:
102
+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
103
+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
104
+ ; CHECK-NEXT: mov z3.d, x1
105
+ ; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, z0.d, lsl #1]
106
+ ; CHECK-NEXT: ptrue p1.d
107
+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
108
+ ; CHECK-NEXT: st1h { z1.d }, p0, [x0, z0.d, lsl #1]
109
+ ; CHECK-NEXT: ret
110
+ %buckets = getelementptr i16 , ptr %base , <vscale x 2 x i64 > %indices
111
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i16 (<vscale x 2 x ptr > %buckets , i16 %inc , <vscale x 2 x i1 > %mask )
112
+ ret void
113
+ }
114
+
115
+ ; i8 buckets with a runtime i8 increment, indexed by <vscale x 2 x i64> under a
+ ; two-lane mask. The CHECK lines expect 64-bit (.d) lanes, ld1b/st1b with the
+ ; unscaled [x0, z0.d] addressing form, and %inc broadcast from x1.
+ define void @histogram_i8_2_lane (ptr %base , <vscale x 2 x i64 > %indices , <vscale x 2 x i1 > %mask , i8 %inc ) #0 {
116
+ ; CHECK-LABEL: histogram_i8_2_lane:
117
+ ; CHECK: // %bb.0:
118
+ ; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
119
+ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
120
+ ; CHECK-NEXT: mov z3.d, x1
121
+ ; CHECK-NEXT: ld1b { z2.d }, p0/z, [x0, z0.d]
122
+ ; CHECK-NEXT: ptrue p1.d
123
+ ; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
124
+ ; CHECK-NEXT: st1b { z1.d }, p0, [x0, z0.d]
125
+ ; CHECK-NEXT: ret
126
+ %buckets = getelementptr i8 , ptr %base , <vscale x 2 x i64 > %indices
127
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i8 (<vscale x 2 x ptr > %buckets , i8 %inc , <vscale x 2 x i1 > %mask )
128
+ ret void
129
+ }
130
+
131
+ ; i16 buckets with a constant increment of 1 (no %inc parameter).
+ ; The CHECK lines expect no broadcast, ptrue or mad: the histcnt result is
+ ; combined with the loaded buckets by a plain add before the store.
+ define void @histogram_i16_literal_1 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
132
+ ; CHECK-LABEL: histogram_i16_literal_1:
133
+ ; CHECK: // %bb.0:
134
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
135
+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
136
+ ; CHECK-NEXT: add z1.s, z2.s, z1.s
137
+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
138
+ ; CHECK-NEXT: ret
139
+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
140
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 1 , <vscale x 4 x i1 > %mask )
141
+ ret void
142
+ }
143
+
144
+ ; i16 buckets with a constant increment of 2 (no %inc parameter).
+ ; The CHECK lines expect a single adr with lsl #1 between the load and the
+ ; store, with no mov/ptrue/mad.
+ define void @histogram_i16_literal_2 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
145
+ ; CHECK-LABEL: histogram_i16_literal_2:
146
+ ; CHECK: // %bb.0:
147
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
148
+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
149
+ ; CHECK-NEXT: adr z1.s, [z2.s, z1.s, lsl #1]
150
+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
151
+ ; CHECK-NEXT: ret
152
+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
153
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 2 , <vscale x 4 x i1 > %mask )
154
+ ret void
155
+ }
156
+
157
+ ; i16 buckets with a constant increment of 3 (no %inc parameter).
+ ; The CHECK lines expect the constant to be materialised with mov z3.s, #3
+ ; and folded in by a mad predicated on ptrue p1.s.
+ define void @histogram_i16_literal_3 (ptr %base , <vscale x 4 x i32 > %indices , <vscale x 4 x i1 > %mask ) #0 {
158
+ ; CHECK-LABEL: histogram_i16_literal_3:
159
+ ; CHECK: // %bb.0:
160
+ ; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
161
+ ; CHECK-NEXT: mov z3.s, #3 // =0x3
162
+ ; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
163
+ ; CHECK-NEXT: ptrue p1.s
164
+ ; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
165
+ ; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
166
+ ; CHECK-NEXT: ret
167
+ %buckets = getelementptr i16 , ptr %base , <vscale x 4 x i32 > %indices
168
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16 (<vscale x 4 x ptr > %buckets , i16 3 , <vscale x 4 x i1 > %mask )
169
+ ret void
170
+ }
171
+
53
172
; Attribute group #0, referenced by the histogram test functions above:
; sets the "+sve2" target feature and vscale_range(1, 16).
attributes #0 = { "target-features" ="+sve2" vscale_range(1 , 16 ) }
0 commit comments