@@ -1,46 +1,30 @@
- ; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg < %s -S -o - | FileCheck %s --check-prefix=CHECK
- ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|@)" --version 5
+ ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize < %s -S -o - 2>%t | FileCheck %s
+ ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST
; REQUIRES: asserts

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; CHECK-COST-LABEL: struct_return_widen
- ; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
+ ; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
; CHECK-COST: Cost of 10 for VF 2: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>) (using library function: fixed_vec_foo)
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)

define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_widen(
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
- ; CHECK-NEXT: [[ENTRY:.*]]:
- ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
- ; CHECK: [[VECTOR_BODY]]:
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
- ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
- ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x half>, ptr [[TMP1]], align 2
- ; CHECK-NEXT: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD]])
- ; CHECK-NEXT: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 0
- ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 0
- ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 1
- ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 1
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 4
- ; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP8]], align 2
- ; CHECK-NEXT: store <2 x half> [[TMP5]], ptr [[TMP9]], align 2
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP10]], i64 4
- ; CHECK-NEXT: store <2 x half> [[TMP6]], ptr [[TMP10]], align 2
- ; CHECK-NEXT: store <2 x half> [[TMP7]], ptr [[TMP11]], align 2
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
- ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
- ; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
- ; CHECK: [[EXIT]]:
- ; CHECK-NEXT: ret void
+ ; CHECK: [[ENTRY:.*:]]
+ ; CHECK: [[VECTOR_PH:.*:]]
+ ; CHECK: [[VECTOR_BODY:.*:]]
+ ; CHECK: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD:%.*]])
+ ; CHECK: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1:%.*]])
+ ; CHECK: [[MIDDLE_BLOCK:.*:]]
+ ; CHECK: [[SCALAR_PH:.*:]]
+ ; CHECK: [[FOR_BODY:.*:]]
+ ; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR2:[0-9]+]]
+ ; CHECK: [[EXIT:.*:]]
;
entry:
br label %for.body
@@ -65,41 +49,24 @@ exit:
}

; CHECK-COST-LABEL: struct_return_replicate
- ; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
+ ; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)

define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_replicate(
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
- ; CHECK-NEXT: [[ENTRY:.*]]:
- ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
- ; CHECK: [[VECTOR_BODY]]:
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
- ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 0
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1]]) #[[ATTR0:[0-9]+]]
- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 1
- ; CHECK-NEXT: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3]]) #[[ATTR0]]
- ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { half, half } [[TMP2]], 0
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[TMP5]], i64 0
- ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { half, half } [[TMP2]], 1
- ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x half> poison, half [[TMP7]], i64 0
- ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { half, half } [[TMP4]], 0
- ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x half> [[TMP6]], half [[TMP9]], i64 1
- ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, half } [[TMP4]], 1
- ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x half> [[TMP8]], half [[TMP11]], i64 1
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
- ; CHECK-NEXT: store <2 x half> [[TMP10]], ptr [[TMP13]], align 2
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
- ; CHECK-NEXT: store <2 x half> [[TMP12]], ptr [[TMP14]], align 2
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
- ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
- ; CHECK-NEXT: br i1 [[TMP15]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
- ; CHECK: [[EXIT]]:
- ; CHECK-NEXT: ret void
+ ; CHECK: [[ENTRY:.*:]]
+ ; CHECK: [[VECTOR_PH:.*:]]
+ ; CHECK: [[VECTOR_BODY:.*:]]
+ ; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR3:[0-9]+]]
+ ; CHECK: [[TMP6:%.*]] = tail call { half, half } @foo(half [[TMP5:%.*]]) #[[ATTR3]]
+ ; CHECK: [[MIDDLE_BLOCK:.*:]]
+ ; CHECK: [[SCALAR_PH:.*:]]
+ ; CHECK: [[FOR_BODY:.*:]]
+ ; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]]
+ ; CHECK: [[EXIT:.*:]]
;
entry:
br label %for.body
@@ -108,7 +75,7 @@ for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
%in_val = load half, ptr %arrayidx, align 2
- ; #3 does not have a fixed-size vector mapping (so replication is used)
+ ; #1 does not have a fixed-size vector mapping (so replication is used)
%call = tail call { half, half } @foo(half %in_val) #1
%extract_a = extractvalue { half, half } %call, 0
%extract_b = extractvalue { half, half } %call, 1
@@ -124,10 +91,64 @@ exit:
ret void
}

+ ; CHECK-COST-LABEL: struct_return_scalable
+ ; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val)
+ ; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
+ ; CHECK-COST: Cost of 10 for VF vscale x 8: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>, ir<true>) (using library function: scalable_vec_masked_foo)
+
+ define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) #2 {
+ ; CHECK-LABEL: define void @struct_return_scalable(
+ ; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
+ ; CHECK: [[ENTRY:.*:]]
+ ; CHECK: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[VECTOR_PH:.*:]]
+ ; CHECK: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[VECTOR_BODY:.*:]]
+ ; CHECK: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[TMP12:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD:%.*]], <vscale x 8 x i1> splat (i1 true))
+ ; CHECK: [[TMP13:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD1:%.*]], <vscale x 8 x i1> splat (i1 true))
+ ; CHECK: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+ ; CHECK: [[MIDDLE_BLOCK:.*:]]
+ ; CHECK: [[SCALAR_PH:.*:]]
+ ; CHECK: [[FOR_BODY:.*:]]
+ ; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]]
+ ; CHECK: [[EXIT:.*:]]
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
+ %in_val = load half, ptr %arrayidx, align 2
+ %call = tail call { half, half } @foo(half %in_val) #1
+ %extract_a = extractvalue { half, half } %call, 0
+ %extract_b = extractvalue { half, half } %call, 1
+ %arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
+ store half %extract_a, ptr %arrayidx2, align 2
+ %arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
+ store half %extract_b, ptr %arrayidx4, align 2
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %for.body
+
+ exit:
+ ret void
+ }
+
+
declare { half, half } @foo(half)

declare { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half>)
- declare { <vscale x 4 x half>, <vscale x 4 x half> } @scalable_vec_masked_foo(<vscale x 4 x half>, <vscale x 4 x i1>)
+ declare { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half>, <vscale x 8 x i1>)

attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
+ attributes #2 = { "target-features"="+sve" }