|
3 | 3 | ; rdar://12713765 |
4 | 4 | ; When the "no-realign-stack" function attribute is set, make sure we are not |
5 | 5 | ; creating stack objects that are assumed to be 64-byte aligned. |
6 | | -@T3_retval = common global <16 x float> zeroinitializer, align 16 |
7 | 6 |
|
8 | 7 | define void @test1(<16 x float>* noalias sret(<16 x float>) %agg.result) nounwind ssp "no-realign-stack" { |
9 | | -entry: |
10 | 8 | ; CHECK-LABEL: test1: |
11 | | -; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] |
12 | | -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] |
13 | | -; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! |
14 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
15 | | -; CHECK: add r[[R3:[0-9]+]], r[[R1]], #32 |
16 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
17 | | -; CHECK: add r[[R3:[0-9]+]], r[[R1]], #48 |
18 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
19 | | -; CHECK: mov r[[R2:[0-9]+]], sp |
20 | | -; CHECK: add r[[R3:[0-9]+]], r[[R2]], #48 |
21 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
22 | | -; CHECK: add r[[R4:[0-9]+]], r[[R2]], #32 |
23 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128] |
24 | | -; CHECK: mov r[[R5:[0-9]+]], r[[R2]] |
25 | | -; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]! |
26 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128] |
27 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128] |
28 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
29 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128] |
30 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
31 | | -; CHECK: add r[[R1:[0-9]+]], r0, #48 |
32 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
33 | | -; CHECK: add r[[R1:[0-9]+]], r0, #32 |
34 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
35 | | -; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! |
36 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] |
| 9 | +; CHECK: mov r[[PTR:[0-9]+]], r{{[0-9]+}} |
| 10 | +; CHECK: mov r[[NOTALIGNED:[0-9]+]], sp |
| 11 | +; CHECK: add r[[NOTALIGNED]], r[[NOTALIGNED]], #32 |
| 12 | +; CHECK: add r[[PTR]], r[[PTR]], #32 |
| 13 | +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[NOTALIGNED]]:128] |
| 14 | +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128] |
| 15 | +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128] |
| 16 | +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[NOTALIGNED]]:128] |
| 17 | +entry: |
37 | 18 | %retval = alloca <16 x float>, align 64 |
38 | | - %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 |
39 | | - store <16 x float> %0, <16 x float>* %retval |
40 | | - %1 = load <16 x float>, <16 x float>* %retval |
41 | | - store <16 x float> %1, <16 x float>* %agg.result, align 16 |
| 19 | + %a1 = bitcast <16 x float>* %retval to float* |
| 20 | + %a2 = getelementptr inbounds float, float* %a1, i64 8 |
| 21 | + %a3 = bitcast float* %a2 to <4 x float>* |
| 22 | + |
| 23 | + %b1 = bitcast <16 x float>* %agg.result to float* |
| 24 | + %b2 = getelementptr inbounds float, float* %b1, i64 8 |
| 25 | + %b3 = bitcast float* %b2 to <4 x float>* |
| 26 | + |
| 27 | + %0 = load <4 x float>, <4 x float>* %a3, align 16 |
| 28 | + %1 = load <4 x float>, <4 x float>* %b3, align 16 |
| 29 | + store <4 x float> %0, <4 x float>* %b3, align 16 |
| 30 | + store <4 x float> %1, <4 x float>* %a3, align 16 |
42 | 31 | ret void |
43 | 32 | } |
44 | 33 |
|
45 | 34 | define void @test2(<16 x float>* noalias sret(<16 x float>) %agg.result) nounwind ssp { |
46 | | -entry: |
47 | 35 | ; CHECK-LABEL: test2: |
48 | | -; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] |
49 | | -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 |
50 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
51 | | -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 |
52 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
53 | | -; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]! |
54 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
55 | | -; CHECK: mov r[[R1:[0-9]+]], sp |
56 | | -; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #16 |
57 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
58 | | -; CHECK: mov r[[R3:[0-9]+]], #32 |
59 | | -; CHECK: mov r[[R9:[0-9]+]], r[[R1]] |
60 | | -; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128], r[[R3]] |
61 | | -; CHECK: mov r[[R3:[0-9]+]], r[[R9]] |
62 | | -; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! |
63 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
64 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128] |
65 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] |
66 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] |
67 | | -; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
68 | | -; CHECK: add r[[R1:[0-9]+]], r0, #48 |
69 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
70 | | -; CHECK: add r[[R1:[0-9]+]], r0, #32 |
71 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] |
72 | | -; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! |
73 | | -; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] |
| 36 | +; CHECK: mov r[[PTR:[0-9]+]], r{{[0-9]+}} |
| 37 | +; CHECK: mov r[[ALIGNED:[0-9]+]], sp |
| 38 | +; CHECK: orr r[[ALIGNED]], r[[ALIGNED]], #32 |
| 39 | +; CHECK: add r[[PTR]], r[[PTR]], #32 |
| 40 | +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[ALIGNED]]:128] |
| 41 | +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128] |
| 42 | +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128] |
| 43 | +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[ALIGNED]]:128] |
| 44 | +entry: |
| 45 | + %retval = alloca <16 x float>, align 64 |
| 46 | + %a1 = bitcast <16 x float>* %retval to float* |
| 47 | + %a2 = getelementptr inbounds float, float* %a1, i64 8 |
| 48 | + %a3 = bitcast float* %a2 to <4 x float>* |
74 | 49 |
|
| 50 | + %b1 = bitcast <16 x float>* %agg.result to float* |
| 51 | + %b2 = getelementptr inbounds float, float* %b1, i64 8 |
| 52 | + %b3 = bitcast float* %b2 to <4 x float>* |
75 | 53 |
|
76 | | -%retval = alloca <16 x float>, align 64 |
77 | | - %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 |
78 | | - store <16 x float> %0, <16 x float>* %retval |
79 | | - %1 = load <16 x float>, <16 x float>* %retval |
80 | | - store <16 x float> %1, <16 x float>* %agg.result, align 16 |
| 54 | + %0 = load <4 x float>, <4 x float>* %a3, align 16 |
| 55 | + %1 = load <4 x float>, <4 x float>* %b3, align 16 |
| 56 | + store <4 x float> %0, <4 x float>* %b3, align 16 |
| 57 | + store <4 x float> %1, <4 x float>* %a3, align 16 |
81 | 58 | ret void |
82 | 59 | } |
0 commit comments