|
92 | 92 | RET
|
93 | 93 |
|
// subVV: multi-word subtraction with borrow propagation.
// For each 8-byte word i in [0, z_len): z[i] = x[i] - y[i] - b, where b is the
// borrow out of the previous word (initially 0). The final borrow is written
// to the result slot c.
//
// Frame slots read:    z+0(FP), z_len+8(FP), x+24(FP), y+48(FP)
// Frame slot written:  c+72(FP)
//
// Register roles:
//   X5  = pointer to current x word      X6  = pointer to current y word
//   X7  = pointer to current z word      X30 = words remaining
//   X28 = constant 4 (unroll factor)     X29 = running borrow b (0 or 1)
//
// Borrow detection used for every word (operand order: SUB a, b, c => c = b - a;
// SLTU a, b, c => c = (b < a) unsigned):
//   d  = x - y        borrow1 = (x < d)   // the difference wrapped around
//   r  = d - b        borrow2 = (d < r)   // subtracting b wrapped around
//   b' = borrow1 + borrow2
// If borrow1 is 1 then d >= 1, so d - b cannot wrap; hence at most one of the
// two borrows is set and b' stays 0 or 1.
94 | 94 | TEXT ·subVV(SB),NOSPLIT,$0
|
95 |
// NOTE(review): the line marked '-' below appears to be the previous body
// (a jump to subVV_g) shown as removed by the diff view — confirm.
| - JMP ·subVV_g(SB) |
| 95 | + MOV x+24(FP), X5 |
| 96 | + MOV y+48(FP), X6 |
| 97 | + MOV z+0(FP), X7 |
| 98 | + MOV z_len+8(FP), X30 |
| 99 | + |
| 100 | + MOV $4, X28 |
| 101 | + MOV $0, X29 // b = 0 |
| 102 | + |
// Zero-length input: go straight to done, which returns b = 0.
// Fewer than 4 words: skip the unrolled loop and use the 1-word loop.
| 103 | + BEQZ X30, done |
| 104 | + BLTU X30, X28, loop1 |
| 105 | + |
// Unrolled loop: handles 4 words per iteration. All eight operands are loaded
// first; results are kept in X10, X13, X16, X19 and stored after all four
// words have been computed.
| 106 | +loop4: |
| 107 | + MOV 0(X5), X8 // x[0] |
| 108 | + MOV 0(X6), X9 // y[0] |
| 109 | + MOV 8(X5), X11 // x[1] |
| 110 | + MOV 8(X6), X12 // y[1] |
| 111 | + MOV 16(X5), X14 // x[2] |
| 112 | + MOV 16(X6), X15 // y[2] |
| 113 | + MOV 24(X5), X17 // x[3] |
| 114 | + MOV 24(X6), X18 // y[3] |
| 115 | + |
// Word 0: X21 = x[0]-y[0], X22 = first borrow, X10 = result, X23 = second borrow.
| 116 | + SUB X9, X8, X21 // z[0] = x[0] - y[0] |
| 117 | + SLTU X21, X8, X22 |
| 118 | + SUB X29, X21, X10 // z[0] = x[0] - y[0] - b |
| 119 | + SLTU X10, X21, X23 |
| 120 | + ADD X22, X23, X29 // next b |
| 121 | + |
// Word 1: same pattern, using X24/X25/X26 as temporaries; result in X13.
| 122 | + SUB X12, X11, X24 // z[1] = x[1] - y[1] |
| 123 | + SLTU X24, X11, X25 |
| 124 | + SUB X29, X24, X13 // z[1] = x[1] - y[1] - b |
| 125 | + SLTU X13, X24, X26 |
| 126 | + ADD X25, X26, X29 // next b |
| 127 | + |
// Word 2: temporaries X21/X22/X23 are reused; result in X16.
| 128 | + SUB X15, X14, X21 // z[2] = x[2] - y[2] |
| 129 | + SLTU X21, X14, X22 |
| 130 | + SUB X29, X21, X16 // z[2] = x[2] - y[2] - b |
| 131 | + SLTU X16, X21, X23 |
| 132 | + ADD X22, X23, X29 // next b |
| 133 | + |
// Word 3: temporaries X21/X22/X23 are reused again; result in X19.
| 134 | + SUB X18, X17, X21 // z[3] = x[3] - y[3] |
| 135 | + SLTU X21, X17, X22 |
| 136 | + SUB X29, X21, X19 // z[3] = x[3] - y[3] - b |
| 137 | + SLTU X19, X21, X23 |
| 138 | + ADD X22, X23, X29 // next b |
| 139 | + |
| 140 | + MOV X10, 0(X7) // z[0] |
| 141 | + MOV X13, 8(X7) // z[1] |
| 142 | + MOV X16, 16(X7) // z[2] |
| 143 | + MOV X19, 24(X7) // z[3] |
| 144 | + |
// Advance all three pointers by 4 words (32 bytes) and drop the count by 4.
| 145 | + ADD $32, X5 |
| 146 | + ADD $32, X6 |
| 147 | + ADD $32, X7 |
| 148 | + SUB $4, X30 |
| 149 | + |
// At least 4 words left: repeat the unrolled loop. Exactly 0 left: finish.
// Otherwise 1..3 words remain and execution falls through into loop1.
| 150 | + BGEU X30, X28, loop4 |
| 151 | + BEQZ X30, done |
| 152 | + |
// Tail loop: one word per iteration. X30 is non-zero on every entry here
// (guarded by the BEQZ checks above and the BNEZ at the bottom).
| 153 | +loop1: |
| 154 | + MOV 0(X5), X10 // x |
| 155 | + MOV 0(X6), X11 // y |
| 156 | + |
// X12 = x - y, X14 = first borrow, X13 = result, X15 = second borrow.
| 157 | + SUB X11, X10, X12 // z = x - y |
| 158 | + SLTU X12, X10, X14 |
| 159 | + SUB X29, X12, X13 // z = x - y - b |
| 160 | + SLTU X13, X12, X15 |
| 161 | + ADD X14, X15, X29 // next b |
| 162 | + |
| 163 | + MOV X13, 0(X7) // z |
| 164 | + |
// Advance all three pointers by one word (8 bytes) and drop the count by 1.
| 165 | + ADD $8, X5 |
| 166 | + ADD $8, X6 |
| 167 | + ADD $8, X7 |
| 168 | + SUB $1, X30 |
| 169 | + |
| 170 | + BNEZ X30, loop1 |
| 171 | + |
// Common exit: store the final borrow into the result slot.
| 172 | +done: |
| 173 | + MOV X29, c+72(FP) // return b |
| 174 | + RET |
96 | 175 |
|
// addVW: no work is done here; control is transferred with an unconditional
// JMP to addVW_g, which is defined outside this view.
97 | 176 | TEXT ·addVW(SB),NOSPLIT,$0
|
98 | 177 | JMP ·addVW_g(SB)
|
|
0 commit comments