@@ -104,16 +104,13 @@ r = 1 // input n, output r
104
104
s = 2 // three temporary variables
105
105
M = 3
106
106
a = 11
107
- // Please be careful when changing this, it is pretty fragile:
108
- // 1, don't use unconditional branch as the linker is free to reorder the blocks;
109
- // 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
107
+ // Be careful: R(a) == R11 will be used by the linker for synthesized instructions.
110
108
TEXT udiv<>(SB),NOSPLIT,$-4
111
109
CLZ R(q), R(s) // find normalizing shift
112
110
MOVW .S R(q)<<R(s), R(a)
113
- ADD R(a)>> 25 , PC, R(a) // most significant 7 bits of divisor
114
- MOVBU.NE (4* 36 - 64 ) (R(a )), R(a) // 36 == number of inst. between fast_udiv_tab and begin
111
+ MOVW $fast_udiv_tab<> -64 (SB), R(M)
112
+ MOVBU.NE R(a)>> 25 (R(M )), R(a) // index by most significant 7 bits of divisor
115
113
116
- begin:
117
114
SUB .S $7 , R(s)
118
115
RSB $0 , R(q), R(M) // M = -q
119
116
MOVW .PL R(a)<<R(s), R(q)
@@ -141,9 +138,7 @@ begin:
141
138
ADD .CC $1 , R(q)
142
139
ADD .PL R(M)<<1 , R(r)
143
140
ADD .PL $2 , R(q)
144
-
145
- // return, can't use RET here or fast_udiv_tab will be dropped during linking
146
- MOVW R14, R15
141
+ RET
147
142
148
143
udiv_by_large_d:
149
144
// at this point we know d>=2^(31-6)=2^25
@@ -160,20 +155,34 @@ udiv_by_large_d:
160
155
CMN R(r), R(M)
161
156
ADD .CS R(M), R(r)
162
157
ADD .CS $1 , R(q)
163
-
164
- // return, can't use RET here or fast_udiv_tab will be dropped during linking
165
- MOVW R14, R15
158
+ RET
166
159
167
160
udiv_by_0_or_1:
168
161
// carry set if d==1, carry clear if d==0
169
- MOVW .CS R(r), R(q)
170
- MOVW .CS $0 , R(r)
171
- BL.CC runtime·panicdivide(SB) // no way back
162
+ BCC udiv_by_0 // carry clear means d == 0: go to the divide-by-zero path
163
+ MOVW R(r), R(q) // d == 1: copy R(r) (holds input n on entry) into R(q)
164
+ MOVW $0 , R(r) // then clear R(r), the register that carries output r
165
+ RET
172
166
173
- // return, can't use RET here or fast_udiv_tab will be dropped during linking
174
- MOVW R14, R15
167
+ udiv_by_0:
168
+ // The ARM toolchain expects it can emit references to DIV and MOD
169
+ // instructions. The linker rewrites each pseudo-instruction into
170
+ // a sequence that pushes two values onto the stack and then calls
171
+ // _divu, _modu, _div, or _mod (below), all of which have a 16-byte
172
+ // frame plus the saved LR. The traceback routine knows the expanded
173
+ // stack frame size at the pseudo-instruction call site, but it
174
+ // doesn't know that the frame has a non-standard layout. In particular,
175
+ // it expects to find a saved LR in the bottom word of the frame.
176
+ // Unwind the stack back to the pseudo-instruction call site, copy the
177
+ // saved LR where the traceback routine will look for it, and make it
178
+ // appear that panicdivide was called from that PC.
179
+ MOVW 0 (R13), LR // load the word at 0(R13) into LR; presumably the return PC into the call site - TODO confirm
180
+ ADD $20 , R13 // pop 20 bytes: the 16-byte frame plus the saved LR described above
181
+ MOVW 8 (R13), R1 // actual saved LR
182
+ MOVW R1, 0 (R13) // expected here for traceback
183
+ B runtime·panicdivide(SB) // plain branch (not BL), so LR keeps the value loaded above
175
184
176
- fast_udiv_tab:
185
+ TEXT fast_udiv_tab<>(SB),NOSPLIT, $-4
177
186
// var tab [64]byte
178
187
// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
179
188
// laid out here as little-endian uint32s
0 commit comments