1 file changed: +103 -70 lines changed
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,13 @@ use_a_len:
40
40
BEQZ X5, cmp_len
41
41
42
42
MOV $32 , X6
43
- BLT X5, X6, loop4_check
43
+ BLT X5, X6, check8_unaligned
44
44
45
45
// Check alignment - if the alignment of the two pointers differs, fall back to comparing with single byte loads.
46
46
AND $7 , X10, X7
47
47
AND $7 , X12, X8
48
- BNE X7, X8, loop4_check
49
- BEQZ X7, loop32_check
48
+ BNE X7, X8, check8_unaligned
49
+ BEQZ X7, compare32
50
50
51
51
// Check one byte at a time until we reach 8-byte alignment.
52
52
SUB X7, X5, X5
@@ -59,122 +59,155 @@ align:
59
59
ADD $1 , X12
60
60
BNEZ X7, align
61
61
62
- loop32_check :
63
- MOV $32 , X7
64
- BLT X5, X7, loop16_check
65
- loop32 :
62
+ check32 :
63
+ MOV $32 , X6
64
+ BLT X5, X6, compare16
65
+ compare32 :
66
66
MOV 0 (X10), X15
67
67
MOV 0 (X12), X16
68
68
MOV 8 (X10), X17
69
69
MOV 8 (X12), X18
70
- BEQ X15, X16, loop32a
71
- JMP cmp8a
72
- loop32a:
73
- BEQ X17, X18, loop32b
74
- JMP cmp8b
75
- loop32b:
70
+ BNE X15, X16, cmp8a
71
+ BNE X17, X18, cmp8b
76
72
MOV 16 (X10), X15
77
73
MOV 16 (X12), X16
78
74
MOV 24 (X10), X17
79
75
MOV 24 (X12), X18
80
- BEQ X15, X16, loop32c
81
- JMP cmp8a
82
- loop32c:
83
- BEQ X17, X18, loop32d
84
- JMP cmp8b
85
- loop32d:
76
+ BNE X15, X16, cmp8a
77
+ BNE X17, X18, cmp8b
86
78
ADD $32 , X10
87
79
ADD $32 , X12
88
80
ADD $-32 , X5
89
- BGE X5, X7, loop32
81
+ BGE X5, X6, compare32
90
82
BEQZ X5, cmp_len
91
83
92
- loop16_check :
84
+ check16 :
93
85
MOV $16 , X6
94
- BLT X5, X6, loop4_check
95
- loop16 :
86
+ BLT X5, X6, check8_unaligned
87
+ compare16 :
96
88
MOV 0 (X10), X15
97
89
MOV 0 (X12), X16
98
90
MOV 8 (X10), X17
99
91
MOV 8 (X12), X18
100
- BEQ X15, X16, loop16a
101
- JMP cmp8a
102
- loop16a:
103
- BEQ X17, X18, loop16b
104
- JMP cmp8b
105
- loop16b:
92
+ BNE X15, X16, cmp8a
93
+ BNE X17, X18, cmp8b
106
94
ADD $16 , X10
107
95
ADD $16 , X12
108
96
ADD $-16 , X5
109
- BGE X5, X6, loop16
110
97
BEQZ X5, cmp_len
111
98
112
- loop4_check :
113
- MOV $4 , X6
114
- BLT X5, X6, loop1
115
- loop4 :
99
+ check8_unaligned :
100
+ MOV $8 , X6
101
+ BLT X5, X6, check4_unaligned
102
+ compare8_unaligned :
116
103
MOVBU 0 (X10), X8
104
+ MOVBU 1 (X10), X15
105
+ MOVBU 2 (X10), X17
106
+ MOVBU 3 (X10), X19
107
+ MOVBU 4 (X10), X21
108
+ MOVBU 5 (X10), X23
109
+ MOVBU 6 (X10), X25
110
+ MOVBU 7 (X10), X29
117
111
MOVBU 0 (X12), X9
112
+ MOVBU 1 (X12), X16
113
+ MOVBU 2 (X12), X18
114
+ MOVBU 3 (X12), X20
115
+ MOVBU 4 (X12), X22
116
+ MOVBU 5 (X12), X24
117
+ MOVBU 6 (X12), X28
118
+ MOVBU 7 (X12), X30
119
+ BNE X8, X9, cmp1a
120
+ BNE X15, X16, cmp1b
121
+ BNE X17, X18, cmp1c
122
+ BNE X19, X20, cmp1d
123
+ BNE X21, X22, cmp1e
124
+ BNE X23, X24, cmp1f
125
+ BNE X25, X28, cmp1g
126
+ BNE X29, X30, cmp1h
127
+ ADD $8 , X10
128
+ ADD $8 , X12
129
+ ADD $-8 , X5
130
+ BGE X5, X6, compare8_unaligned
131
+ BEQZ X5, cmp_len
132
+
133
+ check4_unaligned:
134
+ MOV $4 , X6
135
+ BLT X5, X6, compare1
136
+ compare4_unaligned:
137
+ MOVBU 0 (X10), X8
118
138
MOVBU 1 (X10), X15
139
+ MOVBU 2 (X10), X17
140
+ MOVBU 3 (X10), X19
141
+ MOVBU 0 (X12), X9
119
142
MOVBU 1 (X12), X16
120
- BEQ X8, X9, loop4a
121
- SLTU X9, X8, X5
122
- SLTU X8, X9, X6
123
- JMP cmp_ret
124
- loop4a:
125
- BEQ X15, X16, loop4b
126
- SLTU X16, X15, X5
127
- SLTU X15, X16, X6
128
- JMP cmp_ret
129
- loop4b:
130
- MOVBU 2 (X10), X21
131
- MOVBU 2 (X12), X22
132
- MOVBU 3 (X10), X23
133
- MOVBU 3 (X12), X24
134
- BEQ X21, X22, loop4c
135
- SLTU X22, X21, X5
136
- SLTU X21, X22, X6
137
- JMP cmp_ret
138
- loop4c:
139
- BEQ X23, X24, loop4d
140
- SLTU X24, X23, X5
141
- SLTU X23, X24, X6
142
- JMP cmp_ret
143
- loop4d:
143
+ MOVBU 2 (X12), X18
144
+ MOVBU 3 (X12), X20
145
+ BNE X8, X9, cmp1a
146
+ BNE X15, X16, cmp1b
147
+ BNE X17, X18, cmp1c
148
+ BNE X19, X20, cmp1d
144
149
ADD $4 , X10
145
150
ADD $4 , X12
146
151
ADD $-4 , X5
147
- BGE X5, X6, loop4
152
+ BGE X5, X6, compare4_unaligned
148
153
149
- loop1 :
154
+ compare1 :
150
155
BEQZ X5, cmp_len
151
156
MOVBU 0 (X10), X8
152
157
MOVBU 0 (X12), X9
153
158
BNE X8, X9, cmp
154
159
ADD $1 , X10
155
160
ADD $1 , X12
156
161
ADD $-1 , X5
157
- JMP loop1
162
+ JMP compare1
158
163
159
164
// Compare 8 bytes of memory in X15/X16 that are known to differ.
160
165
cmp8a:
161
- MOV $0xff , X19
162
- cmp8a_loop:
163
- AND X15, X19, X8
164
- AND X16, X19, X9
165
- BNE X8, X9, cmp
166
- SLLI $8 , X19
167
- JMP cmp8a_loop
166
+ MOV X15, X17
167
+ MOV X16, X18
168
168
169
169
// Compare 8 bytes of memory in X17/X18 that are known to differ.
170
170
cmp8b:
171
171
MOV $0xff , X19
172
- cmp8b_loop :
172
+ cmp8_loop :
173
173
AND X17, X19, X8
174
174
AND X18, X19, X9
175
175
BNE X8, X9, cmp
176
176
SLLI $8 , X19
177
- JMP cmp8b_loop
177
+ JMP cmp8_loop
178
+
179
+ cmp1a:
180
+ SLTU X9, X8, X5
181
+ SLTU X8, X9, X6
182
+ JMP cmp_ret
183
+ cmp1b:
184
+ SLTU X16, X15, X5
185
+ SLTU X15, X16, X6
186
+ JMP cmp_ret
187
+ cmp1c:
188
+ SLTU X18, X17, X5
189
+ SLTU X17, X18, X6
190
+ JMP cmp_ret
191
+ cmp1d:
192
+ SLTU X20, X19, X5
193
+ SLTU X19, X20, X6
194
+ JMP cmp_ret
195
+ cmp1e:
196
+ SLTU X22, X21, X5
197
+ SLTU X21, X22, X6
198
+ JMP cmp_ret
199
+ cmp1f:
200
+ SLTU X24, X23, X5
201
+ SLTU X23, X24, X6
202
+ JMP cmp_ret
203
+ cmp1g:
204
+ SLTU X28, X25, X5
205
+ SLTU X25, X28, X6
206
+ JMP cmp_ret
207
+ cmp1h:
208
+ SLTU X30, X29, X5
209
+ SLTU X29, X30, X6
210
+ JMP cmp_ret
178
211
179
212
cmp_len:
180
213
MOV X11, X8
You can’t perform that action at this time.
0 commit comments