File tree 3 files changed +187
-2
lines changed 3 files changed +187
-2
lines changed Original file line number Diff line number Diff line change 2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
- //go:build !386 && !amd64 && !s390x && !arm && !arm64 && !ppc64 && !ppc64le && !mips && !mipsle && !wasm && !mips64 && !mips64le
5
+ //go:build !386 && !amd64 && !s390x && !arm && !arm64 && !ppc64 && !ppc64le && !mips && !mipsle && !wasm && !mips64 && !mips64le && !riscv64
6
6
7
7
package bytealg
8
8
Original file line number Diff line number Diff line change 2
2
// Use of this source code is governed by a BSD-style
3
3
// license that can be found in the LICENSE file.
4
4
5
- //go:build 386 || amd64 || s390x || arm || arm64 || ppc64 || ppc64le || mips || mipsle || wasm || mips64 || mips64le
5
+ //go:build 386 || amd64 || s390x || arm || arm64 || ppc64 || ppc64le || mips || mipsle || wasm || mips64 || mips64le || riscv64
6
6
7
7
package bytealg
8
8
Original file line number Diff line number Diff line change
1
+ // Copyright 2022 The Go Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "go_asm.h"
6
+ #include "textflag.h"
7
+
8
// Compare unpacks its stack arguments into the registers that compare<>
// expects and then jumps there; compare<> stores the result and returns.
// Stack layout as read below: a at 0(FP) (base) and 8(FP) (len),
// b at 24(FP) (base) and 32(FP) (len), result slot at 48(FP).
// NOTE(review): presumably declared on the Go side as
// func Compare(a, b []byte) int - confirm against the Go declaration.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOV	$ret+48(FP), X9		// X9 = address of the result slot
	MOV	a_base+0(FP), X5	// X5 = start of a
	MOV	b_base+24(FP), X7	// X7 = start of b
	MOV	a_len+8(FP), X6		// X6 = len(a)
	MOV	b_len+32(FP), X8	// X8 = len(b)
	JMP	compare<>(SB)
15
+
16
// cmpstring unpacks its stack arguments into the registers that compare<>
// expects and then jumps there; compare<> stores the result and returns.
// Stack layout as read below: a at 0(FP) (base) and 8(FP) (len),
// b at 16(FP) (base) and 24(FP) (len), result slot at 32(FP).
// NOTE(review): presumably declared on the Go side as
// func cmpstring(a, b string) int - confirm against the Go declaration.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOV	$ret+32(FP), X9		// X9 = address of the result slot
	MOV	a_base+0(FP), X5	// X5 = start of a
	MOV	b_base+16(FP), X7	// X7 = start of b
	MOV	a_len+8(FP), X6		// X6 = len(a)
	MOV	b_len+24(FP), X8	// X8 = len(b)
	JMP	compare<>(SB)
23
+
24
// compare<> orders two byte sequences lexicographically and stores
// -1, 0 or 1 through X9. The first min(len(a), len(b)) bytes are compared;
// if they are all equal the shorter sequence orders first.
//
// On entry:
// X5 points to start of a
// X6 length of a
// X7 points to start of b
// X8 length of b
// X9 points to the address to store the return value (-1/0/1)
//
// X5 and X7 are advanced while scanning. X10-X19 and X21-X24 are used as
// scratch. X10 holds the number of bytes still to compare.
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	X5, X7, cmp_len		// same start address: only the lengths can differ

	MOV	X6, X10
	BGE	X8, X10, use_a_len	// X10 = min(len(a), len(b))
	MOV	X8, X10
use_a_len:
	BEQZ	X10, cmp_len

	MOV	$32, X11
	BLT	X10, X11, loop4_check

	// Check alignment - the 8 byte loads below need both pointers to sit at
	// the same offset within an 8 byte word. If the offsets differ we have
	// to do one byte at a time.
	AND	$7, X5, X12
	AND	$7, X7, X13
	BNE	X12, X13, loop4_check
	BEQZ	X12, loop32_check

	// Check one byte at a time until we reach 8 byte alignment.
	// X12 = 8 - (X5 & 7) is the number of bytes up to the next 8 byte
	// boundary (1..7). X10 is at least 32 here, so it stays positive.
	SUB	X12, X0, X12
	ADD	$8, X12, X12
	SUB	X12, X10, X10
align:
	ADD	$-1, X12
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	BNE	X13, X14, cmp
	ADD	$1, X5
	ADD	$1, X7
	BNEZ	X12, align

	// Compare 32 bytes per iteration using four pairs of 8 byte loads.
loop32_check:
	MOV	$32, X12
	BLT	X10, X12, loop16_check
loop32:
	MOV	0(X5), X15
	MOV	0(X7), X16
	MOV	8(X5), X17
	MOV	8(X7), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X5), X15
	MOV	16(X7), X16
	MOV	24(X5), X17
	MOV	24(X7), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X5
	ADD	$32, X7
	ADD	$-32, X10
	BGE	X10, X12, loop32
	BEQZ	X10, cmp_len

	// Compare 16 bytes per iteration using two pairs of 8 byte loads.
loop16_check:
	MOV	$16, X11
	BLT	X10, X11, loop4_check
loop16:
	MOV	0(X5), X15
	MOV	0(X7), X16
	MOV	8(X5), X17
	MOV	8(X7), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X5
	ADD	$16, X7
	ADD	$-16, X10
	BGE	X10, X11, loop16
	BEQZ	X10, cmp_len

	// Compare 4 bytes per iteration using byte loads only, so this path
	// has no alignment requirement.
loop4_check:
	MOV	$4, X11
	BLT	X10, X11, loop1
loop4:
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	MOVBU	1(X5), X15
	MOVBU	1(X7), X16
	BEQ	X13, X14, loop4a
	SLTU	X14, X13, X10		// X10 = (a byte < b byte)
	SLTU	X13, X14, X11		// X11 = (b byte < a byte)
	JMP	cmp_ret
loop4a:
	BEQ	X15, X16, loop4b
	SLTU	X16, X15, X10
	SLTU	X15, X16, X11
	JMP	cmp_ret
loop4b:
	MOVBU	2(X5), X21
	MOVBU	2(X7), X22
	MOVBU	3(X5), X23
	MOVBU	3(X7), X24
	BEQ	X21, X22, loop4c
	SLTU	X22, X21, X10
	SLTU	X21, X22, X11
	JMP	cmp_ret
loop4c:
	BEQ	X23, X24, loop4d
	SLTU	X24, X23, X10
	SLTU	X23, X24, X11
	JMP	cmp_ret
loop4d:
	ADD	$4, X5
	ADD	$4, X7
	ADD	$-4, X10
	BGE	X10, X11, loop4

	// Compare the remaining bytes one at a time.
loop1:
	BEQZ	X10, cmp_len
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	BNE	X13, X14, cmp
	ADD	$1, X5
	ADD	$1, X7
	ADD	$-1, X10
	JMP	loop1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
	// The one-byte mask in X19 starts at the least significant byte and
	// moves up one byte per iteration until it isolates a differing byte.
	// The loop terminates because the two words are not equal.
cmp8a:
	MOV	$0xff, X19
cmp8a_loop:
	AND	X15, X19, X13
	AND	X16, X19, X14
	BNE	X13, X14, cmp
	SLLI	$8, X19
	JMP	cmp8a_loop

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
	// Same scan as cmp8a, on the second register pair.
cmp8b:
	MOV	$0xff, X19
cmp8b_loop:
	AND	X17, X19, X13
	AND	X18, X19, X14
	BNE	X13, X14, cmp
	SLLI	$8, X19
	JMP	cmp8b_loop

	// All compared bytes were equal: order by length instead.
cmp_len:
	MOV	X6, X13
	MOV	X8, X14
	// Order the unsigned values X13 (from a) and X14 (from b).
cmp:
	SLTU	X14, X13, X10		// X10 = (X13 < X14)
	SLTU	X13, X14, X11		// X11 = (X14 < X13)
	// Result = X11 - X10: -1 if a orders first, 1 if b orders first, else 0.
cmp_ret:
	SUB	X10, X11, X12
	MOV	X12, (X9)
	RET
You can’t perform that action at this time.
0 commit comments