24
24
25
25
package suffixarray
26
26
27
- import "sort"
27
+ import (
28
+ "sort"
29
+ )
28
30
29
- func qsufsort (data []byte ) []int {
31
+ func qsufsort32 (data []byte ) []int32 {
30
32
// initial sorting by first byte of suffix
31
- sa := sortedByFirstByte (data )
33
+ sa := sortedByFirstByte32 (data )
32
34
if len (sa ) < 2 {
33
35
return sa
34
36
}
35
37
// initialize the group lookup table
36
38
// this becomes the inverse of the suffix array when all groups are sorted
37
- inv := initGroups (sa , data )
39
+ inv := initGroups32 (sa , data )
38
40
39
41
// the index starts 1-ordered
40
- sufSortable := & suffixSortable {sa : sa , inv : inv , h : 1 }
42
+ sufSortable := & suffixSortable32 {sa : sa , inv : inv , h : 1 }
41
43
42
- for sa [0 ] > - len (sa ) { // until all suffixes are one big sorted group
44
+ for sa [0 ] > - int32 ( len (sa ) ) { // until all suffixes are one big sorted group
43
45
// The suffixes are h-ordered, make them 2*h-ordered
44
- pi := 0 // pi is first position of first group
45
- sl := 0 // sl is negated length of sorted groups
46
- for pi < len (sa ) {
46
+ pi := int32 ( 0 ) // pi is first position of first group
47
+ sl := int32 ( 0 ) // sl is negated length of sorted groups
48
+ for pi < int32 ( len (sa ) ) {
47
49
if s := sa [pi ]; s < 0 { // if pi starts sorted group
48
50
pi -= s // skip over sorted group
49
51
sl += s // add negated length to sl
@@ -67,12 +69,12 @@ func qsufsort(data []byte) []int {
67
69
}
68
70
69
71
for i := range sa { // reconstruct suffix array from inverse
70
- sa [inv [i ]] = i
72
+ sa [inv [i ]] = int32 ( i )
71
73
}
72
74
return sa
73
75
}
74
76
75
- func sortedByFirstByte (data []byte ) []int {
77
+ func sortedByFirstByte32 (data []byte ) []int32 {
76
78
// total byte counts
77
79
var count [256 ]int
78
80
for _ , b := range data {
@@ -84,20 +86,20 @@ func sortedByFirstByte(data []byte) []int {
84
86
count [b ], sum = sum , count [b ]+ sum
85
87
}
86
88
// iterate through bytes, placing index into the correct spot in sa
87
- sa := make ([]int , len (data ))
89
+ sa := make ([]int32 , len (data ))
88
90
for i , b := range data {
89
- sa [count [b ]] = i
91
+ sa [count [b ]] = int32 ( i )
90
92
count [b ]++
91
93
}
92
94
return sa
93
95
}
94
96
95
- func initGroups (sa []int , data []byte ) []int {
97
+ func initGroups32 (sa []int32 , data []byte ) []int32 {
96
98
// label contiguous same-letter groups with the same group number
97
- inv := make ([]int , len (data ))
98
- prevGroup := len (sa ) - 1
99
+ inv := make ([]int32 , len (data ))
100
+ prevGroup := int32 ( len (sa ) ) - 1
99
101
groupByte := data [sa [prevGroup ]]
100
- for i := len (sa ) - 1 ; i >= 0 ; i -- {
102
+ for i := int32 ( len (sa ) ) - 1 ; i >= 0 ; i -- {
101
103
if b := data [sa [i ]]; b < groupByte {
102
104
if prevGroup == i + 1 {
103
105
sa [i + 1 ] = - 1
@@ -114,13 +116,13 @@ func initGroups(sa []int, data []byte) []int {
114
116
// This is necessary to ensure the suffix "a" is before "aba"
115
117
// when using a potentially unstable sort.
116
118
lastByte := data [len (data )- 1 ]
117
- s := - 1
119
+ s := int32 ( - 1 )
118
120
for i := range sa {
119
121
if sa [i ] >= 0 {
120
122
if data [sa [i ]] == lastByte && s == - 1 {
121
- s = i
123
+ s = int32 ( i )
122
124
}
123
- if sa [i ] == len (sa )- 1 {
125
+ if sa [i ] == int32 ( len (sa ) )- 1 {
124
126
sa [i ], sa [s ] = sa [s ], sa [i ]
125
127
inv [sa [s ]] = s
126
128
sa [s ] = - 1 // mark it as an isolated sorted group
@@ -131,31 +133,31 @@ func initGroups(sa []int, data []byte) []int {
131
133
return inv
132
134
}
133
135
134
- type suffixSortable struct {
135
- sa []int
136
- inv []int
137
- h int
138
- buf []int // common scratch space
136
+ type suffixSortable32 struct {
137
+ sa []int32
138
+ inv []int32
139
+ h int32
140
+ buf []int32 // common scratch space
139
141
}
140
142
141
- func (x * suffixSortable ) Len () int { return len (x .sa ) }
142
- func (x * suffixSortable ) Less (i , j int ) bool { return x .inv [x .sa [i ]+ x .h ] < x .inv [x .sa [j ]+ x .h ] }
143
- func (x * suffixSortable ) Swap (i , j int ) { x .sa [i ], x .sa [j ] = x .sa [j ], x .sa [i ] }
143
+ func (x * suffixSortable32 ) Len () int { return len (x .sa ) }
144
+ func (x * suffixSortable32 ) Less (i , j int ) bool { return x .inv [x .sa [i ]+ x .h ] < x .inv [x .sa [j ]+ x .h ] }
145
+ func (x * suffixSortable32 ) Swap (i , j int ) { x .sa [i ], x .sa [j ] = x .sa [j ], x .sa [i ] }
144
146
145
- func (x * suffixSortable ) updateGroups (offset int ) {
147
+ func (x * suffixSortable32 ) updateGroups (offset int32 ) {
146
148
bounds := x .buf [0 :0 ]
147
149
group := x .inv [x .sa [0 ]+ x .h ]
148
150
for i := 1 ; i < len (x .sa ); i ++ {
149
151
if g := x .inv [x .sa [i ]+ x .h ]; g > group {
150
- bounds = append (bounds , i )
152
+ bounds = append (bounds , int32 ( i ) )
151
153
group = g
152
154
}
153
155
}
154
- bounds = append (bounds , len (x .sa ))
156
+ bounds = append (bounds , int32 ( len (x .sa ) ))
155
157
x .buf = bounds
156
158
157
159
// update the group numberings after all new groups are determined
158
- prev := 0
160
+ prev := int32 ( 0 )
159
161
for _ , b := range bounds {
160
162
for i := prev ; i < b ; i ++ {
161
163
x.inv [x.sa [i ]] = offset + b - 1
0 commit comments