Skip to content

Commit 11da2b2

Browse files
dr2chase authored and mknyszek committed
runtime: copy some functions from math/bits to runtime/internal/sys
CL 201765 activated calls from the runtime to functions in math/bits. When coverage and race detection were simultaneously enabled, this caused a crash when the covered+race-checked code in math/bits was called from the runtime before there was even a P. PS: Win for gdlv in helping sort this out. TODO: the next CL intrinsifies the new functions in runtime/internal/sys. TODO/Would-be-nice: Ctz64 and TrailingZeros64 are the same function; 386.s is intrinsified; clean all that up. Fixes #35461. Updates #35112. Change-Id: I750a54dba493130ad3e68a06530ede7687d41e1d Reviewed-on: https://go-review.googlesource.com/c/go/+/206199 Reviewed-by: Michael Knyszek <[email protected]> Run-TryBot: Michael Knyszek <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent 42db1da commit 11da2b2

File tree

7 files changed

+232
-106
lines changed

7 files changed

+232
-106
lines changed

src/go/build/deps_test.go

Lines changed: 47 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,7 @@ var pkgDeps = map[string][]string{
3636
// L0 is the lowest level, core, nearly unavoidable packages.
3737
"errors": {"runtime", "internal/reflectlite"},
3838
"io": {"errors", "sync", "sync/atomic"},
39-
"math/bits": {"unsafe"},
40-
"runtime": {"math/bits", "unsafe", "runtime/internal/atomic", "runtime/internal/sys", "runtime/internal/math", "internal/cpu", "internal/bytealg"},
39+
"runtime": {"unsafe", "runtime/internal/atomic", "runtime/internal/sys", "runtime/internal/math", "internal/cpu", "internal/bytealg"},
4140
"runtime/internal/sys": {},
4241
"runtime/internal/atomic": {"unsafe", "internal/cpu"},
4342
"runtime/internal/math": {"runtime/internal/sys"},
@@ -65,6 +64,7 @@ var pkgDeps = map[string][]string{
6564
// L1 adds simple functions and strings processing,
6665
// but not Unicode tables.
6766
"math": {"internal/cpu", "unsafe", "math/bits"},
67+
"math/bits": {"unsafe"},
6868
"math/cmplx": {"math"},
6969
"math/rand": {"L0", "math"},
7070
"strconv": {"L0", "unicode/utf8", "math", "math/bits"},
@@ -243,51 +243,51 @@ var pkgDeps = map[string][]string{
243243
"go/types": {"L4", "GOPARSER", "container/heap", "go/constant"},
244244

245245
// One of a kind.
246-
"archive/tar": {"L4", "OS", "syscall", "os/user"},
247-
"archive/zip": {"L4", "OS", "compress/flate"},
248-
"container/heap": {"sort"},
249-
"compress/bzip2": {"L4"},
250-
"compress/flate": {"L4"},
251-
"compress/gzip": {"L4", "compress/flate"},
252-
"compress/lzw": {"L4"},
253-
"compress/zlib": {"L4", "compress/flate"},
254-
"context": {"errors", "internal/reflectlite", "sync", "sync/atomic", "time"},
255-
"database/sql": {"L4", "container/list", "context", "database/sql/driver", "database/sql/internal"},
256-
"database/sql/driver": {"L4", "context", "time", "database/sql/internal"},
257-
"debug/dwarf": {"L4"},
258-
"debug/elf": {"L4", "OS", "debug/dwarf", "compress/zlib"},
259-
"debug/gosym": {"L4"},
260-
"debug/macho": {"L4", "OS", "debug/dwarf", "compress/zlib"},
261-
"debug/pe": {"L4", "OS", "debug/dwarf", "compress/zlib"},
262-
"debug/plan9obj": {"L4", "OS"},
263-
"encoding": {"L4"},
264-
"encoding/ascii85": {"L4"},
265-
"encoding/asn1": {"L4", "math/big"},
266-
"encoding/csv": {"L4"},
267-
"encoding/gob": {"L4", "OS", "encoding"},
268-
"encoding/hex": {"L4"},
269-
"encoding/json": {"L4", "encoding"},
270-
"encoding/pem": {"L4"},
271-
"encoding/xml": {"L4", "encoding"},
272-
"flag": {"L4", "OS"},
273-
"go/build": {"L4", "OS", "GOPARSER", "internal/goroot", "internal/goversion"},
274-
"html": {"L4"},
275-
"image/draw": {"L4", "image/internal/imageutil"},
276-
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
277-
"image/internal/imageutil": {"L4"},
278-
"image/jpeg": {"L4", "image/internal/imageutil"},
279-
"image/png": {"L4", "compress/zlib"},
280-
"index/suffixarray": {"L4", "regexp"},
281-
"internal/goroot": {"L4", "OS"},
282-
"internal/singleflight": {"sync"},
283-
"internal/trace": {"L4", "OS", "container/heap"},
284-
"internal/xcoff": {"L4", "OS", "debug/dwarf"},
285-
"math/big": {"L4"},
286-
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
287-
"mime/quotedprintable": {"L4"},
288-
"net/internal/socktest": {"L4", "OS", "syscall", "internal/syscall/windows"},
289-
"net/url": {"L4"},
290-
"plugin": {"L0", "OS", "CGO"},
246+
"archive/tar": {"L4", "OS", "syscall", "os/user"},
247+
"archive/zip": {"L4", "OS", "compress/flate"},
248+
"container/heap": {"sort"},
249+
"compress/bzip2": {"L4"},
250+
"compress/flate": {"L4"},
251+
"compress/gzip": {"L4", "compress/flate"},
252+
"compress/lzw": {"L4"},
253+
"compress/zlib": {"L4", "compress/flate"},
254+
"context": {"errors", "internal/reflectlite", "sync", "sync/atomic", "time"},
255+
"database/sql": {"L4", "container/list", "context", "database/sql/driver", "database/sql/internal"},
256+
"database/sql/driver": {"L4", "context", "time", "database/sql/internal"},
257+
"debug/dwarf": {"L4"},
258+
"debug/elf": {"L4", "OS", "debug/dwarf", "compress/zlib"},
259+
"debug/gosym": {"L4"},
260+
"debug/macho": {"L4", "OS", "debug/dwarf", "compress/zlib"},
261+
"debug/pe": {"L4", "OS", "debug/dwarf", "compress/zlib"},
262+
"debug/plan9obj": {"L4", "OS"},
263+
"encoding": {"L4"},
264+
"encoding/ascii85": {"L4"},
265+
"encoding/asn1": {"L4", "math/big"},
266+
"encoding/csv": {"L4"},
267+
"encoding/gob": {"L4", "OS", "encoding"},
268+
"encoding/hex": {"L4"},
269+
"encoding/json": {"L4", "encoding"},
270+
"encoding/pem": {"L4"},
271+
"encoding/xml": {"L4", "encoding"},
272+
"flag": {"L4", "OS"},
273+
"go/build": {"L4", "OS", "GOPARSER", "internal/goroot", "internal/goversion"},
274+
"html": {"L4"},
275+
"image/draw": {"L4", "image/internal/imageutil"},
276+
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
277+
"image/internal/imageutil": {"L4"},
278+
"image/jpeg": {"L4", "image/internal/imageutil"},
279+
"image/png": {"L4", "compress/zlib"},
280+
"index/suffixarray": {"L4", "regexp"},
281+
"internal/goroot": {"L4", "OS"},
282+
"internal/singleflight": {"sync"},
283+
"internal/trace": {"L4", "OS", "container/heap"},
284+
"internal/xcoff": {"L4", "OS", "debug/dwarf"},
285+
"math/big": {"L4"},
286+
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
287+
"mime/quotedprintable": {"L4"},
288+
"net/internal/socktest": {"L4", "OS", "syscall", "internal/syscall/windows"},
289+
"net/url": {"L4"},
290+
"plugin": {"L0", "OS", "CGO"},
291291
"runtime/pprof/internal/profile": {"L4", "OS", "compress/gzip", "regexp"},
292292
"testing/internal/testdeps": {"L4", "internal/testlog", "runtime/pprof", "regexp"},
293293
"text/scanner": {"L4", "OS"},

src/runtime/export_test.go

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
package runtime
88

99
import (
10-
"math/bits"
1110
"runtime/internal/atomic"
1211
"runtime/internal/sys"
1312
"unsafe"
@@ -360,7 +359,7 @@ func ReadMemStatsSlow() (base, slow MemStats) {
360359
slow.HeapReleased += uint64(pg) * pageSize
361360
}
362361
for _, p := range allp {
363-
pg := bits.OnesCount64(p.pcache.scav)
362+
pg := sys.OnesCount64(p.pcache.scav)
364363
slow.HeapReleased += uint64(pg) * pageSize
365364
}
366365

@@ -894,7 +893,7 @@ func PageCachePagesLeaked() (leaked uintptr) {
894893
// Since we're going past len(allp) we may see nil Ps.
895894
// Just ignore them.
896895
if p != nil {
897-
leaked += uintptr(bits.OnesCount64(p.pcache.cache))
896+
leaked += uintptr(sys.OnesCount64(p.pcache.cache))
898897
}
899898
}
900899

src/runtime/internal/sys/intrinsics.go

Lines changed: 15 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,16 @@
44

55
// +build !386
66

7+
// TODO: finish intrinsifying 386, dead-code the assembly, remove the build tags, and merge with intrinsics_common.
8+
// TODO replace all uses of CtzXX with TrailingZerosXX; they are the same.
9+
710
package sys
811

912
// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
1013

11-
const deBruijn64 = 0x0218a392cd3d5dbf
14+
const deBruijn64ctz = 0x0218a392cd3d5dbf
1215

13-
var deBruijnIdx64 = [64]byte{
16+
var deBruijnIdx64ctz = [64]byte{
1417
0, 1, 2, 7, 3, 13, 8, 19,
1518
4, 25, 14, 28, 9, 34, 20, 40,
1619
5, 17, 26, 38, 15, 46, 29, 48,
@@ -21,9 +24,9 @@ var deBruijnIdx64 = [64]byte{
2124
61, 22, 43, 51, 60, 42, 59, 58,
2225
}
2326

24-
const deBruijn32 = 0x04653adf
27+
const deBruijn32ctz = 0x04653adf
2528

26-
var deBruijnIdx32 = [32]byte{
29+
var deBruijnIdx32ctz = [32]byte{
2730
0, 1, 2, 6, 3, 11, 7, 16,
2831
4, 14, 12, 21, 8, 23, 17, 26,
2932
31, 5, 10, 15, 13, 20, 22, 25,
@@ -33,20 +36,20 @@ var deBruijnIdx32 = [32]byte{
3336
// Ctz64 counts trailing (low-order) zeroes,
3437
// and returns 64 if x == 0.
3538
func Ctz64(x uint64) int {
36-
x &= -x // isolate low-order bit
37-
y := x * deBruijn64 >> 58 // extract part of deBruijn sequence
38-
i := int(deBruijnIdx64[y]) // convert to bit index
39-
z := int((x - 1) >> 57 & 64) // adjustment if zero
39+
x &= -x // isolate low-order bit
40+
y := x * deBruijn64ctz >> 58 // extract part of deBruijn sequence
41+
i := int(deBruijnIdx64ctz[y]) // convert to bit index
42+
z := int((x - 1) >> 57 & 64) // adjustment if zero
4043
return i + z
4144
}
4245

4346
// Ctz32 counts trailing (low-order) zeroes,
4447
// and returns 32 if x == 0.
4548
func Ctz32(x uint32) int {
46-
x &= -x // isolate low-order bit
47-
y := x * deBruijn32 >> 27 // extract part of deBruijn sequence
48-
i := int(deBruijnIdx32[y]) // convert to bit index
49-
z := int((x - 1) >> 26 & 32) // adjustment if zero
49+
x &= -x // isolate low-order bit
50+
y := x * deBruijn32ctz >> 27 // extract part of deBruijn sequence
51+
i := int(deBruijnIdx32ctz[y]) // convert to bit index
52+
z := int((x - 1) >> 26 & 32) // adjustment if zero
5053
return i + z
5154
}
5255

@@ -55,25 +58,6 @@ func Ctz8(x uint8) int {
5558
return int(ntz8tab[x])
5659
}
5760

58-
var ntz8tab = [256]uint8{
59-
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
60-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
61-
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
62-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
63-
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
64-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
65-
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
66-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
67-
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
68-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
69-
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
70-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
71-
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
72-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
73-
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
74-
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
75-
}
76-
7761
// Bswap64 returns its input with byte order reversed
7862
// 0x0102030405060708 -> 0x0807060504030201
7963
func Bswap64(x uint64) uint64 {
src/runtime/internal/sys/intrinsics_common.go

Lines changed: 143 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,143 @@
1+
// Copyright 2019 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package sys
6+
7+
// Copied from math/bits to avoid dependence.
8+
9+
var len8tab = [256]uint8{
10+
0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
11+
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
12+
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
13+
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
14+
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
15+
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
16+
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
17+
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
18+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
19+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
20+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
21+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
22+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
23+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
24+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
25+
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
26+
}
27+
28+
var ntz8tab = [256]uint8{
29+
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
30+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
31+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
32+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
33+
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
34+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
35+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
36+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
37+
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
38+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
39+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
40+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
41+
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
42+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
43+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
44+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
45+
}
46+
47+
// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
48+
func Len64(x uint64) (n int) {
49+
if x >= 1<<32 {
50+
x >>= 32
51+
n = 32
52+
}
53+
if x >= 1<<16 {
54+
x >>= 16
55+
n += 16
56+
}
57+
if x >= 1<<8 {
58+
x >>= 8
59+
n += 8
60+
}
61+
return n + int(len8tab[x])
62+
}
63+
64+
// --- OnesCount ---
65+
66+
const m0 = 0x5555555555555555 // 01010101 ...
67+
const m1 = 0x3333333333333333 // 00110011 ...
68+
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
69+
70+
// OnesCount64 returns the number of one bits ("population count") in x.
71+
func OnesCount64(x uint64) int {
72+
// Implementation: Parallel summing of adjacent bits.
73+
// See "Hacker's Delight", Chap. 5: Counting Bits.
74+
// The following pattern shows the general approach:
75+
//
76+
// x = x>>1&(m0&m) + x&(m0&m)
77+
// x = x>>2&(m1&m) + x&(m1&m)
78+
// x = x>>4&(m2&m) + x&(m2&m)
79+
// x = x>>8&(m3&m) + x&(m3&m)
80+
// x = x>>16&(m4&m) + x&(m4&m)
81+
// x = x>>32&(m5&m) + x&(m5&m)
82+
// return int(x)
83+
//
84+
// Masking (& operations) can be omitted when there's no
85+
// danger that a field's sum will carry over into the next
86+
// field: Since the result cannot be > 64, 8 bits is enough
87+
// and we can ignore the masks for the shifts by 8 and up.
88+
// Per "Hacker's Delight", the first line can be simplified
89+
// more, but it saves at best one instruction, so we leave
90+
// it alone for clarity.
91+
const m = 1<<64 - 1
92+
x = x>>1&(m0&m) + x&(m0&m)
93+
x = x>>2&(m1&m) + x&(m1&m)
94+
x = (x>>4 + x) & (m2 & m)
95+
x += x >> 8
96+
x += x >> 16
97+
x += x >> 32
98+
return int(x) & (1<<7 - 1)
99+
}
100+
101+
var deBruijn64tab = [64]byte{
102+
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
103+
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
104+
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
105+
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
106+
}
107+
108+
const deBruijn64 = 0x03f79d71b4ca8b09
109+
110+
// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
111+
func TrailingZeros64(x uint64) int {
112+
if x == 0 {
113+
return 64
114+
}
115+
// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
116+
//
117+
// x & -x leaves only the right-most bit set in the word. Let k be the
118+
// index of that bit. Since only a single bit is set, the value is two
119+
// to the power of k. Multiplying by a power of two is equivalent to
120+
// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
121+
// is such that all six-bit consecutive substrings are distinct.
122+
// Therefore, if we have a left shifted version of this constant we can
123+
// find by how many bits it was shifted by looking at which six-bit
124+
// substring ended up at the top of the word.
125+
// (Knuth, volume 4, section 7.3.1)
126+
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
127+
}
128+
129+
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
130+
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
131+
132+
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
133+
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
134+
135+
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
136+
func TrailingZeros8(x uint8) int {
137+
return int(ntz8tab[x])
138+
}
139+
140+
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
141+
func Len8(x uint8) int {
142+
return int(len8tab[x])
143+
}

0 commit comments

Comments
 (0)