|
| 1 | +// Copyright 2019 The Go Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package sys |
| 6 | + |
| 7 | +// Copied from math/bits to avoid dependence. |
| 8 | + |
| 9 | +var len8tab = [256]uint8{ |
| 10 | + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, |
| 11 | + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, |
| 12 | + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, |
| 13 | + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, |
| 14 | + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, |
| 15 | + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, |
| 16 | + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, |
| 17 | + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, |
| 18 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 19 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 20 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 21 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 22 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 23 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 24 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 25 | + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, |
| 26 | +} |
| 27 | + |
| 28 | +var ntz8tab = [256]uint8{ |
| 29 | + 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 30 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 31 | + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 32 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 33 | + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 34 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 35 | + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 36 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 37 | + 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 38 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 39 | + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 40 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 41 | + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 42 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 43 | + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 44 | + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, |
| 45 | +} |
| 46 | + |
| 47 | +// len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 48 | +func Len64(x uint64) (n int) { |
| 49 | + if x >= 1<<32 { |
| 50 | + x >>= 32 |
| 51 | + n = 32 |
| 52 | + } |
| 53 | + if x >= 1<<16 { |
| 54 | + x >>= 16 |
| 55 | + n += 16 |
| 56 | + } |
| 57 | + if x >= 1<<8 { |
| 58 | + x >>= 8 |
| 59 | + n += 8 |
| 60 | + } |
| 61 | + return n + int(len8tab[x]) |
| 62 | +} |
| 63 | + |
| 64 | +// --- OnesCount --- |
| 65 | + |
| 66 | +const m0 = 0x5555555555555555 // 01010101 ... |
| 67 | +const m1 = 0x3333333333333333 // 00110011 ... |
| 68 | +const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... |
| 69 | + |
| 70 | +// OnesCount64 returns the number of one bits ("population count") in x. |
| 71 | +func OnesCount64(x uint64) int { |
| 72 | + // Implementation: Parallel summing of adjacent bits. |
| 73 | + // See "Hacker's Delight", Chap. 5: Counting Bits. |
| 74 | + // The following pattern shows the general approach: |
| 75 | + // |
| 76 | + // x = x>>1&(m0&m) + x&(m0&m) |
| 77 | + // x = x>>2&(m1&m) + x&(m1&m) |
| 78 | + // x = x>>4&(m2&m) + x&(m2&m) |
| 79 | + // x = x>>8&(m3&m) + x&(m3&m) |
| 80 | + // x = x>>16&(m4&m) + x&(m4&m) |
| 81 | + // x = x>>32&(m5&m) + x&(m5&m) |
| 82 | + // return int(x) |
| 83 | + // |
| 84 | + // Masking (& operations) can be left away when there's no |
| 85 | + // danger that a field's sum will carry over into the next |
| 86 | + // field: Since the result cannot be > 64, 8 bits is enough |
| 87 | + // and we can ignore the masks for the shifts by 8 and up. |
| 88 | + // Per "Hacker's Delight", the first line can be simplified |
| 89 | + // more, but it saves at best one instruction, so we leave |
| 90 | + // it alone for clarity. |
| 91 | + const m = 1<<64 - 1 |
| 92 | + x = x>>1&(m0&m) + x&(m0&m) |
| 93 | + x = x>>2&(m1&m) + x&(m1&m) |
| 94 | + x = (x>>4 + x) & (m2 & m) |
| 95 | + x += x >> 8 |
| 96 | + x += x >> 16 |
| 97 | + x += x >> 32 |
| 98 | + return int(x) & (1<<7 - 1) |
| 99 | +} |
| 100 | + |
| 101 | +var deBruijn64tab = [64]byte{ |
| 102 | + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, |
| 103 | + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, |
| 104 | + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, |
| 105 | + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, |
| 106 | +} |
| 107 | + |
| 108 | +const deBruijn64 = 0x03f79d71b4ca8b09 |
| 109 | + |
| 110 | +// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0. |
| 111 | +func TrailingZeros64(x uint64) int { |
| 112 | + if x == 0 { |
| 113 | + return 64 |
| 114 | + } |
| 115 | + // If popcount is fast, replace code below with return popcount(^x & (x - 1)). |
| 116 | + // |
| 117 | + // x & -x leaves only the right-most bit set in the word. Let k be the |
| 118 | + // index of that bit. Since only a single bit is set, the value is two |
| 119 | + // to the power of k. Multiplying by a power of two is equivalent to |
| 120 | + // left shifting, in this case by k bits. The de Bruijn (64 bit) constant |
| 121 | + // is such that all six bit, consecutive substrings are distinct. |
| 122 | + // Therefore, if we have a left shifted version of this constant we can |
| 123 | + // find by how many bits it was shifted by looking at which six bit |
| 124 | + // substring ended up at the top of the word. |
| 125 | + // (Knuth, volume 4, section 7.3.1) |
| 126 | + return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)]) |
| 127 | +} |
| 128 | + |
| 129 | +// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0. |
| 130 | +func LeadingZeros64(x uint64) int { return 64 - Len64(x) } |
| 131 | + |
| 132 | +// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0. |
| 133 | +func LeadingZeros8(x uint8) int { return 8 - Len8(x) } |
| 134 | + |
| 135 | +// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0. |
| 136 | +func TrailingZeros8(x uint8) int { |
| 137 | + return int(ntz8tab[x]) |
| 138 | +} |
| 139 | + |
| 140 | +// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 141 | +func Len8(x uint8) int { |
| 142 | + return int(len8tab[x]) |
| 143 | +} |
0 commit comments