Skip to content
This repository was archived by the owner on Apr 29, 2020. It is now read-only.

feat: adds js implementation of rabin chunker for windows and browser #30

Merged
merged 1 commit into from
May 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"hamt-sharding": "~0.0.2",
"ipfs-unixfs": "~0.1.16",
"ipld-dag-pb": "~0.17.2",
"long": "^4.0.0",
"multicodec": "~0.5.1",
"multihashing-async": "~0.7.0",
"superstruct": "~0.6.1"
Expand Down
232 changes: 208 additions & 24 deletions src/chunker/rabin.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
'use strict'

const errCode = require('err-code')

let createRabin
const Long = require('long')
const BufferList = require('bl')
let rabin

module.exports = async function * rabinChunker (source, options) {
if (!createRabin) {
if (!rabin) {
try {
createRabin = require('rabin')

if (typeof createRabin !== 'function') {
throw errCode(new Error(`createRabin was not a function`), 'ERR_UNSUPPORTED')
}
} catch (err) {
throw errCode(new Error(`Rabin chunker not available, it may have failed to install or not be supported on this platform`), 'ERR_UNSUPPORTED')
rabin = nativeRabin()
} catch (_) {
// fallback to js implementation
rabin = jsRabin()
}
}

Expand All @@ -30,30 +28,216 @@ module.exports = async function * rabinChunker (source, options) {
}

const sizepow = Math.floor(Math.log2(avg))
const rabin = createRabin({

for await (const chunk of rabin(source, {
min: min,
max: max,
bits: sizepow,
window: options.window,
polynomial: options.polynomial
})
})) {
yield chunk
}
}

const nativeRabin = () => {
const createRabin = require('rabin')

if (typeof rabin !== 'function') {
throw errCode(new Error(`rabin was not a function`), 'ERR_UNSUPPORTED')
}

return async function * (source, options) {
const rabin = createRabin(options)

// TODO: rewrite rabin using node streams v3
for await (const chunk of source) {
rabin.buffers.append(chunk)
rabin.pending.push(chunk)

const sizes = []

rabin.rabin.fingerprint(rabin.pending, sizes)
rabin.pending = []

for (let i = 0; i < sizes.length; i++) {
const size = sizes[i]
const buf = rabin.buffers.slice(0, size)
rabin.buffers.consume(size)

yield buf
}
}

if (rabin.buffers.length) {
yield rabin.buffers.slice(0)
}
}
}

const jsRabin = () => {
// see https://github.com/datproject/rabin/blob/c0378395dc0a125ab21ac176ec504f9995b34e62/src/rabin.cc
class Rabin {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

npm i rabin.js?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean rabinjs? It has no repo on npm and only 5 downloads a week.

Not sure it's ready for production.

constructor (options) {
this.window = new Array(options.window || 64).fill(Long.fromInt(0))
this.wpos = 0
this.count = 0
this.digest = Long.fromInt(0)
this.chunkLength = 0
this.polynomial = options.polynomial
this.polynomialDegree = 53
this.polynomialShift = this.polynomialDegree - 8
this.averageBits = options.bits || 12
this.minSize = options.min || 8 * 1024
this.maxSize = options.max || 32 * 1024
this.mask = Long.fromInt(1).shiftLeft(this.averageBits).subtract(1)
this.modTable = []
this.outTable = []

this.calculateTables()
}

calculateTables () {
for (let i = 0; i < 256; i++) {
let hash = Long.fromInt(0, true)

hash = this.appendByte(hash, i)

for (let j = 0; j < this.window.length - 1; j++) {
hash = this.appendByte(hash, 0)
}

this.outTable[i] = hash
}

const k = this.deg(this.polynomial)

for (let i = 0; i < 256; i++) {
const b = Long.fromInt(i, true)

this.modTable[i] = b.shiftLeft(k)
.modulo(this.polynomial)
.or(b.shiftLeft(k))
}
}

deg (p) {
let mask = Long.fromString('0x8000000000000000', true, 16)

for (let i = 0; i < 64; i++) {
if (mask.and(p).greaterThan(0)) {
return Long.fromInt(63 - i)
}

mask = mask.shiftRight(1)
}

// TODO: rewrite rabin using node streams v3
for await (const chunk of source) {
rabin.buffers.append(chunk)
rabin.pending.push(chunk)
return Long.fromInt(-1)
}

appendByte (hash, b) {
hash = hash.shiftLeft(8)
hash = hash.or(b)

return hash.modulo(this.polynomial)
}

getFingerprints (bufs) {
const lengths = []

for (let i = 0; i < bufs.length; i++) {
let buf = bufs[i]

while (true) {
const remaining = this.nextChunk(buf)

if (remaining < 0) {
break
}

buf = buf.slice(remaining)

lengths.push(this.chunkLength)
}
}

return lengths
}

nextChunk (buf) {
for (let i = 0; i < buf.length; i++) {
const val = Long.fromInt(buf[i])

this.slide(val)

this.count++

if ((this.count >= this.minSize && this.digest.and(this.mask).equals(0)) || this.count >= this.maxSize) {
this.chunkLength = this.count

this.reset()

return i + 1
}
}

return -1
}

slide (value) {
const out = this.window[this.wpos].toInt() & 255
this.window[this.wpos] = value
this.digest = this.digest.xor(this.outTable[out])
this.wpos = (this.wpos + 1) % this.window.length

this.append(value)
}

reset () {
this.window = this.window.map(() => Long.fromInt(0))
this.wpos = 0
this.count = 0
this.digest = Long.fromInt(0)

const sizes = []
this.slide(Long.fromInt(1))
}

rabin.rabin.fingerprint(rabin.pending, sizes)
rabin.pending = []
append (value) {
const index = this.digest.shiftRight(this.polynomialShift).toInt() & 255
this.digest = this.digest.shiftLeft(8)
this.digest = this.digest.or(value)

for (let i = 0; i < sizes.length; i++) {
const size = sizes[i]
const buf = rabin.buffers.slice(0, size)
rabin.buffers.consume(size)
const entry = this.modTable[index]

if (entry) {
this.digest = this.digest.xor(entry)
}
}
}

return async function * (source, options) {
const r = new Rabin(options)
const buffers = new BufferList()
let pending = []

for await (const chunk of source) {
buffers.append(chunk)
pending.push(chunk)

const sizes = r.getFingerprints(pending)
pending = []

for (let i = 0; i < sizes.length; i++) {
var size = sizes[i]
var buf = buffers.slice(0, size)
buffers.consume(size)

yield buf
}
}

yield buf
if (buffers.length) {
yield buffers.slice(0)
}
}
}
4 changes: 2 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ const ChunkerOptions = struct({
maxChunkSize: 'number?',
avgChunkSize: 'number?',
window: 'number?',
polynomial: 'string?'
polynomial: 'number?'
}, {
maxChunkSize: 262144,
avgChunkSize: 262144,
window: 16,
polynomial: '0x3DF305DFB2A805'
polynomial: 17437180132763653 // https://github.com/ipfs/go-ipfs-chunker/blob/d0125832512163708c0804a3cda060e21acddae4/rabin.go#L11
})

const BuilderOptions = struct({
Expand Down
6 changes: 0 additions & 6 deletions test/chunker-fixed-size.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,6 @@ const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt'
describe('chunker: fixed size', function () {
this.timeout(30000)

before(function () {
if (!isNode) {
this.skip()
}
})

it('chunks non flat buffers', async () => {
const b1 = Buffer.alloc(2 * 256)
const b2 = Buffer.alloc(1 * 256)
Expand Down
25 changes: 0 additions & 25 deletions test/chunker-rabin-browser.spec.js

This file was deleted.

Loading