This repository was archived by the owner on Aug 12, 2020. It is now read-only.

Commit 269b35a

Merge pull request #214 from ipfs/use-raw-nodes-for-leaves
chore: use raw nodes for leaf data
2 parents 0af9bf4 + 9d44a75

File tree

6 files changed: +141 additions, -17 deletions


README.md

Lines changed: 3 additions & 2 deletions
@@ -5,7 +5,7 @@ IPFS unixFS Engine
 [![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
 [![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
 [![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
-[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
+[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?branch=master&style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
 [![Coverage Status](https://coveralls.io/repos/github/ipfs/js-ipfs-unixfs-engine/badge.svg?branch=master)](https://coveralls.io/github/ipfs/js-ipfs-unixfs-engine?branch=master)
 [![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-engine)
 [![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
@@ -141,14 +141,15 @@ The input's file paths and directory structure will be preserved in the [`dag-pb
 - `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
 - `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
 - `layerRepeat` (positive integer, defaults to `4`): the maximum repetition of parent nodes for each layer of the tree (only applicable to the `trickle` DAG builder strategy)
-- `reduceSingleLeafToSelf` (boolean, defaults to `false`): optimization that, when a reduction is applied to a set containing a single node, collapses it to that node
+- `reduceSingleLeafToSelf` (boolean, defaults to `true`): optimization that, when a reduction is applied to a set containing a single node, collapses it to that node
 - `dirBuilder` (object): the options for the directory builder
   - `hamt` (object): the options for the HAMT sharded directory builder
     - `bits` (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
 - `progress` (function): a function that will be called with the byte length of chunks as a file is added to ipfs
 - `onlyHash` (boolean, defaults to `false`): only chunk and hash - do not write to disk
 - `hashAlg` (string): the multihash hashing algorithm to use
 - `cidVersion` (integer, defaults to `0`): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
+- `rawLeafNodes` (boolean, defaults to `false`): when a file would span multiple DAGNodes and this is `true`, the leaf nodes will be marked as `raw` `unixfs` nodes

 ### Exporter
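For orientation, a minimal sketch of how these options are passed in. This is a hedged example, not the package's official docs: the `importer` export name and the pre-built `ipld` instance are assumptions taken from the test file below, and the path, content, and option values are illustrative.

```js
// A minimal sketch, assuming the pull-stream importer factory that
// test/importer.js calls; `ipld` must be an already-constructed IPLD instance.
const pull = require('pull-stream')
const importer = require('ipfs-unixfs-engine').importer

pull(
  pull.once({
    path: '/foo.txt',
    content: Buffer.from('hello world')
  }),
  importer(ipld, {
    strategy: 'balanced',         // DAG builder strategy
    rawLeafNodes: true,           // mark leaves of multi-node files as raw unixfs nodes
    reduceSingleLeafToSelf: true  // collapse a single-leaf file to that leaf
  }),
  pull.collect((err, files) => {
    if (err) throw err
    // each entry carries { path, multihash, size }
    files.forEach((file) => console.log(file.path, file.size))
  })
)
```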

src/builder/builder.js

Lines changed: 4 additions & 2 deletions
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
 const defaultOptions = {
   chunkerOptions: {
     maxChunkSize: 262144
-  }
+  },
+  rawLeafNodes: false
 }

 module.exports = function (createChunker, ipld, createReducer, _options) {
@@ -96,6 +97,7 @@ module.exports = function (createChunker, ipld, createReducer, _options) {

   let previous
   let count = 0
+  const leafType = options.rawLeafNodes ? 'raw' : 'file'

   pull(
     file.content,
@@ -106,7 +108,7 @@ module.exports = function (createChunker, ipld, createReducer, _options) {
       }
       return Buffer.from(chunk)
     }),
-    pull.map(buffer => new UnixFS('file', buffer)),
+    pull.map(buffer => new UnixFS(leafType, buffer)),
     pull.asyncMap((fileNode, callback) => {
       DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
         callback(err, { DAGNode: node, fileNode: fileNode })
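For context, a rough sketch of what the `leafType` switch above changes, using the `ipfs-unixfs` constructor already used in this file; the chunk content is illustrative.

```js
// Sketch: the same chunk wrapped as a 'file' leaf vs a 'raw' leaf.
// Both serialize the bytes into a UnixFS protobuf; only the node type differs.
const UnixFS = require('ipfs-unixfs')

const chunk = Buffer.from('some chunk of file data')

const fileLeaf = new UnixFS('file', chunk) // default (rawLeafNodes: false)
const rawLeaf = new UnixFS('raw', chunk)   // rawLeafNodes: true

console.log(fileLeaf.type) // 'file'
console.log(rawLeaf.type)  // 'raw'

// either marshals to bytes suitable for DAGNode.create()
const bytes = rawLeaf.marshal()
```

The reduce step in src/builder/reduce.js below relies on this type field when it converts a lone raw leaf back into a file node.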

src/builder/index.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const reducers = {
 const defaultOptions = {
   strategy: 'balanced',
   highWaterMark: 100,
-  reduceSingleLeafToSelf: false
+  reduceSingleLeafToSelf: true
 }

 module.exports = function (Chunker, ipld, _options) {

src/builder/reduce.js

Lines changed: 44 additions & 11 deletions
@@ -10,19 +10,52 @@ const DAGNode = dagPB.DAGNode

 module.exports = function (file, ipld, options) {
   return function (leaves, callback) {
-    if (leaves.length === 1 && (leaves[0].single || options.reduceSingleLeafToSelf)) {
-      const leave = leaves[0]
-      callback(null, {
-        path: file.path,
-        multihash: leave.multihash,
-        size: leave.size,
-        leafSize: leave.leafSize,
-        name: leave.name
-      })
-      return // early
+    if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
+      const leaf = leaves[0]
+
+      if (!options.rawLeafNodes) {
+        return callback(null, {
+          path: file.path,
+          multihash: leaf.multihash,
+          size: leaf.size,
+          leafSize: leaf.leafSize,
+          name: leaf.name
+        })
+      }
+
+      // we are using raw leaf nodes - this file only has one node, but it'll be marked raw,
+      // so convert it back to a file node
+      return waterfall([
+        (cb) => ipld.get(new CID(leaf.multihash), cb),
+        (result, cb) => {
+          const meta = UnixFS.unmarshal(result.value.data)
+          const fileNode = new UnixFS('file', meta.data)
+
+          DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
+            cb(err, { DAGNode: node, fileNode: fileNode })
+          })
+        },
+        (result, cb) => {
+          let cid = new CID(result.DAGNode.multihash)
+
+          if (options.cidVersion === 1) {
+            cid = cid.toV1()
+          }
+
+          ipld.put(result.DAGNode, { cid }, (err) => cb(err, result))
+        },
+        (result, cb) => {
+          cb(null, {
+            multihash: result.DAGNode.multihash,
+            size: result.DAGNode.size,
+            leafSize: result.fileNode.fileSize(),
+            name: ''
+          })
+        }
+      ], callback)
     }

-    // create a parent node and add all the leafs
+    // create a parent node and add all the leaves
     const f = new UnixFS('file')

     const links = leaves.map((leaf) => {

src/importer/index.js

Lines changed: 2 additions & 1 deletion
@@ -14,7 +14,8 @@ const chunkers = {
 }

 const defaultOptions = {
-  chunker: 'fixed'
+  chunker: 'fixed',
+  rawLeafNodes: false
 }

 module.exports = function (ipld, _options) {

test/importer.js

Lines changed: 87 additions & 0 deletions
@@ -15,6 +15,9 @@ const CID = require('cids')
 const Ipld = require('ipld')
 const loadFixture = require('aegir/fixtures')
 const each = require('async/each')
+const waterfall = require('async/waterfall')
+const parallel = require('async/parallel')
+const UnixFs = require('ipfs-unixfs')

 function stringifyMh (files) {
   return files.map((file) => {
@@ -104,7 +107,67 @@ const strategyOverrides = {
       size: 2669627
     }
   }
+}
+
+const checkLeafNodeTypes = (ipld, options, expected, done) => {
+  waterfall([
+    (cb) => pull(
+      pull.once({
+        path: '/foo',
+        content: Buffer.alloc(262144 + 5).fill(1)
+      }),
+      importer(ipld, options),
+      pull.collect(cb)
+    ),
+    (files, cb) => ipld.get(new CID(files[0].multihash), cb),
+    (result, cb) => {
+      const node = result.value
+      const meta = UnixFs.unmarshal(node.data)
+
+      expect(meta.type).to.equal('file')
+      expect(node.links.length).to.equal(2)
+
+      parallel(
+        node.links.map(link => {
+          return (done) => {
+            waterfall([
+              (next) => ipld.get(new CID(link.multihash), next),
+              (result, next) => {
+                const node = result.value
+                const meta = UnixFs.unmarshal(node.data)
+
+                expect(meta.type).to.equal(expected)
+
+                next()
+              }
+            ], done)
+          }
+        }), cb)
+    }
+  ], done)
+}

+const checkNodeLinks = (ipld, options, expected, done) => {
+  waterfall([
+    (cb) => pull(
+      pull.once({
+        path: '/foo',
+        content: Buffer.alloc(100).fill(1)
+      }),
+      importer(ipld, options),
+      pull.collect(cb)
+    ),
+    (files, cb) => ipld.get(new CID(files[0].multihash), cb),
+    (result, cb) => {
+      const node = result.value
+      const meta = UnixFs.unmarshal(node.data)
+
+      expect(meta.type).to.equal('file')
+      expect(node.links.length).to.equal(expected)
+
+      cb()
+    }
+  ], done)
 }

 module.exports = (repo) => {
@@ -517,6 +580,30 @@ module.exports = (repo) => {
        pull.collect(onCollected)
      )
    })
+
+    it('imports file with raw leaf nodes when specified', (done) => {
+      checkLeafNodeTypes(ipld, {
+        rawLeafNodes: true
+      }, 'raw', done)
+    })
+
+    it('imports file with file leaf nodes when specified', (done) => {
+      checkLeafNodeTypes(ipld, {
+        rawLeafNodes: false
+      }, 'file', done)
+    })
+
+    it('reduces file to single node when specified', (done) => {
+      checkNodeLinks(ipld, {
+        reduceSingleLeafToSelf: true
+      }, 0, done)
+    })
+
+    it('does not reduce file to single node when overridden by options', (done) => {
+      checkNodeLinks(ipld, {
+        reduceSingleLeafToSelf: false
+      }, 1, done)
+    })
   })
 })
}
