
Commit 6325db9

Adds begin/end byte slices to exporter

1 parent: 2b5a81f

11 files changed, +466 -477 lines

README.md

Lines changed: 12 additions & 61 deletions

````diff
@@ -165,9 +165,9 @@ filesStream.on('data', (file) => file.content.pipe(process.stdout))
 const Exporter = require('ipfs-unixfs-engine').Exporter
 ```
 
-### new Exporter(<cid or ipfsPath>, <dag or ipld-resolver>)
+### new Exporter(<cid or ipfsPath>, <dag or ipld-resolver>, <options>)
 
-Uses the given [dag API] or an [ipld-resolver instance][] to fetch an IPFS [UnixFS][] object(s) by their multiaddress.
+Uses the given [dag API][] or an [ipld-resolver instance][] to fetch an IPFS [UnixFS][] object(s) by their multiaddress.
 
 Creates a new readable stream in object mode that outputs objects of the form
 
@@ -178,57 +178,6 @@ Creates a new readable stream in object mode that outputs objects of the form
 }
 ```
 
-Errors are received as with a normal stream, by listening on the `'error'` event to be emitted.
-
-
-[dag API]: https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md
-[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
-[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
-
-## Reader
-
-The `reader` allows you to receive part or all of a file as a [pull-stream].
-
-#### Reader example
-
-```js
-const readable = require('ipfs-unixfs-engine').readable
-const pull = require('pull-stream')
-const drain = require('pull-stream/sinks/collect')
-
-pull(
-  readable(cid, ipldResolver)
-  collect((error, chunks) => {
-    // do something with the file chunks and/or handle errors
-  })
-)
-```
-
-#### Reader API
-
-```js
-const reader = require('ipfs-unixfs-engine').reader
-```
-
-### reader(<cid or ipfsPath>, <dag or ipld-resolver>, <begin>, <end>)
-
-Uses the given [dag API][] or an [ipld-resolver instance][] to fetch an IPFS [UnixFS][] object by their multiaddress.
-
-Creates a new [pull-stream][] that sends the requested chunks of data as a series of [Buffer][] objects.
-
-```js
-const readable = require('ipfs-unixfs-engine').readable
-const pull = require('pull-stream')
-const drain = require('pull-stream/sinks/drain')
-
-pull(
-  readable(cid, ipldResolver),
-  drain((chunk) => {
-    // do something with the file chunk
-  })
-)
-```
-
 #### `begin` and `end`
 
 `begin` and `end` arguments can optionally be passed to the reader function. These follow the same semantics as the JavaScript [`Array.slice(begin, end)`][] method.
@@ -240,14 +189,17 @@ A negative `begin` starts the slice from the end of the stream and a negative `e
 See [the tests](test/reader.js) for examples of using these arguments.
 
 ```js
-const readable = require('ipfs-unixfs-engine').readable
+const exporter = require('ipfs-unixfs-engine').exporter
 const pull = require('pull-stream')
 const drain = require('pull-stream/sinks/drain')
 
 pull(
-  readable(cid, ipldResolver, 0, 10)
-  drain((chunk) => {
-    // chunk is a Buffer containing only the first 10 bytes of the stream
+  exporter(cid, ipldResolver, {
+    begin: 0,
+    end: 10
+  })
+  drain((file) => {
+    // file.content() is a pull stream containing only the first 10 bytes of the file
   })
 )
 ```
@@ -257,23 +209,22 @@ pull(
 Errors are received by [pull-stream][] sinks.
 
 ```js
-const readable = require('ipfs-unixfs-engine').readable
+const exporter = require('ipfs-unixfs-engine').exporter
 const pull = require('pull-stream')
 const drain = require('pull-stream/sinks/collect')
 
 pull(
-  readable(cid, ipldResolver, 0, 10)
+  exporter(cid, ipldResolver)
   collect((error, chunks) => {
     // handle the error
   })
 )
 ```
 
-[pull-stream]: https://www.npmjs.com/package/pull-stream
-[Buffer]: https://www.npmjs.com/package/buffer
 [dag API]: https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md
 [ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
 [UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
+[pull-stream]: https://www.npmjs.com/package/pull-stream
 [`Array.slice(begin, end)`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
 
 ## Contribute
````
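The committed README example only covers a positive range (`begin: 0, end: 10`). As a minimal sketch of the negative-index semantics described above (reading the last bytes of a file), reusing the placeholder `cid` and `ipldResolver` names from the README examples:

```js
// Sketch only, not part of the commit. Follows the Array.slice semantics the
// README describes: a negative begin counts back from the end of the file.
// `cid` and `ipldResolver` are placeholders, as in the README examples.
const exporter = require('ipfs-unixfs-engine').exporter
const pull = require('pull-stream')
const drain = require('pull-stream/sinks/drain')

pull(
  exporter(cid, ipldResolver, { begin: -10 }),
  drain((file) => {
    // file.content is the pull stream carrying only the last 10 bytes of the file
    // (src/exporter/file.js below assigns the sliced pull stream to the content property)
  })
)
```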

src/exporter/file.js

Lines changed: 16 additions & 7 deletions

```diff
@@ -1,13 +1,14 @@
 'use strict'
 
 const traverse = require('pull-traverse')
+const traverseSlice = require('./traverse-slice')
 const UnixFS = require('ipfs-unixfs')
 const CID = require('cids')
 const pull = require('pull-stream')
 const paramap = require('pull-paramap')
 
 // Logic to export a single (possibly chunked) unixfs file.
-module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => {
+module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth, begin, end) => {
   function getData (node) {
     try {
       const file = UnixFS.unmarshal(node.data)
@@ -31,19 +32,27 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth)
     return pull.empty()
   }
 
-  let content = pull(
-    traverse.depthFirst(node, visitor),
-    pull.map(getData)
-  )
-
   const file = UnixFS.unmarshal(node.data)
+  const fileSize = size || file.fileSize()
+
+  let content
+
+  if (!isNaN(begin)) {
+    content = traverseSlice(node, dag, begin, end)
+  } else {
+    content = pull(
+      traverse.depthFirst(node, visitor),
+      pull.map(getData)
+    )
+  }
+
   return pull.values([{
     depth: depth,
     content: content,
     name: name,
     path: path,
     hash: node.multihash,
-    size: size || file.fileSize(),
+    size: fileSize,
     type: 'file'
   }])
 }
```

src/exporter/index.js

Lines changed: 5 additions & 3 deletions

```diff
@@ -36,11 +36,13 @@ function pathBaseAndRest (path) {
 }
 
 const defaultOptions = {
-  maxDepth: Infinity
+  maxDepth: Infinity,
+  begin: undefined,
+  end: undefined
 }
 
-module.exports = (path, dag, _options) => {
-  const options = Object.assign({}, defaultOptions, _options)
+module.exports = (path, dag, options) => {
+  options = Object.assign({}, defaultOptions, options)
 
   let dPath
   try {
```
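One detail worth noting: the `begin: undefined` default added here works together with the `!isNaN(begin)` guard in `src/exporter/file.js` above, so the exporter only switches to the slice traversal when a numeric `begin` is supplied. A tiny sketch of how that guard behaves (not part of the commit):

```js
// Not part of the commit: how the !isNaN(begin) guard in src/exporter/file.js
// behaves for the default option and for explicit byte offsets.
console.log(!isNaN(undefined)) // false -> default: full depth-first traversal
console.log(!isNaN(0))         // true  -> byte-slice traversal via traverse-slice.js
console.log(!isNaN(-5))        // true  -> negative offsets also trigger slicing
```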

src/exporter/resolve.js

Lines changed: 6 additions & 6 deletions

```diff
@@ -32,33 +32,33 @@ function createResolver (dag, options, depth, parent) {
         return pull.error(new Error('no depth'))
       }
       if (item.object) {
-        return cb(null, resolveItem(item.object, item))
+        return cb(null, resolveItem(item.object, item, options.begin, options.end))
       }
       dag.get(new CID(item.multihash), (err, node) => {
         if (err) {
           return cb(err)
         }
         // const name = item.fromPathRest ? item.name : item.path
-        cb(null, resolveItem(node.value, item))
+        cb(null, resolveItem(node.value, item, options.begin, options.end))
       })
     }),
     pull.flatten(),
     pull.filter(Boolean),
     pull.filter((node) => node.depth <= options.maxDepth)
   )
 
-  function resolveItem (node, item) {
-    return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth)
+  function resolveItem (node, item, begin, end) {
+    return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, begin, end)
   }
 
-  function resolve (node, name, path, pathRest, size, dag, parentNode, depth) {
+  function resolve (node, name, path, pathRest, size, dag, parentNode, depth, begin, end) {
     const type = typeOf(node)
     const nodeResolver = resolvers[type]
     if (!nodeResolver) {
       return pull.error(new Error('Unkown node type ' + type))
     }
     const resolveDeep = createResolver(dag, options, depth, node)
-    return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth)
+    return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, begin, end)
   }
 }
```

src/exporter/traverse-slice.js

Lines changed: 104 additions & 0 deletions

New file:

```js
'use strict'

const CID = require('cids')
const pull = require('pull-stream')
const asyncValues = require('pull-async-values')
const asyncMap = require('pull-stream/throughs/async-map')
const map = require('pull-stream/throughs/map')
const UnixFS = require('ipfs-unixfs')
const waterfall = require('async/waterfall')

module.exports = (fileNode, dag, begin = 0, end) => {
  let streamPosition = 0

  return pull(
    asyncValues((cb) => {
      const meta = UnixFS.unmarshal(fileNode.data)

      if (meta.type !== 'file') {
        return cb(new Error(`Node ${fileNode} was not a file (was ${meta.type}), can only read files`))
      }

      const fileSize = meta.fileSize()

      if (!end || end > fileSize) {
        end = fileSize
      }

      if (begin < 0) {
        begin = fileSize + begin
      }

      if (end < 0) {
        end = fileSize + end
      }

      const links = fileNode.links

      if (!links || !links.length) {
        if (meta.data && meta.data.length) {
          // file was small enough to fit in one DAGNode so has no links
          return cb(null, [(done) => done(null, meta.data)])
        }

        return cb(new Error(`Path ${fileNode} had no links or data`))
      }

      const linkedDataSize = links.reduce((acc, curr) => acc + curr.size, 0)
      const overhead = (linkedDataSize - meta.fileSize()) / links.length

      // create an array of functions to fetch link data
      cb(null, links.map((link) => (done) => {
        // DAGNode Links report unixfs object data sizes $overhead bytes (typically 14)
        // larger than they actually are due to the protobuf wrapper
        const bytesInLinkedObjectData = link.size - overhead

        if (begin > (streamPosition + bytesInLinkedObjectData)) {
          // Start byte is after this block so skip it
          streamPosition += bytesInLinkedObjectData

          return done()
        }

        if (end < streamPosition) {
          // End byte was before this block so skip it
          streamPosition += bytesInLinkedObjectData

          return done()
        }

        // transform the multihash to a cid, the cid to a node and the node to some data
        waterfall([
          (next) => dag.get(new CID(link.multihash), next),
          (node, next) => next(null, node.value.data),
          (data, next) => next(null, UnixFS.unmarshal(data).data)
        ], done)
      }))
    }),
    asyncMap((loadLinkData, cb) => loadLinkData(cb)),
    pull.filter(Boolean),
    map((data) => {
      const block = extractDataFromBlock(data, streamPosition, begin, end)

      streamPosition += data.length

      return block
    })
  )
}

function extractDataFromBlock (block, streamPosition, begin, end) {
  const blockLength = block.length

  if (end - streamPosition < blockLength) {
    // If the end byte is in the current block, truncate the block to the end byte
    block = block.slice(0, end - streamPosition)
  }

  if (begin > streamPosition && begin < (streamPosition + blockLength)) {
    // If the start byte is in the current block, skip to the start byte
    block = block.slice(begin - streamPosition)
  }

  return block
}
```
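To illustrate the slicing logic above, here is a small self-contained sketch (not part of the commit) that copies `extractDataFromBlock` from the new file and walks two 5-byte blocks with `begin = 3` and `end = 8`, the same way the `map` stage above does as `streamPosition` advances:

```js
// Standalone sketch (not part of the commit): a copy of extractDataFromBlock
// from traverse-slice.js, applied to two 5-byte blocks with begin = 3, end = 8.
function extractDataFromBlock (block, streamPosition, begin, end) {
  const blockLength = block.length

  if (end - streamPosition < blockLength) {
    // end byte falls inside this block: truncate it
    block = block.slice(0, end - streamPosition)
  }

  if (begin > streamPosition && begin < (streamPosition + blockLength)) {
    // begin byte falls inside this block: drop the leading bytes
    block = block.slice(begin - streamPosition)
  }

  return block
}

const blocks = [Buffer.from('01234'), Buffer.from('56789')]
const begin = 3
const end = 8
let streamPosition = 0

const slices = blocks.map((block) => {
  const slice = extractDataFromBlock(block, streamPosition, begin, end)
  streamPosition += block.length
  return slice
})

console.log(Buffer.concat(slices).toString()) // '34567' (bytes 3 inclusive to 8 exclusive)
```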

src/index.js

Lines changed: 0 additions & 1 deletion

```diff
@@ -2,4 +2,3 @@
 
 exports.importer = exports.Importer = require('./importer')
 exports.exporter = exports.Exporter = require('./exporter')
-exports.reader = exports.Reader = require('./reader')
```
