Skip to content

Commit b9f488d

Browse files
authored
fix: Speed up cache reads (#255)
This PR speeds up `read.stream` and `read` by skipping the `fs.stat` call if `size` was passed via `opts`. Currently, the only reason for doing a `stat` call is to get the size (and throw the size mismatch error if the size is different). This is unnecessary for 3 reasons:

1. In the case of `read.stream`, the stream already compares the sizes at the end and throws an error if there's a mismatch.
2. In the case of `read`, we can compare the sizes after reading the cache contents.
3. In both cases we are already doing an integrity check, which would automatically fail if there's a size difference since the hashes would be different.

In this PR, the `stat` call is only made if the user does not pass a `size` property via `opts`. This makes sense because without knowing the `size`, the stream has to make an unnecessary `fs.read` call at the end before closing, which has a significant cost (that cost is much, much greater than the cost of doing `fs.stat`).

On my machine, the benchmarks with this change look like this:

```
┌─────────┬─────────────────────┬─────────┬────────────────────┬───────────┬─────────┐
│ (index) │      Task Name      │ ops/sec │ Average Time (ns)  │  Margin   │ Samples │
├─────────┼─────────────────────┼─────────┼────────────────────┼───────────┼─────────┤
│    0    │ 'read.stream (new)' │ '4,643' │ 215352.03841424757 │ '±10.20%' │   465   │
│    1    │ 'read.stream (old)' │ '3,933' │ 254237.5665025663  │ '±7.17%'  │   394   │
│    2    │    'read (old)'     │ '2,915' │ 343045.55719845917 │ '±13.42%' │   292   │
│    3    │    'read (new)'     │ '4,392' │ 227636.30011033904 │ '±12.14%' │   449   │
└─────────┴─────────────────────┴─────────┴────────────────────┴───────────┴─────────┘
```

That's a solid 16% improvement in the case of `read.stream` and 36% improvement in the case of `read`.
1 parent 7eab139 commit b9f488d

File tree

1 file changed

+8
-9
lines changed

1 file changed

+8
-9
lines changed

lib/content/read.js

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,20 @@ async function read (cache, integrity, opts = {}) {
1313
const { size } = opts
1414
const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => {
1515
// get size
16-
const stat = await fs.stat(cpath)
16+
const stat = size ? { size } : await fs.stat(cpath)
1717
return { stat, cpath, sri }
1818
})
19-
if (typeof size === 'number' && stat.size !== size) {
20-
throw sizeError(size, stat.size)
21-
}
2219

2320
if (stat.size > MAX_SINGLE_READ_SIZE) {
2421
return readPipeline(cpath, stat.size, sri, new Pipeline()).concat()
2522
}
2623

2724
const data = await fs.readFile(cpath, { encoding: null })
25+
26+
if (stat.size !== data.length) {
27+
throw sizeError(stat.size, data.length)
28+
}
29+
2830
if (!ssri.checkData(data, sri)) {
2931
throw integrityError(sri, cpath)
3032
}
@@ -55,13 +57,10 @@ function readStream (cache, integrity, opts = {}) {
5557
// Set all this up to run on the stream and then just return the stream
5658
Promise.resolve().then(async () => {
5759
const { stat, cpath, sri } = await withContentSri(cache, integrity, async (cpath, sri) => {
58-
// just stat to ensure it exists
59-
const stat = await fs.stat(cpath)
60+
// get size
61+
const stat = size ? { size } : await fs.stat(cpath)
6062
return { stat, cpath, sri }
6163
})
62-
if (typeof size === 'number' && size !== stat.size) {
63-
return stream.emit('error', sizeError(size, stat.size))
64-
}
6564

6665
return readPipeline(cpath, stat.size, sri, stream)
6766
}).catch(err => stream.emit('error', err))

0 commit comments

Comments
 (0)