Skip to content

Commit ec60d33

Browse files
mjsir911isaacs
authored andcommitted
Cache the tarballs in pacote
Tell `fetch` not to cache it's downloaded tarballs, leave that to us Also see npm/cli#2976 for my first attempt & some discussions as to how we got here. Fix: #73 Fix: npm/cli#2160 Close: npm/cli#2976 PR-URL: #74 Credit: @mjsir911 Close: #74 Reviewed-by: @isaacs EDIT(@isaacs): Updated to add test, allow make-fetch-happen to use the cache, pipe into cache properly, and avoid cache double-writes.
1 parent 99738cd commit ec60d33

File tree

3 files changed

+48
-11
lines changed

3 files changed

+48
-11
lines changed

lib/fetcher.js

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const _istream = Symbol('_istream')
4040
const _assertType = Symbol('_assertType')
4141
const _tarballFromCache = Symbol('_tarballFromCache')
4242
const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
43+
const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
4344

4445
class FetcherBase {
4546
constructor (spec, opts) {
@@ -166,8 +167,8 @@ class FetcherBase {
166167
}
167168

168169
// private, should be overridden.
169-
// Note that they should *not* calculate or check integrity, but *just*
170-
// return the raw tarball data stream.
170+
// Note that they should *not* calculate or check integrity or cache,
171+
// but *just* return the raw tarball data stream.
171172
[_tarballFromResolved] () {
172173
throw this.notImplementedError
173174
}
@@ -194,6 +195,10 @@ class FetcherBase {
194195
return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
195196
}
196197

198+
get [_cacheFetches] () {
199+
return true
200+
}
201+
197202
[_istream] (stream) {
198203
// everyone will need one of these, either for verifying or calculating
199204
// We always set it, because we have might only have a weak legacy hex
@@ -203,7 +208,27 @@ class FetcherBase {
203208
// gets to the point of re-setting the integrity.
204209
const istream = ssri.integrityStream(this.opts)
205210
istream.on('integrity', i => this.integrity = i)
206-
return stream.on('error', er => istream.emit('error', er)).pipe(istream)
211+
// we have to return a stream that gets ALL the data, and proxies errors,
212+
// but then pipe from the original tarball stream into the cache as well.
213+
// To do this without losing any data, and since the cacache put stream
214+
// is not a passthrough, we have to pipe from the original stream into
215+
// the cache AFTER we pipe into the istream.
216+
// If the cacache put stream was a passthrough, we could just return
217+
// a single pipeline with all three, but that would make cacache put
218+
// streams less useful, because they'd have to be read from to work.
219+
stream.on('error', er => istream.emit('error', er))
220+
stream.pipe(istream)
221+
if (this.opts.cache && this[_cacheFetches]) {
222+
const cstream = cacache.put.stream(
223+
this.opts.cache,
224+
`pacote:tarball:${this.from}`,
225+
this.opts
226+
)
227+
// best-effort. cache write errors should not crash the fetch.
228+
cstream.on('error', (er) => {})
229+
stream.pipe(cstream)
230+
}
231+
return istream
207232
}
208233

209234
pickIntegrityAlgorithm () {
@@ -232,7 +257,9 @@ class FetcherBase {
232257
// An ENOENT trying to read a tgz file, for example, is Right Out.
233258
isRetriableError (er) {
234259
// TODO: check error class, once those are rolled out to our deps
235-
return this.isDataCorruptionError(er) || er.code === 'ENOENT'
260+
return this.isDataCorruptionError(er) ||
261+
er.code === 'ENOENT' ||
262+
er.code === 'EISDIR'
236263
}
237264

238265
// Mostly internal, but has some uses

lib/remote.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ const Minipass = require('minipass')
88
// The default registry URL is a string of great magic.
99
const magic = /^https?:\/\/registry\.npmjs\.org\//
1010

11+
const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
1112
const _headers = Symbol('_headers')
1213
class RemoteFetcher extends Fetcher {
1314
constructor (spec, opts) {
@@ -21,6 +22,12 @@ class RemoteFetcher extends Fetcher {
2122
this.pkgid = opts.pkgid ? opts.pkgid : `remote:${nameat}${this.resolved}`
2223
}
2324

25+
// Don't need to cache tarball fetches in pacote, because make-fetch-happen
26+
// will write into cacache anyway.
27+
get [_cacheFetches] () {
28+
return false
29+
}
30+
2431
[_tarballFromResolved] () {
2532
const stream = new Minipass()
2633
const fetchOpts = {

test/file.js

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,29 @@ const FileFetcher = require('../lib/file.js')
22
const t = require('tap')
33
const { relative, resolve, basename } = require('path')
44
const npa = require('npm-package-arg')
5-
const { promisify } = require('util')
6-
const rimraf = promisify(require('rimraf'))
7-
const mkdirp = require('mkdirp')
8-
const me = t.testdir()
5+
const me = t.testdir({cache: {}})
6+
const cache = resolve(me, 'cache')
97
t.cleanSnapshot = str => str.split(process.cwd()).join('${CWD}')
108

119
const abbrev = resolve(__dirname, 'fixtures/abbrev-1.1.1.tgz')
1210
const abbrevspec = `file:${relative(process.cwd(), abbrev)}`
1311

1412
t.test('basic', async t => {
15-
const f = new FileFetcher(abbrevspec, {})
13+
const f = new FileFetcher(abbrevspec, { cache })
1614
t.same(f.types, ['file'])
1715
const fm = await f.manifest()
1816
t.matchSnapshot(fm, 'manifest')
1917
t.equal(fm, f.package)
2018
t.equal(await f.manifest(), fm, 'cached manifest')
2119
t.matchSnapshot(await f.packument(), 'packument')
2220
const pj = me + '/extract/package.json'
23-
return t.resolveMatchSnapshot(f.extract(me + '/extract'), 'extract')
24-
.then(() => t.matchSnapshot(require(pj), 'package.json extracted'))
21+
t.matchSnapshot(await f.extract(me + '/extract'), 'extract')
22+
t.matchSnapshot(require(pj), 'package.json extracted')
23+
const fs = require('fs')
24+
// just verify that the file is there.
25+
t.same(fs.readdirSync(resolve(cache, 'content-v2/sha512/9e/77')), [
26+
'bdfc8890fe1cc8858ea97439db06dcfb0e33d32ab634d0fff3bcf4a6e69385925eb1b86ac69d79ff56d4cd35f36d01f67dff546d7a192ccd4f6a7138a2d1',
27+
], 'write cache content file')
2528
})
2629

2730
const binString = resolve(__dirname, 'fixtures/bin-string.tgz')

0 commit comments

Comments
 (0)