diff --git a/.gitignore b/.gitignore index db426f86..103a91cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ - .DS_Store -tests/repo-tests* +test/repo-tests* # Logs logs @@ -31,4 +30,4 @@ build/Release node_modules dist -lib \ No newline at end of file +lib diff --git a/package.json b/package.json index 72e54add..155d9174 100644 --- a/package.json +++ b/package.json @@ -36,28 +36,28 @@ "devDependencies": { "aegir": "^2.1.1", "block-stream2": "^1.1.0", - "brfs": "^1.4.3", "bs58": "^3.0.0", "buffer-loader": "0.0.1", "chai": "^3.5.0", "fs-blob-store": "^5.2.1", - "highland": "^2.7.4", "idb-plus-blob-store": "^1.0.0", "ipfs-repo": "^0.6.1", - "mocha": "^2.3.4", "ncp": "^2.0.0", "pre-commit": "^1.1.2", "raw-loader": "^0.5.1", "rimraf": "^2.5.1", + "streamifier": "^0.1.1", "string-to-stream": "^1.0.1" }, "dependencies": { "async": "^1.5.2", "block-stream2": "^1.1.0", "debug": "^2.2.0", + "eventemitter2": "^1.0.0", "ipfs-blocks": "^0.2.0", "ipfs-merkle-dag": "^0.4.0", "ipfs-unixfs": "^0.1.0", + "readable-stream": "2.0.6", "through2": "^2.0.0" }, "contributors": [ @@ -68,4 +68,4 @@ "greenkeeperio-bot ", "nginnever " ] -} \ No newline at end of file +} diff --git a/src/importer.js b/src/importer.js index 722fd33a..a22d06a5 100644 --- a/src/importer.js +++ b/src/importer.js @@ -3,247 +3,272 @@ const debug = require('debug') const log = debug('importer') log.err = debug('importer:error') -const fs = require('fs') -const mDAG = require('ipfs-merkle-dag') -const FixedSizeChunker = require('./chunker-fixed-size') +const fsc = require('./chunker-fixed-size') const through2 = require('through2') +const merkleDAG = require('ipfs-merkle-dag') const UnixFS = require('ipfs-unixfs') -const async = require('async') +const EE2 = require('eventemitter2').EventEmitter2 +const util = require('util') +const bs58 = require('bs58') -exports = module.exports +exports = module.exports = Importer const CHUNK_SIZE = 262144 -// Use a layout + chunkers to convert a directory (or file) to the layout format -exports.import = (target, dagService, options, callback) => { - if (typeof options === 'function') { callback = options; options = {} } - - if (!target) { return callback(new Error('must specify target')) } - if (!dagService) { return callback(new Error('must specify dag service')) } - - // options.recursive : follow dirs - // options.chunkers : obj with chunkers to each type of data, { default: dumb-chunker } - - options = options || {} - - if (Buffer.isBuffer(target)) { - bufferImporter(target, callback) - } else if (typeof target.on === 'function') { - // TODO Create Stream Importer - // streamImporter(options.stream, callback) - return callback(new Error('stream importer has not been built yet')) - } else if (typeof target === 'string') { - var stats - try { - stats = fs.statSync(target) - } catch (e) { - return callback(e) - } - if (stats.isFile()) { - fileImporter(target, callback) - } else if (stats.isDirectory() && options.recursive) { - dirImporter(target, callback) - } else { - return callback(new Error('recursive must be true to add a directory')) - } - } - - function fileImporter (path, callback) { - const stats = fs.statSync(path) - if (stats.size > CHUNK_SIZE) { - const links = [] // { Hash: , Size: , Name: } - fs.createReadStream(path) - .pipe(new FixedSizeChunker(CHUNK_SIZE)) - .pipe(through2((chunk, enc, cb) => { - // TODO: check if this is right (I believe it should be type 'raw' - // https://github.com/ipfs/go-ipfs/issues/2331 - const raw = new UnixFS('file', chunk) - - const node = new 
mDAG.DAGNode(raw.marshal()) - - dagService.add(node, function (err) { - if (err) { - return log.err(err) - } - links.push({ - Hash: node.multihash(), - Size: node.size(), - leafSize: raw.fileSize(), - Name: '' - }) - cb() - }) - }, (cb) => { - const file = new UnixFS('file') - const parentNode = new mDAG.DAGNode() - links.forEach((l) => { - file.addBlockSize(l.leafSize) - const link = new mDAG.DAGLink(l.Name, l.Size, l.Hash) - parentNode.addRawLink(link) - }) - - parentNode.data = file.marshal() - dagService.add(parentNode, (err) => { - if (err) { - return log.err(err) - } - - const pathSplit = path.split('/') - const fileName = pathSplit[pathSplit.length - 1] +util.inherits(Importer, EE2) - callback(null, { - Hash: parentNode.multihash(), - Size: parentNode.size(), - Name: fileName - }) && cb() - }) - })) - } else { - // create just one file node with the data directly - var buf = fs.readFileSync(path) - const fileUnixFS = new UnixFS('file', buf) - const fileNode = new mDAG.DAGNode(fileUnixFS.marshal()) - - dagService.add(fileNode, (err) => { - if (err) { - return log.err(err) - } - - const split = path.split('/') - const fileName = split[split.length - 1] +function Importer (dagService, options) { + if (!(this instanceof Importer)) { + return new Importer(dagService) + } - callback(null, { - Hash: fileNode.multihash(), - Size: fileNode.size(), - Name: fileName - }) - }) - } + if (!dagService) { + return new Error('must specify a dagService') } - function dirImporter (path, callback) { - const files = fs.readdirSync(path) - const dirUnixFS = new UnixFS('directory') - const dirNode = new mDAG.DAGNode() + const files = [] + var counter = 0 - if (files.length === 0) { - dirNode.data = dirUnixFS.marshal() - dagService.add(dirNode, (err) => { + this.add = (fl) => { + counter++ + if (!fl.stream) { + // 1. create the empty dir dag node + // 2. write it to the dag store + // 3. add to the files array {path: <>, hash: <>} + // 4. emit the path + hash + const d = new UnixFS('directory') + const n = new merkleDAG.DAGNode() + n.data = d.marshal() + dagService.add(n, (err) => { if (err) { - return callback(err) + return this.emit('err', 'Failed to store' + fl.path) + } + const el = { + path: fl.path, + multihash: n.multihash(), + size: n.size(), + dataSize: d.fileSize() } - const split = path.split('/') - const dirName = split[split.length - 1] - - callback(null, { - Hash: dirNode.multihash(), - Size: dirNode.size(), - Name: dirName - }) + files.push(el) + this.emit('file', el) }) return } - async.map( - files, - (file, cb) => { - const filePath = path + '/' + file - const stats = fs.statSync(filePath) - if (stats.isFile()) { - return fileImporter(filePath, cb) - } if (stats.isDirectory()) { - return dirImporter(filePath, cb) - } else { - return cb(new Error('Found a weird file' + path + file)) - } - }, - (err, results) => { - if (err) { - return callback(err) - } - results.forEach((result) => { - dirNode.addRawLink(new mDAG.DAGLink(result.Name, result.Size, result.Hash)) - }) + const leaves = [] - dirNode.data = dirUnixFS.marshal() + fl.stream + .pipe(fsc(CHUNK_SIZE)) + .pipe(through2((chunk, enc, cb) => { + // 1. create the unixfs merkledag node + // 2. 
add its hash and size to the leafs array - dagService.add(dirNode, (err) => { - if (err) { - return callback(err) - } + // TODO - Support really large files + // a) check if we already reach max chunks if yes + // a.1) create a parent node for all of the current leaves + // b.2) clean up the leaves array and add just the parent node - const split = path.split('/') - const dirName = split[split.length - 1] + const l = new UnixFS('file', chunk) + const n = new merkleDAG.DAGNode(l.marshal()) - callback(null, { - Hash: dirNode.multihash(), - Size: dirNode.size(), - Name: dirName - }) - }) - }) - } - function bufferImporter (buffer, callback) { - const links = [] // { Hash: , Size: , Name: } - if (buffer.length > CHUNK_SIZE) { - var fsc = new FixedSizeChunker(CHUNK_SIZE) - fsc.write(buffer) - fsc.end() - fsc.pipe(through2((chunk, enc, cb) => { - // TODO: check if this is right (I believe it should be type 'raw' - // https://github.com/ipfs/go-ipfs/issues/2331 - const raw = new UnixFS('file', chunk) - const node = new mDAG.DAGNode(raw.marshal()) - - dagService.add(node, function (err) { + dagService.add(n, function (err) { if (err) { - return log.err(err) + return this.emit('err', 'Failed to store chunk of' + fl.path) } - links.push({ - Hash: node.multihash(), - Size: node.size(), - leafSize: raw.fileSize(), + + leaves.push({ + Hash: n.multihash(), + Size: n.size(), + leafSize: l.fileSize(), Name: '' }) + cb() }) }, (cb) => { - const file = new UnixFS('file') - const parentNode = new mDAG.DAGNode() - links.forEach((l) => { - file.addBlockSize(l.leafSize) - const link = new mDAG.DAGLink(l.Name, l.Size, l.Hash) - parentNode.addRawLink(link) + if (leaves.length === 1) { + // 1. add to the files array {path: <>, hash: <>} + // 2. emit the path + hash + + const el = { + path: fl.path, + multihash: leaves[0].Hash, + size: leaves[0].Size, + dataSize: leaves[0].leafSize + } + + files.push(el) + this.emit('file', el) + return done(cb) + } + // 1. create a parent node and add all the leafs + // 2. add to the files array {path: <>, hash: <>} + // 3. 
emit the path + hash of the parent node + + const f = new UnixFS('file') + const n = new merkleDAG.DAGNode() + + leaves.forEach((leaf) => { + f.addBlockSize(leaf.leafSize) + const l = new merkleDAG.DAGLink(leaf.Name, leaf.Size, leaf.Hash) + n.addRawLink(l) }) - parentNode.data = file.marshal() - dagService.add(parentNode, (err) => { + + n.data = f.marshal() + dagService.add(n, (err) => { if (err) { - return log.err(err) + this.emit('err', 'Failed to store' + fl.path) + return cb() } - callback(null, { - Hash: parentNode.multihash(), - Size: parentNode.size() - }) && cb() + const el = { + path: fl.path, + multihash: n.multihash(), + size: n.size() + // dataSize: f.fileSize() + } + + files.push(el) + this.emit('file', el) + return done(cb) }) })) - } else { - // create just one file node with the data directly - const fileUnixFS = new UnixFS('file', buffer) - const fileNode = new mDAG.DAGNode(fileUnixFS.marshal()) + function done (cb) { + counter-- + cb() + } + } - dagService.add(fileNode, (err) => { - if (err) { - return log.err(err) + this.finish = () => { + // if (files.length === 1) { + // // The file was already emitted, nothing to do here + // return + // } + + if (counter > 0) { + return setTimeout(this.finish, 200) + } + + // file struct + // { + // path: // full path + // multihash: // multihash of the dagNode + // size: // cumulative size + // dataSize: // dagNode size + // } + + // 1) convert files to a tree + // for each path, split, add to a json tree and in the end the name of the + // file points to an object that is has a key multihash and respective value + // { foo: { bar: { baz.txt: }}} + // the stop condition is if the value is not an object + const fileTree = {} + + files.forEach((file) => { + let splitted = file.path.split('/') + if (splitted.length === 1) { + return // adding just one file + // fileTree[file.path] = bs58.encode(file.multihash).toString() + } + if (splitted[0] === '') { + splitted = splitted.slice(1) + } + var tmpTree = fileTree + + for (var i = 0; i < splitted.length; i++) { + if (!tmpTree[splitted[i]]) { + tmpTree[splitted[i]] = {} } + if (i === splitted.length - 1) { + tmpTree[splitted[i]] = file.multihash + } else { + tmpTree = tmpTree[splitted[i]] + } + } + }) - callback(null, { - Hash: fileNode.multihash(), - Size: fileNode.size() - }) + if (Object.keys(fileTree).length === 0) { + return // no dirs to be created + } + + // 2) create a index for multihash: { size, dataSize } so + // that we can fetch these when creating the merkle dag nodes + + const mhIndex = {} + + files.forEach((file) => { + mhIndex[bs58.encode(file.multihash)] = { + size: file.size, + dataSize: file.dataSize + } + }) + + // 3) expand leaves recursively + // create a dirNode + // Object.keys + // If the value is an Object + // create a dir Node + // Object.keys + // Once finished, add the result as a link to the dir node + // If the value is not an object + // add as a link to the dirNode + + function traverse (tree, base) { + const keys = Object.keys(tree) + let tmpTree = tree + keys.map((key) => { + if (typeof tmpTree[key] === 'object' && + !Buffer.isBuffer(tmpTree[key])) { + tmpTree[key] = traverse.call(this, tmpTree[key], base ? 
base + '/' + key : key) + } + }) + + // at this stage, all keys are multihashes + // create a dir node + // add all the multihashes as links + // return this new node multihash + + const d = new UnixFS('directory') + const n = new merkleDAG.DAGNode() + + keys.forEach((key) => { + const b58mh = bs58.encode(tmpTree[key]) + const l = new merkleDAG.DAGLink( + key, mhIndex[b58mh].size, tmpTree[key]) + n.addRawLink(l) }) + + n.data = d.marshal() + dagService.add(n, (err) => { + if (err) { + this.emit('err', 'failed to store dirNode') + } + }) + + if (!base) { + return + } + + const el = { + path: base, + multihash: n.multihash(), + size: n.size() + // dataSize: '' // f.fileSize() + } + + this.emit('file', el) + + mhIndex[bs58.encode(n.multihash())] = { size: n.size() } + return n.multihash() } + /* const rootHash = */ traverse.call(this, fileTree) + + // TODO + // Since we never shoot for adding multiple directions at the root level, the following might not be necessary, reserving it for later: + // + // if at the first level, there was only one key (most cases) + // do nothing, if there was many, emit a rootHash with '/' + // emit root hash as well (as '/') } - // function streamImporter (stream, callback) {} } diff --git a/test/browser.js b/test/browser.js index 575396f7..76a57ecd 100644 --- a/test/browser.js +++ b/test/browser.js @@ -1,7 +1,6 @@ /* eslint-env mocha */ 'use strict' -const tests = require('./buffer-test') const async = require('async') const store = require('idb-plus-blob-store') const _ = require('lodash') @@ -46,17 +45,9 @@ describe('IPFS data importing tests on the Browser', function () { }) // create the repo constant to be used in the import a small buffer test - const options = { - stores: { - keys: store, - config: store, - datastore: store, - // datastoreLegacy: needs https://github.com/ipfs/js-ipfs-repo/issues/6#issuecomment-164650642 - logs: store, - locks: store, - version: store - } - } - const repo = new IPFSRepo('ipfs', options) - tests(repo) + const repo = new IPFSRepo('ipfs', {stores: store}) + + require('./test-exporter')(repo) + require('./test-importer')(repo) + require('./test-fixed-size-chunker') }) diff --git a/test/buffer-test.js b/test/buffer-test.js deleted file mode 100644 index b057ff4d..00000000 --- a/test/buffer-test.js +++ /dev/null @@ -1,143 +0,0 @@ -/* eslint-env mocha */ -'use strict' - -const unixFSEngine = require('./../src') -const importer = unixFSEngine.importer -const exporter = unixFSEngine.exporter -const BlockService = require('ipfs-blocks').BlockService -const DAGService = require('ipfs-merkle-dag').DAGService -const DAGNode = require('ipfs-merkle-dag').DAGNode -const UnixFS = require('ipfs-unixfs') - -const expect = require('chai').expect - -const smallBuf = require('buffer!./test-data/200Bytes.txt') -const bigBuf = require('buffer!./test-data/1.2MiB.txt') -const bigBlock = require('buffer!./test-data/1.2MiB.txt.block') -const bigLink = require('buffer!./test-data/1.2MiB.txt.link-block0') -const marbuf = require('buffer!./test-data/200Bytes.txt.block') - -module.exports = function (repo) { - describe('layout: importer', function () { - it('import a small buffer', function (done) { - // this is just like "import a small file" - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const buf = smallBuf - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, function (err, node) { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - smallDAGNode.unMarshal(marbuf) - 
expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big buffer', function (done) { - // this is just like "import a big file" - const buf = bigBuf - const bs = new BlockService(repo) - const ds = new DAGService(bs) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, function (err, node) { - expect(err).to.not.exist - const bigDAGNode = new DAGNode() - bigDAGNode.unMarshal(bigBlock) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, function (err, node) { - expect(err).to.not.exist - const leaf = new DAGNode() - - const marbuf2 = bigLink - leaf.unMarshal(marbuf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(marbuf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it('export a file with no links', (done) => { - const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - ds.get(hash, (err, fetchedNode) => { - expect(err).to.not.exist - const unmarsh = UnixFS.unmarshal(fetchedNode.data) - expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) - done() - }) - }) - }) - - it('export a small file with links', (done) => { - const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - expect(data.stream).to.exist - done() - }) - }) - - it('export a large file > 5mb', (done) => { - const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - expect(data.stream).to.exist - done() - }) - }) - - it('export a directory', (done) => { - const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - var fs = [] - testExport.on('file', (data) => { - fs.push(data) - }) - setTimeout(() => { - expect(fs[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') - expect(fs[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') - 
expect(fs[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') - expect(fs[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') - done() - }, 1000) - }) - }) -} diff --git a/test/node.js b/test/node.js index 94d254b6..1d4a9edf 100644 --- a/test/node.js +++ b/test/node.js @@ -1,5 +1,4 @@ /* eslint-env mocha */ - 'use strict' const fs = require('fs') @@ -7,6 +6,8 @@ const ncp = require('ncp').ncp const rimraf = require('rimraf') const expect = require('chai').expect const path = require('path') +const IPFSRepo = require('ipfs-repo') +const fsbs = require('fs-blob-store') describe('core', () => { const repoExample = path.join(process.cwd(), '/test/repo-example') @@ -20,6 +21,21 @@ describe('core', () => { }) }) + before((done) => { + fs.stat(path.join(__dirname, '/test-data/dir-nested/dir-another'), (err, exists) => { + if (err) { + fs.mkdirSync(path.join(__dirname, '/test-data/dir-nested/dir-another')) + } + }) + + fs.stat(path.join(__dirname, '/test-data/dir-nested/level-1/level-2'), (err, exists) => { + if (err) { + fs.mkdirSync(path.join(__dirname, '/test-data/dir-nested/level-1/level-2')) + } + done() + }) + }) + after((done) => { rimraf(repoTests, (err) => { expect(err).to.equal(null) @@ -27,19 +43,8 @@ describe('core', () => { }) }) - const tests = fs.readdirSync(__dirname) - tests.filter((file) => { - if (file === 'index.js' || - file === 'browser.js' || - file === 'test-data' || - file === 'repo-example' || - file === 'buffer-test.js' || - file.indexOf('repo-tests') > -1) { - return false - } - - return true - }).forEach((file) => { - require('./' + file) - }) + const repo = new IPFSRepo(repoTests, {stores: fsbs}) + require('./test-exporter')(repo) + require('./test-importer')(repo) + require('./test-fixed-size-chunker') }) diff --git a/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data b/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data new file mode 100644 index 00000000..38a7ed3a Binary files /dev/null and b/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data differ diff --git a/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data b/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data new file mode 100644 index 00000000..1d48c015 Binary files /dev/null and b/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data differ diff --git a/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data b/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data differ diff --git a/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data b/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data new file mode 100644 index 00000000..72674694 Binary files /dev/null and b/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data differ diff --git 
a/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data b/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data @@ -0,0 +1,4728 @@ + +����There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data b/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data new file mode 100644 index 00000000..c3a2f685 Binary files /dev/null and b/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data differ diff --git a/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data b/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data new file mode 100644 index 00000000..4e910622 Binary files /dev/null and b/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data differ diff --git a/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data b/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data new file mode 100644 index 00000000..0b520379 Binary files /dev/null and b/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data differ diff --git a/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data b/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data new file mode 100644 index 00000000..a4027d46 Binary files /dev/null and b/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data differ diff --git a/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data b/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data new file mode 100644 index 00000000..9cda061b Binary files /dev/null and b/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data differ diff --git a/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data b/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data @@ -0,0 +1,5 @@ + +���wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e�� \ No newline at end of file diff --git a/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data b/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data new file mode 100644 index 00000000..b99ceb21 --- /dev/null +++ 
b/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data @@ -0,0 +1,3 @@ +, +" ��`�u�>/2��l il�f��YB�'M%���bar� + \ No newline at end of file diff --git a/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data b/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data new file mode 100644 index 00000000..70641f44 --- /dev/null +++ b/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data @@ -0,0 +1,5 @@ + +����wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e��� \ No newline at end of file diff --git a/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data b/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data new file mode 100644 index 00000000..e705b9b0 Binary files /dev/null and b/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data differ diff --git a/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data b/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data new file mode 100644 index 00000000..00360cfb --- /dev/null +++ b/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data @@ -0,0 +1,3 @@ +4 +" ��U��Fr���n����b��⇾�?��|<� test-data��� + \ No newline at end of file diff --git a/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data b/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data new file mode 100644 index 00000000..81663143 Binary files /dev/null and b/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data differ diff --git a/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data b/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data new file mode 100644 index 00000000..ee87b15f --- /dev/null +++ b/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data @@ -0,0 +1,1452 @@ + +����l systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. 
Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. 
Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. 
Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. 
+Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+ + + +�� \ No newline at end of file diff --git a/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data b/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data new file mode 100644 index 00000000..bb713c56 Binary files /dev/null and b/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data differ diff --git a/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data b/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data differ diff --git a/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data b/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data new file mode 100644 index 00000000..46fecabf --- /dev/null +++ b/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data @@ -0,0 +1,2 @@ + +����Q������� \ No newline at end of file diff --git a/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data b/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data new file mode 100644 index 00000000..9f1e7af6 Binary files /dev/null and b/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data differ diff --git a/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data b/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data new file mode 100644 index 00000000..2a6dbb58 --- /dev/null +++ b/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data @@ -0,0 +1,2 @@ + + ��L �� �� �� �� ��  \ No newline at end of file diff --git a/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data b/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data new file mode 100644 index 00000000..bfe6600f Binary files /dev/null and b/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data differ diff --git a/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data b/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data new file mode 100644 index 00000000..74f62a02 --- /dev/null +++ b/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data @@ -0,0 +1,3 @@ + + +�'Q��#�� diff --git a/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data b/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data new file mode 100644 index 00000000..42c502e2 Binary files /dev/null and b/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data differ diff --git 
a/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data b/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data new file mode 100644 index 00000000..a9c1c069 Binary files /dev/null and b/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data differ diff --git a/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data b/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data new file mode 100644 index 00000000..96566028 --- /dev/null +++ b/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data @@ -0,0 +1,6 @@ + +��5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt�3 +" Y��9_)a���˹2�R�m�Ŗke�9�� dir-another0 +" Ty�5 ;_9Yf�q��F�Lhyl���/��level-1� +� \ No newline at end of file diff --git a/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data b/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data differ diff --git a/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data b/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data differ diff --git a/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data b/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data new file mode 100644 index 00000000..5a3836e9 Binary files /dev/null and b/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data differ diff --git a/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data b/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data new file mode 100644 index 00000000..e845c839 Binary files /dev/null and b/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data differ diff --git a/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data b/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data new file mode 100644 index 00000000..44403205 --- /dev/null +++ b/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data @@ -0,0 +1,3 @@ + + +x\�΃�� diff --git a/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data b/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data new file mode 100644 index 00000000..8c345f38 --- /dev/null +++ b/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data @@ -0,0 +1,4732 @@ + +���� systems have 
seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. 
What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. 
Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
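To make the content-addressed Merkle DAG idea concrete, the following is a minimal, illustrative sketch only: the node and link shapes, the choice of SHA-256, and the JSON serialization are assumptions made for brevity, not the actual IPFS object format or API. The point it shows is simply that a node is addressed by the hash of its contents, so identical data resolves to the same address anywhere and links are tamper-evident.

// Hypothetical sketch of a content-addressed Merkle DAG node (TypeScript).
import { createHash } from 'crypto'

interface DAGLink { name: string; size: number; hash: string }
interface DAGNode { data: Buffer; links: DAGLink[] }

// Serialize a node deterministically so it can be hashed.
function serialize (node: DAGNode): Buffer {
  return Buffer.from(JSON.stringify({
    data: node.data.toString('base64'),
    links: node.links
  }))
}

// A node's address is the hash of its serialized contents.
function address (node: DAGNode): string {
  return createHash('sha256').update(serialize(node)).digest('hex')
}

// Example: a "file" node linking two content-addressed chunks.
const chunkA: DAGNode = { data: Buffer.from('hello '), links: [] }
const chunkB: DAGNode = { data: Buffer.from('world'), links: [] }
const file: DAGNode = {
  data: Buffer.alloc(0),
  links: [
    { name: '', size: chunkA.data.length, hash: address(chunkA) },
    { name: '', size: chunkB.data.length, hash: address(chunkB) }
  ]
}

console.log(address(file)) // same content yields the same address on any host

Because every link embeds the hash of its target, any peer can verify fetched blocks independently of who served them, which is what enables the file distribution strategies described above.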
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. 
Among th
\ No newline at end of file
diff --git a/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data b/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data
new file mode 100644
index 00000000..4741988d
--- /dev/null
+++ b/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data
@@ -0,0 +1,4729 @@
+
+e academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon.
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
\ No newline at end of file
diff --git a/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data b/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data
new file mode 100644
index 00000000..6d043733
--- /dev/null
+++ b/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data
@@ -0,0 +1,2 @@
+
+r�ː�'Q��#��
\ No newline at end of file
diff --git a/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data b/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data
new file mode 100644
index 00000000..be380799
--- /dev/null
+++ b/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data
@@ -0,0 +1,4730 @@
+
+����[7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings1, no general file-system has emerged that offers global, low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
\ No newline at end of file
diff --git a/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data b/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data
new file mode 100644
index 00000000..871a6bf0
--- /dev/null
+++ b/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data
@@ -0,0 +1,4729 @@
+There have been many attempts at constructing a global distributed file system. Some systems have seen
+significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded
+widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia,
+the most successful systems have been peer-to-peer file-sharing applications primarily geared toward
+large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file
+distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a
+massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater
+numbers of users and files distributed than their academic file system counterparts. However, the
+applications were not designed as infrastructure to be built upon. While there have been successful
+repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized
+distribution.
+¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it
+to distribute video game content.
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is
+the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had
+enormous technical and social impact. It has become the de facto way to transmit files across the
+internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented
+in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given
+the number of backwards compatibility constraints and the number of strong parties invested in the
+current model. But from another perspective, new protocols have emerged and gained wide use since the
+emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because moving small files around is relatively
+cheap, even for small organizations with lots of traffic.
+But we are entering a new era of data distribution with new challenges: (a) hosting and distributing
+petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing
+accidental disappearance of important files, and more. Many of these can be boiled down to “lots of
+data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already
+given up HTTP for different data distribution protocols. The next step is making them part of the Web
+itself.
+Orthogonal to efficient data distribution, version control systems have managed to develop important
+data collaboration workflows. Git, the distributed source code version control system, developed many
+useful ways to model and implement distributed data operations. The Git toolchain offers versatile
+versioning functionality that large file distribution systems severely lack. New solutions inspired by
+Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data
+collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem
+design [9], as its content-addressed Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data structure can influence the design of
+high-throughput-oriented file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile
+these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused
+integration yields a system greater than the sum of its parts. The central IPFS principle is modeling
+all data as part of the same Merkle DAG.
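To make the content-addressing idea concrete, the sketch below shows, in plain Node.js, how a Merkle DAG
node can be addressed by hashing its own data together with the addresses of the nodes it links to, so
that identical content always resolves to the identical address. This is a minimal illustration only:
the node shape and the address() helper are assumptions made for this example, not the block format or
API that IPFS itself uses.

// Minimal content-addressing sketch (illustration only, not the IPFS wire format).
// A node's address is the SHA-256 of its serialized form: its own data plus the
// addresses of its links.
const crypto = require('crypto')

function address (node) {
  // Serialize deterministically: the node's data first, then each link's address.
  const payload = Buffer.concat([
    Buffer.from(node.data),
    ...node.links.map((link) => Buffer.from(link, 'hex'))
  ])
  return crypto.createHash('sha256').update(payload).digest('hex')
}

// Two leaf chunks and a parent node that links to them by address.
const chunkA = { data: 'hello ', links: [] }
const chunkB = { data: 'world', links: [] }
const parent = { data: '', links: [address(chunkA), address(chunkB)] }

console.log(address(parent))

Changing a single byte in either chunk changes that chunk's address, which changes the parent's payload
and therefore the parent's address as well; this is the property that lets a peer verify any block it
receives against the address it asked for, and lets any peer serve it.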
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
There have been many attempts at constructing a global distributed file sy�� \ No newline at end of file diff --git a/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data b/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data new file mode 100644 index 00000000..7c40850f Binary files /dev/null and b/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data differ diff --git a/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data b/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data new file mode 100644 index 00000000..637f391c --- /dev/null +++ b/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data @@ -0,0 +1,2 @@ + +����x\�΃�� \ No newline at end of file diff --git a/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data b/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data new file mode 100644 index 00000000..ca141be2 Binary files /dev/null and b/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data differ diff --git a/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data b/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data new file mode 100644 index 00000000..562529a2 Binary files /dev/null and b/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data differ diff --git a/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data b/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data new file mode 100644 index 00000000..725a9b22 --- /dev/null +++ b/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data @@ -0,0 +1,5 @@ + +@:4 +" si����"�¹W�<����G|��e��4 � 3 +1.2MiB.txt��L +: \ No newline at end of file diff --git a/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data b/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data new file mode 100644 index
00000000..69e8f9e4 --- /dev/null +++ b/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data b/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data new file mode 100644 index 00000000..1067edb4 --- /dev/null +++ b/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data @@ -0,0 +1,6 @@ + +�� +���wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e��� \ No newline at end of file diff --git a/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data b/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data new file mode 100644 index 00000000..c57d7186 Binary files /dev/null and b/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data differ diff --git a/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data b/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data @@ -0,0 +1,4 @@ +4 +" si����"�¹W�<����G|��e��4 � 3 +1.2MiB.txt��L + \ No newline at end of file diff --git a/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data b/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data new file mode 100644 index 00000000..df20559d Binary files /dev/null and b/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data differ diff --git a/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data b/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data new file mode 100644 index 00000000..6a0cbe82 --- /dev/null +++ b/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data @@ -0,0 +1,3 @@ + + +Q������� diff --git a/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data b/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data new file mode 100644 index 00000000..508cff2e Binary files /dev/null and b/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data differ diff --git a/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data b/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data new file mode 100644 index 00000000..5a59204a --- /dev/null +++ b/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data @@ -0,0 +1,2 @@ + +stem. 
Some�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data b/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data new file mode 100644 index 00000000..8e5a1d76 Binary files /dev/null and b/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data differ diff --git a/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data b/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and b/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data differ diff --git a/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data b/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data new file mode 100644 index 00000000..5b090964 Binary files /dev/null and b/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data differ diff --git a/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data b/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data new file mode 100644 index 00000000..6b72d373 --- /dev/null +++ b/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data @@ -0,0 +1,2 @@ + +u���� ��r[�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data b/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data new file mode 100644 index 00000000..4f882793 --- /dev/null +++ b/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data @@ -0,0 +1,3 @@ + + +� ��r[�� diff --git a/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data b/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data new file mode 100644 index 00000000..b1df8c51 Binary files /dev/null and b/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data differ diff --git a/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data b/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data new file mode 100644 index 00000000..b75d8023 Binary files /dev/null and b/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data differ diff --git a/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data b/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data new file mode 100644 index 00000000..f0b3a599 Binary files /dev/null and b/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data differ diff --git 
a/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data b/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data new file mode 100644 index 00000000..4eb5d7bf --- /dev/null +++ b/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data @@ -0,0 +1,4 @@ + +A;5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt� +; \ No newline at end of file diff --git a/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data b/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data new file mode 100644 index 00000000..c52fcda4 Binary files /dev/null and b/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data differ diff --git a/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data b/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data new file mode 100644 index 00000000..a762644a Binary files /dev/null and b/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data differ diff --git a/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data b/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data @@ -0,0 +1,3 @@ +5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt� + \ No newline at end of file diff --git a/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data b/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data new file mode 100644 index 00000000..42f65bd9 Binary files /dev/null and b/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data differ diff --git a/test/repo-example/datastore/CURRENT b/test/repo-example/datastore/CURRENT index 875cf233..6ba31a31 100644 --- a/test/repo-example/datastore/CURRENT +++ b/test/repo-example/datastore/CURRENT @@ -1 +1 @@ -MANIFEST-000007 +MANIFEST-000009 diff --git a/test/repo-example/datastore/LOG b/test/repo-example/datastore/LOG index 863b68fd..f5ffd612 100644 --- a/test/repo-example/datastore/LOG +++ b/test/repo-example/datastore/LOG @@ -1,10 +1,7 @@ -=============== Dec 10, 2015 (PST) =============== -07:50:02.056578 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -07:50:02.057231 db@open opening -07:50:02.057312 journal@recovery F·1 -07:50:02.057514 journal@recovery recovering @3 -07:50:02.058921 mem@flush created L0@5 N·4 S·1KiB "/ip..\xf6\xe4\xa9,v5":"/pk..\xf6\xe4\xa9,v6" -07:50:02.059983 db@janitor F·4 G·0 -07:50:02.060001 db@open done T·2.755926ms -07:50:02.073183 db@close closing -07:50:02.073285 db@close done T·97.522µs +=============== Apr 22, 2016 (WEST) =============== +03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +03:16:42.272857 db@open opening +03:16:42.275673 db@janitor F·4 G·0 +03:16:42.275700 db@open done 
T·2.831108ms +03:16:42.596938 db@close closing +03:16:42.597082 db@close done T·139.194µs diff --git a/test/repo-example/datastore/LOG.old b/test/repo-example/datastore/LOG.old index 708351e7..863b68fd 100644 --- a/test/repo-example/datastore/LOG.old +++ b/test/repo-example/datastore/LOG.old @@ -1,10 +1,10 @@ =============== Dec 10, 2015 (PST) =============== -07:49:57.048841 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -07:49:57.049014 db@open opening -07:49:57.049066 journal@recovery F·1 -07:49:57.049233 journal@recovery recovering @1 -07:49:57.049693 mem@flush created L0@2 N·2 S·211B "/lo..oot,v2":"/lo..ins,v1" -07:49:57.050381 db@janitor F·3 G·0 -07:49:57.050397 db@open done T·1.375431ms -07:49:57.064580 db@close closing -07:49:57.064655 db@close done T·72.59µs +07:50:02.056578 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +07:50:02.057231 db@open opening +07:50:02.057312 journal@recovery F·1 +07:50:02.057514 journal@recovery recovering @3 +07:50:02.058921 mem@flush created L0@5 N·4 S·1KiB "/ip..\xf6\xe4\xa9,v5":"/pk..\xf6\xe4\xa9,v6" +07:50:02.059983 db@janitor F·4 G·0 +07:50:02.060001 db@open done T·2.755926ms +07:50:02.073183 db@close closing +07:50:02.073285 db@close done T·97.522µs diff --git a/test/repo-example/datastore/MANIFEST-000007 b/test/repo-example/datastore/MANIFEST-000007 deleted file mode 100644 index 6af3b545..00000000 Binary files a/test/repo-example/datastore/MANIFEST-000007 and /dev/null differ diff --git a/test/repo-example/datastore/MANIFEST-000009 b/test/repo-example/datastore/MANIFEST-000009 new file mode 100644 index 00000000..a69b18a4 Binary files /dev/null and b/test/repo-example/datastore/MANIFEST-000009 differ diff --git a/test/test-data/foo-big/1.2MiB.txt b/test/test-data/foo-big/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/foo-big/1.2MiB.txt differ diff --git a/test/test-data/foo/bar/200Bytes.txt b/test/test-data/foo/bar/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/test/test-data/foo/bar/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-data/pam/1.2MiB.txt b/test/test-data/pam/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pam/1.2MiB.txt differ diff --git a/test/test-data/pam/pum/1.2MiB.txt b/test/test-data/pam/pum/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pam/pum/1.2MiB.txt differ diff --git a/test/test-data/pam/pum/200Bytes.txt b/test/test-data/pam/pum/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/test/test-data/pam/pum/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-data/pim/1.2MiB.txt b/test/test-data/pim/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pim/1.2MiB.txt differ diff --git a/test/test-data/pim/200Bytes.txt b/test/test-data/pim/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ 
b/test/test-data/pim/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-exporter.js b/test/test-exporter.js new file mode 100644 index 00000000..3cd0603e --- /dev/null +++ b/test/test-exporter.js @@ -0,0 +1,78 @@ +/* eslint-env mocha */ +'use strict' + +const unixFSEngine = require('./../src') +const exporter = unixFSEngine.exporter +const expect = require('chai').expect +const BlockService = require('ipfs-blocks').BlockService +const DAGService = require('ipfs-merkle-dag').DAGService +const UnixFS = require('ipfs-unixfs') + +let ds + +module.exports = function (repo) { + describe('exporter', function () { + before((done) => { + const bs = new BlockService(repo) + expect(bs).to.exist + ds = new DAGService(bs) + expect(ds).to.exist + done() + }) + + it('export a file with no links', (done) => { + const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + ds.get(hash, (err, fetchedNode) => { + expect(err).to.not.exist + const unmarsh = UnixFS.unmarshal(fetchedNode.data) + expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) + done() + }) + }) + }) + + it('export a small file with links', (done) => { + const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + expect(data.stream).to.exist + done() + }) + }) + + it('export a large file > 5mb', (done) => { + const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + expect(data.stream).to.exist + done() + }) + }) + + it('export a directory', (done) => { + const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + var fsa = [] + testExport.on('file', (data) => { + fsa.push(data) + }) + setTimeout(() => { + expect(fsa[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') + expect(fsa[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') + expect(fsa[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') + expect(fsa[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') + done() + }, 1000) + }) + }) +} diff --git a/test/test-fixed-size-chunker.js b/test/test-fixed-size-chunker.js index 2cc0ccac..516b3cb0 100644 --- a/test/test-fixed-size-chunker.js +++ b/test/test-fixed-size-chunker.js @@ -7,19 +7,12 @@ const expect = require('chai').expect const stringToStream = require('string-to-stream') const through = require('through2') const path = require('path') -const isNode = !global.window -let fileStream -if (isNode) { - fileStream = function () { - return fs.createReadStream(path.join(__dirname, '/test-data/1MiB.txt')) - } -} else { - const myFile = require('buffer!./test-data/1MiB.txt') - fileStream = function () { - return stringToStream(myFile) - } -} +const fileStream = () => stringToStream( + fs.readFileSync( + path.join(__dirname, '/test-data/1MiB.txt') + 
).toString('hex') +) describe('chunker: fixed size', function () { it('256 Bytes chunks', function (done) { diff --git a/test/test-importer.js b/test/test-importer.js new file mode 100644 index 00000000..0324b61f --- /dev/null +++ b/test/test-importer.js @@ -0,0 +1,205 @@ +/* eslint-env mocha */ +'use strict' + +const Importer = require('./../src').importer +const expect = require('chai').expect +const BlockService = require('ipfs-blocks').BlockService +const DAGService = require('ipfs-merkle-dag').DAGService +// const DAGNode = require('ipfs-merkle-dag').DAGNode +const bs58 = require('bs58') +const fs = require('fs') +// const UnixFS = require('ipfs-unixfs') +const path = require('path') +const streamifier = require('streamifier') + +let ds + +module.exports = function (repo) { + describe('importer', function () { + const bigFile = fs.readFileSync(path.join(__dirname, '/test-data/1.2MiB.txt')) + const smallFile = fs.readFileSync(path.join(__dirname, '/test-data/200Bytes.txt')) + + // const dirSmall = path.join(__dirname, '/test-data/dir-small') + // const dirBig = path.join(__dirname, '/test-data/dir-big') + // const dirNested = path.join(__dirname, '/test-data/dir-nested') + + before((done) => { + const bs = new BlockService(repo) + expect(bs).to.exist + ds = new DAGService(bs) + expect(ds).to.exist + done() + }) + + it('small file (smaller than a chunk)', (done) => { + const buffered = smallFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('200Bytes.txt') + expect(bs58.encode(file.multihash)).to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + expect(file.size).to.equal(211) + done() + }) + i.add({path: '200Bytes.txt', stream: r}) + i.finish() + }) + + it('small file (smaller than a chunk) inside a dir', (done) => { + const buffered = smallFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'foo/bar/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'foo/bar') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('Qmf5BQbTUyUAvd6Ewct83GYGnE1F6btiC3acLhR8MDxgkD') + } + if (file.path === 'foo') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQrb6KKWGo8w7zKfx2JksptY6wN7B2ysSBdKZr4xMU36d') + } + if (counter === 3) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'foo/bar/200Bytes.txt', stream: r}) + i.finish() + }) + + it('file bigger than a single chunk', (done) => { + const buffered = bigFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('1.2MiB.txt') + expect(bs58.encode(file.multihash)).to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + expect(file.size).to.equal(1258318) + done() + }) + i.add({path: '1.2MiB.txt', stream: r}) + i.finish() + }) + + it('file bigger than a single chunk inside a dir', (done) => { + const buffered = bigFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'foo-big/1.2MiB.txt') { + expect(bs58.encode(file.multihash)).to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + expect(file.size).to.equal(1258318) + } + if (file.path === 'foo-big') {
expect(bs58.encode(file.multihash)).to.equal('QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj') + expect(file.size).to.equal(1258376) + } + if (counter === 2) { + done() + } + }) + i.add({path: 'foo-big/1.2MiB.txt', stream: r}) + i.finish() + }) + + it.skip('file (that chunk number exceeds max links)', (done) => { + // TODO + }) + + it('empty directory', (done) => { + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('empty-dir') + expect(bs58.encode(file.multihash)).to.equal('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') + expect(file.size).to.equal(4) + done() + }) + i.add({path: 'empty-dir'}) + i.finish() + }) + + it('directory with files', (done) => { + const r1 = streamifier.createReadStream(smallFile) + const r2 = streamifier.createReadStream(bigFile) + + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'pim/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'pim/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pim') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i') + } + if (counter === 3) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'pim/200Bytes.txt', stream: r1}) + i.add({path: 'pim/1.2MiB.txt', stream: r2}) + i.finish() + }) + + it('nested directory (2 levels deep)', (done) => { + const r1 = streamifier.createReadStream(smallFile) + const r2 = streamifier.createReadStream(bigFile) + const r3 = streamifier.createReadStream(bigFile) + + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'pam/pum/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'pam/pum/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pam/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pam/pum') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i') + } + if (file.path === 'pam') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av') + } + if (counter === 5) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'pam/pum/200Bytes.txt', stream: r1}) + i.add({path: 'pam/pum/1.2MiB.txt', stream: r2}) + i.add({path: 'pam/1.2MiB.txt', stream: r3}) + i.finish() + }) + }) +} diff --git a/test/test-unixfs-engine.js b/test/test-unixfs-engine.js deleted file mode 100644 index 4d563e9e..00000000 --- a/test/test-unixfs-engine.js +++ /dev/null @@ -1,332 +0,0 @@ -/* eslint-env mocha */ -'use strict' - -const unixFSEngine = require('./../src') -const importer = unixFSEngine.importer -const exporter = unixFSEngine.exporter -const expect = require('chai').expect -const IPFSRepo = require('ipfs-repo') -const BlockService = require('ipfs-blocks').BlockService -const DAGService = require('ipfs-merkle-dag').DAGService -const DAGNode = require('ipfs-merkle-dag').DAGNode -const fsBlobStore = require('fs-blob-store') -const bs58 = 
require('bs58') -const fs = require('fs') -const UnixFS = require('ipfs-unixfs') -const path = require('path') - -let ds - -describe('layout: importer', function () { - const big = path.join(__dirname, '/test-data/1.2MiB.txt') - const small = path.join(__dirname, '/test-data/200Bytes.txt') - const dirSmall = path.join(__dirname, '/test-data/dir-small') - const dirBig = path.join(__dirname, '/test-data/dir-big') - const dirNested = path.join(__dirname, '/test-data/dir-nested') - - // check to see if missing empty dirs need to be created - - fs.stat(path.join(__dirname, '/test-data/dir-nested/dir-another'), function (err, exists) { - if (err) { - fs.mkdir(path.join(__dirname, '/test-data/dir-nested/dir-another')) - } - }) - fs.stat(path.join(__dirname, '/test-data/dir-nested/level-1/level-2'), function (err, exists) { - if (err) { - fs.mkdir(path.join(__dirname, '/test-data/dir-nested/level-1/level-2')) - } - }) - - it('start dag service', function (done) { - const options = { - stores: { - keys: fsBlobStore, - config: fsBlobStore, - datastore: fsBlobStore, - // datastoreLegacy: needs https://github.com/ipfsBlobStore/js-ipfsBlobStore-repo/issues/6#issuecomment-164650642 - logs: fsBlobStore, - locks: fsBlobStore, - version: fsBlobStore - } - } - const repo = new IPFSRepo(process.env.IPFS_PATH, options) - const bs = new BlockService(repo) - ds = new DAGService(bs) - expect(bs).to.exist - expect(ds).to.exist - done() - }) - - it('import a bad path', (done) => { - importer.import('/foo/bar/quux/a!wofjaeiwojfoiew', ds, function (err, stat) { - expect(err).to.exist - done() - }) - }) - - it('import a small file', (done) => { - importer.import(small, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - const buf = fs.readFileSync(small + '.block') - smallDAGNode.unMarshal(buf) - expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big file', (done) => { - importer.import(big, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - - const bigDAGNode = new DAGNode() - const buf = fs.readFileSync(big + '.block') - bigDAGNode.unMarshal(buf) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, (err, node) => { - expect(err).to.not.exist - const leaf = new DAGNode() - const buf2 = fs.readFileSync(big + '.link-block0') - leaf.unMarshal(buf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(buf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - 
expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it('import a small directory', (done) => { - importer.import(dirSmall, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - const dirSmallNode = new DAGNode() - const buf = fs.readFileSync(dirSmall + '.block') - dirSmallNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirSmallNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const dirUnixFS = UnixFS.unmarshal(dirSmallNode.data) - - expect(nodeUnixFS.type).to.equal(dirUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(dirUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(dirUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(dirUnixFS.blockSizes) - expect(node.data).to.deep.equal(dirSmallNode.data) - expect(node.marshal()).to.deep.equal(dirSmallNode.marshal()) - done() - }) - }) - }) - - it('import a big directory', (done) => { - importer.import(dirBig, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - const dirNode = new DAGNode() - const buf = fs.readFileSync(dirBig + '.block') - dirNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const dirUnixFS = UnixFS.unmarshal(dirNode.data) - - expect(nodeUnixFS.type).to.equal(dirUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(dirUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(dirUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(dirUnixFS.blockSizes) - expect(node.data).to.deep.equal(dirNode.data) - expect(node.marshal()).to.deep.equal(dirNode.marshal()) - done() - }) - }) - }) - - it('import a nested directory', (done) => { - importer.import(dirNested, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - expect(bs58.encode(stats.Hash).toString()).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN') - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - expect(node.links.length).to.equal(3) - - const dirNode = new DAGNode() - const buf = fs.readFileSync(dirNested + '.block') - dirNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirNode.links) - expect(node.data).to.deep.equal(dirNode.data) - done() - }) - }) - }) - - it('import a small buffer', (done) => { - // this is just like "import a small file" - const buf = fs.readFileSync(path.join(__dirname, '/test-data/200Bytes.txt')) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - const marbuf = fs.readFileSync(small + '.block') - smallDAGNode.unMarshal(marbuf) - expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big buffer', (done) => { - // this is just like "import a big file" - const buf = fs.readFileSync(path.join(__dirname, '/test-data/1.2MiB.txt')) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - - const bigDAGNode = new DAGNode() - const marbuf = fs.readFileSync(big + '.block') - 
bigDAGNode.unMarshal(marbuf) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, (err, node) => { - expect(err).to.not.exist - const leaf = new DAGNode() - - const marbuf2 = fs.readFileSync(big + '.link-block0') - leaf.unMarshal(marbuf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(marbuf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it.skip('import from a readable stream', (done) => { - }) -}) - -describe('layout: exporter', function () { - it('export a file with no links', (done) => { - const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - ds.get(hash, (err, fetchedNode) => { - expect(err).to.not.exist - const unmarsh = UnixFS.unmarshal(fetchedNode.data) - expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) - done() - }) - }) - }) - - it('export a small file with links', (done) => { - const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - var ws = fs.createWriteStream(path.join(process.cwd(), '/test', data.path)) - data.stream.pipe(ws) - data.stream.on('end', () => { - const stats = fs.existsSync(path.join(process.cwd(), '/test', data.path)) - expect(stats).to.equal(true) - fs.unlinkSync(path.join(process.cwd(), '/test', data.path)) - done() - }) - }) - }) - - it('export a large file > 5mb', (done) => { - const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - var ws = fs.createWriteStream(path.join(process.cwd(), '/test', data.path)) - data.stream.pipe(ws) - data.stream.on('end', () => { - const stats = fs.existsSync(path.join(process.cwd(), '/test', data.path)) - expect(stats).to.equal(true) - fs.unlinkSync(path.join(process.cwd(), '/test', data.path)) - done() - }) - }) - }) - - it('export a directory', (done) => { - const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' - var testExport = exporter(hash, ds) - var fs = [] - var x = 0 - testExport.on('file', (data) => { - fs.push(data) - x++ - if (x === 4) { - expect(fs[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') - expect(fs[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') - expect(fs[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') - 
expect(fs[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') - done() - } - }) - }) -})
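Taken together, the new test suites above sketch the intended usage of the rewritten engine: instead of the old callback-style importer.import(path, dagService, callback), both the importer and the exporter are now event emitters driven by a DAGService. The snippet below is a minimal usage sketch assembled only from the calls exercised in test-importer.js and test-exporter.js; the repo variable stands in for an already-configured ipfs-repo instance, and the input file path and the require path from the repository root are illustrative assumptions, not part of this diff.

// Minimal usage sketch. Assumptions: `repo` is a configured ipfs-repo instance,
// './src' is the require path from the repository root, and file paths are illustrative.
const fs = require('fs')
const bs58 = require('bs58')
const BlockService = require('ipfs-blocks').BlockService
const DAGService = require('ipfs-merkle-dag').DAGService
const unixFSEngine = require('./src')

const ds = new DAGService(new BlockService(repo))

// Importing: each stored file or directory is announced through a 'file' event
// carrying { path, multihash, size }, as asserted in test-importer.js.
const Importer = unixFSEngine.importer
const importer = new Importer(ds)
importer.on('file', (file) => {
  console.log(file.path, bs58.encode(file.multihash).toString(), file.size)
})
importer.on('err', (err) => {
  console.error(err)
})
importer.add({path: 'foo/200Bytes.txt', stream: fs.createReadStream('./200Bytes.txt')})
importer.finish()

// Exporting: given a base58 multihash (here the small-file hash asserted in the tests),
// the exporter emits one 'file' event per entry, each with a readable content stream.
const exporter = unixFSEngine.exporter
const testExport = exporter('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', ds)
testExport.on('file', (data) => {
  data.stream.pipe(process.stdout)
})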